diff --git a/censor-local.bib b/censor-local.bib index ca63db0..126c7f1 100644 --- a/censor-local.bib +++ b/censor-local.bib @@ -1,6 +1,27 @@ % These are entries that replace entries in censor.bib % that I want to edit in some way. +% Use www.bamsoftware.com URL. +@inproceedings{Fifield2012a-local, + author = {David Fifield and Nate Hardison and Jonathan Ellithorpe and Emily Stark and Roger Dingledine and Phil Porras and Dan Boneh}, + booktitle = {Privacy Enhancing Technologies Symposium}, + pages = {239--258}, + publisher = {Springer}, + title = {Evading Censorship with Browser-Based Proxies}, + year = {2012}, + url = {https://www.bamsoftware.com/papers/flashproxy.pdf}, +} + +% Use www.bamsoftware.com URL. +@inproceedings{Fifield2013a-local, + author = {David Fifield and Gabi Nakibly and Dan Boneh}, + booktitle = {Privacy Enhancing Technologies Symposium}, + publisher = {Springer}, + title = {{OSS}: Using Online Scanning Services for Censorship Circumvention}, + year = {2013}, + url = {https://www.bamsoftware.com/papers/oss.pdf}, +} + % Use \enquote to avoid double quotes within double quotes. @inproceedings{Marczak2015a-local, author = {Bill Marczak and Nicholas Weaver and Jakub Dalek and Roya Ensafi and David Fifield and Sarah McKune and Arn Rey and John Scott-Railton and Ron Deibert and Vern Paxson}, @@ -22,3 +43,23 @@ year = {2015}, url = {https://www.bamsoftware.com/papers/fronting/}, } + +% Link to presentation page instead of directly to the PDF. +@inproceedings{Fifield2016a-local, + author = {David Fifield and Lynn Tsai}, + title = {Censors' Delay in Blocking Circumvention Proxies}, + booktitle = {Free and Open Communications on the Internet}, + publisher = {USENIX}, + year = {2016}, + url = {https://www.usenix.org/conference/foci16/workshop-program/presentation/fifield}, +} + +% Link to internet-freedom-science.org URL. 
+@inproceedings{Tschantz2016a-local, + author = {Michael Carl Tschantz and Sadia Afroz and Anonymous and Vern Paxson}, + title = {{SoK}: Towards Grounding Censorship Circumvention in Empiricism}, + booktitle = {Symposium on Security \& Privacy}, + publisher = {IEEE}, + year = {2016}, + url = {https://internet-freedom-science.org/circumvention-survey/sp2016/}, +} diff --git a/local.bib b/local.bib index f06547b..6fc4e45 100644 --- a/local.bib +++ b/local.bib @@ -1,15 +1,16 @@ -@techreport{eavesdroppersdilemma, +@techreport{Cronin2005a, title = {The Eavesdropper's Dilemma}, author = {Eric Cronin and Micah Sherr and Matt Blaze}, number = {MS-CIS-05-24}, + year = 2005, institution = {Department of Computer and Information Science, University of Pennsylvania}, url = {http://www.crypto.com/papers/internet-tap.pdf}, } @misc{toosheh, title = {{Toosheh}}, - key = {Toosheh}, - url = {http://www.toosheh.org/en.html}, + author = {{NetFreedom Pioneers}}, + url = {https://www.toosheh.org/en.html}, } @misc{oni-china-2005, @@ -570,7 +571,7 @@ author = {Matthew Prince}, month = mar, year = 2015, - howpublished = {Hacker News comment}, + howpublished = {Hacker News}, url = {https://news.ycombinator.com/item?id=9234367}, } @@ -613,6 +614,7 @@ @misc{greatfire-we-are-under-attack, author = {Charlie Smith}, + title = {We are under attack}, month = mar, year = 2015, howpublished = {GreatFire}, @@ -745,3 +747,59 @@ year = 2017, url = {https://metrics.torproject.org/userstats-bridge-combined.html?start=2016-06-01&end=2017-10-01&country=br}, } + +@misc{Gwagwa_a_study_of_internet-based_information_controls_in_rwanda, + author = {Arthur Gwagwa}, + title = {A study of {Internet}-based information controls in {Rwanda}, with a particular focus on the period around the 4~{August} 2017 General Elections}, + month = oct, + year = 2017, + url = {https://www.opentech.fund/sites/default/files/attachments/a_study_of_internet-based_information_controls_in_rwanda-arthur_gwagwa_final.pdf}, +} + 
+@article{Tan2015, + title = {Towards Measuring Unobservability in Anonymous Communication Systems}, + author = {Tan, Qingfeng and Shi, Jinqiao and Fang, Binxing and Guo, Li and Zhang, Wentao and Wang, Xuebin and Wei, Bingjie}, + journal = {Journal of Computer Research and Development}, + month = oct, + year = 2015, + volume = {52}, + number = {10}, + url = {http://crad.ict.ac.cn/EN/10.7544/issn1000-1239.2015.20150562}, +} + +@inproceedings{Narain2014a, + author = {Abhinav Narain and Nick Feamster and Alex C. Snoeren}, + title = {Deniable Liaisons}, + booktitle = {Computer and Communications Security}, + publisher = {ACM}, + year = {2014}, + url = {https://cseweb.ucsd.edu/~snoeren/papers/denali-ccs14.pdf}, +} + +@techreport{Ptacek1998a, + author = {Thomas H. Ptacek and Timothy N. Newsham}, + title = {Insertion, Evasion, and Denial of Service: Eluding Network Intrusion Detection}, + month = jan, + year = 1998, + institution = {Secure Networks, Inc.}, + url = {https://www.icir.org/vern/Ptacek-Newsham-Evasion-98.pdf}, +} + +@article{Paxson1999a, + author = {Vern Paxson}, + title = {{Bro}: A System for Detecting Network Intruders in Real-Time}, + journal = {Computer Networks}, + volume = {31}, + number = {23--24}, + month = dec, + year = 1999, + pages = {2435--2463}, + url = {https://www.icir.org/vern/papers/bro-CN99.pdf}, +} + +@misc{brdgrd, + author = {Philipp Winter}, + title = {{brdgrd}}, + year = 2012, + url = {https://github.com/NullHypothesis/brdgrd}, +} diff --git a/thesis.tex b/thesis.tex index ce2a97a..31f1842 100644 --- a/thesis.tex +++ b/thesis.tex @@ -25,6 +25,9 @@ \DeclareFieldFormat{url}{\url{#1}} \bibliography{local,censor-local,censor} \usepackage[hidelinks]{hyperref} +\def\chapterautorefname{Chapter} +\def\sectionautorefname{Section} +\def\figureautorefname{Figure} \urlstyle{same} @@ -43,7 +46,7 @@ \usepackage{yfonts} -\newcommand{\dragons}{\bigskip\noindent\textfrak{\Large here be dragons}\bigskip} 
+\newcommand{\dragons}{\bigskip\noindent\textfrak{\Large here be dragons:}\bigskip} \begin{document} @@ -121,7 +124,7 @@ it is necessary to limit the scope. My research is focused on an important special case of censorship, which I call the ``border firewall'' case. -It is illustrated in Figure~\ref{fig:border-firewall}. +It is illustrated in \autoref{fig:border-firewall}. \begin{figure} \centering @@ -226,13 +229,30 @@ Other forms of censorship that are \emph{not} in scope include: and does not cross the border \item forum moderation and deletion of social media posts \item deletion-resistant publishing like - the Eternity Service~\cite{Anderson1996a}---except + the Eternity Service~\cite{Anderson1996a} + (what Köpsell and Hillig call ``censorship resistant publishing systems''), + except insofar as access to such services may be blocked % Dagster~\cite{Stubblefield2001a} % Publius~\cite{Waldman2000a} % Tangler~\cite{Waldman2001a} \end{itemize} +Many parts of the abstract model are deliberately +left unspecified, to allow for the many variations that arise in practice. +The precise nature of ``blocking'' can take many forms, +from packet dropping, to injection of false responses, +and softer forms of disruption such as bandwidth throttling. +Detection need not be purely passive. +The censor is permitted to do work outside the context of a single connection; +for example, it may compute aggregate statistics over many connections, +make lists of suspected IP addresses, +and defer some analysis for offline processing. +The client may cooperate with other entities +inside and outside the censor's network, +and indeed almost all circumvention will require +the cooperation of a willing proxy on the outside. + Some have objected to the use of the generic term ``Internet censorship''\index{``Internet censorship'' as a term} to refer to the narrow case of the border firewall. @@ -244,6 +264,131 @@ to refer to the border firewall case. 
% Even within this narrowed scope, there is plenty to do. +\section{Overview} + +\dragons + + +\section{My past work} + +\dragons + +My blind spots: VPNs, +systems without research documentation (FreeGate, Ultrasurf, Shadowsocks), +foreign-language documentation and forums. + + +\subsection{Flash proxy} + +I began working on censorship circumvention with flash proxy in 2011. +Flash proxy is targeted at the difficult problem +of proxy address blocking: +it is designed against a censor model +in which the censor can block any IP address it chooses, +but only on a relatively slow timeline of several hours. + +Flash proxy works by running tiny JavaScript proxies in +ordinary users' web browsers. +The mini-proxies serve as temporary stepping stones +to a full-fledged proxy, such as a Tor relay. +The idea is that the flash proxies are too numerous, +diverse, and quickly changing to block effectively. +A censored user may use a particular proxy for +only seconds or minutes before switching to another. +If the censor manages to block the IP address of one proxy, +there is little harm, +because many other temporary proxies are ready to take its place. + +The flash proxy system was designed under interesting constraints +imposed by being partly implemented in JavaScript in the browser. +The proxies sent and received data using the WebSocket protocol, +which allows for socket-like +persistent TCP connections in browsers, but with a catch: +the browser can only +make outgoing connections, not receive incoming ones as a traditional proxy would. +The censored client must somehow inform the system of its own public address, +and then the proxy connects \emph{back} to the client. +This architectural constraint was probably +the biggest impediment to the usability of flash proxy, +because it required users to configure their local router +to permit incoming connections. +(Removing this impediment is the main reason +for the development of Snowflake, described later.) 
+Flash proxy does not itself try to obfuscate patterns +in the underlying traffic; +it only provides address diversity. + +For the initial ``rendezvous'' step in which a client advertises +its address and a request for proxy service, +flash proxy uses a neat idea: +a low-capacity, but highly covert channel bootstraps +the high-capacity, general-purpose WebSocket channel. +For example, we implemented an automated email-based rendezvous, +in which the client would send its address in an encrypted email to a special address. +While it is difficult to build a useful low-latency bidirectional channel +on top of email, +email is difficult to block +and it is only needed once, at the beginning of a session. +We later replaced the email-based rendezvous with one based on domain fronting, +which would later inspire +meek, described below. + +I was the leader of the flash proxy project and the main developer of its code. +Flash proxy was among the first circumvention systems built for Tor---only +obfs2 is older. +It was first deployed in Tor Browser in January 2013, +and was later retired in January 2016 +after it ceased to see appreciable use. +Its spirit lives on in Snowflake, now under development. + +Flash proxy appeared in the 2012 research paper +``Evading Censorship with Browser-Based Proxies''~\cite{Fifield2012a-local}, +which I coauthored with +Nate Hardison, Jonathan Ellithorpe, Emily Stark, Roger Dingledine, Phil Porras, and Dan Boneh. + +\subsection{OSS, a circumvention prototype} + +OSS, for ``online scanning service,'' +is a design for circumvention based on the use of +third-party web services that issue HTTP requests +to user-specified destinations, +such as an online translation service. +OSS is designed against the model of a censor that +is unwilling to block useful web services that are used for circumvention, +because of the useful service they provide. 
+ +In OSS, the client sends messages to a censored destination +by bouncing them through a third-party scanning service. +The key idea is a deliberate conflation of address and content. +The client asks the scanning service to scan a long URL +that is constructed to encode both the destination host and a data payload. +The destination receives the HTTP request and decodes its payload. +The destination sends data downstream by abusing HTTP redirection, +instructing the scanning service to send another +HTTP request back to the client, with a different payload. +The resistance to blocking of the OSS system hinges +on the abundance of online scanning services that exist. + +% https://trac.torproject.org/projects/tor/ticket/7559 +OSS was never deployed to users. +I judged its overhead and potential to annoy webmasters +to be too great to be practical. +The core idea, however, did see use +as a rendezvous method for flash proxy. +In this method, a helper program +would encode the client's IP address +into a URL\@. The user would then copy and paste the URL into any online scanning service, +which would then forward the information to the flash proxy system. +In fact, this URL encoding was used internally by +the domain fronting--based rendezvous as well, +using a URL as a convenient vehicle for data transfer. + +OSS appeared in the 2013 research paper +``OSS: Using Online Scanning Services for Censorship Circumvention''~\cite{Fifield2013a-local}, +which I coauthored with +Gabi Nakibly and Dan Boneh. + + \chapter{Principles of circumvention} \label{chap:principles} @@ -259,13 +404,11 @@ Once the censor detects some prohibited communication, it must take some action to stop the communication, such as terminating the connection at a network router. A censor must be able both to detect and to block. -(Detection without blocking would be called not censorship, but surveillance.) +(Detection without blocking would be called surveillance, not censorship.) 
The flip side of this statement is that a circumventor succeeds either by eluding detection, or, once detected, somehow resist the censor's blocking action. -Research on circumvention has mostly dealt with the detection problem---a -minority of research is on resisting blocking despite being detected. A censor is, then, essentially a traffic classifier coupled with @@ -275,106 +418,189 @@ and many complications are possible, at its heart it must decide, for each communication, whether to block or allow, and then effect blocks as appropriate. -Any classifier is liable to make mistakes. +Like any classifier, a censor is liable to make mistakes. When the censor fails to block something that it would have preferred to block, it is an error called a \emph{false negative}; when the censor accidentally blocks something that it would have preferred to allow, it is a \emph{false positive}. -Forcing the censor to trade false negative for false positives +Techniques for avoiding detection are often called +network protocol ``obfuscation,'' +and the term is apt. +It reflects not an attitude of security through obscurity; +but rather a recognition that avoiding detection is about +making the censor's classification problem more difficult, +and therefore more costly. +Forcing the censor to trade false positives for false negatives is the core of all circumvention that is based on avoiding detection. -Understanding the relative importance of -misclassification errors to the censor---knowing -what it prefers to allow and to block---is -important for designing circumvention systems. +The costs of misclassifications cannot be understood +in absolute terms: +they only have meaning relative to a given censor +and its specific resources and motivations. +Understanding the relative importance the censor +assigns to classification errors---knowing +what it prefers to allow and to block---is helpful. 
+Through good modeling, +we can make the tradeoffs less favorable for the censor +and more favorable for the circumventor. + +The censor may base its detection decision +on whatever criteria it finds practical. +I like to divide detection techniques into two classes: +\emph{detection by content} and \emph{detection by address}. +Detection by content is based on the content or topic +of the message: +keyword filtering and protocol identification fall into this class. +Detection by address is based on the sender or recipient +of the message: +IP address blacklists and DNS response tampering fall into this class. +An ``address'' may be any kind of identifier: +an IP address, a domain name, an email address. +Of these two classes, my experience is that +detection by address is harder to defeat. +Of course, there is no clear separation between +what is content and what is an address. +The layered nature of network protocols means that +an address at one layer is content at another. +Nevertheless, I find it useful to think about +detection techniques in these terms. + +The censor may block the address of the destination, +preventing direct access. +Any communication between the client and the destination +must therefore be indirect. +The intermediary between client and destination +is called a \emph{proxy}, +and it must do two things: +provide an unblocked address for the client to contact; and +somehow mask the contents of the channel +and the eventual destination address. +Throughout this thesis, I will use the word ``proxy'' +with an abstract meaning of ``one that acts on behalf of another.'' +A proxy need not be what is typically understood by the term ``proxy server,'' +a single host accepting and forwarding connections. +A VPN (virtual private network) is also a kind of proxy, +as is the Tor network, +as may be a specially configured router. +In \autoref{chap:domain-fronting} we will see +a network of cloud servers acting as a proxy. 
+In \autoref{chap:snowflake} the proxy will +be a pool of temporary instances of some JavaScript code. + +Proxies solve the first-order effects of censorship +(detection by content and address), +but they induce a second-order effect: +the censor must now seek out and block proxies, +in addition to the contents and addresses +that are its primary targets. +This is where circumvention research really begins: +not with access to the destination per~se, +but access to a proxy, which transitively gives +access to the destination. +The censor deals with detecting and blocking communication with proxies +using the same tools it would for any other communication. +Just as it may look for forbidden keywords in text, +it may look for distinctive features of proxy protocols; +just as it may block politically sensitive web sites, +it may block the addresses of any proxies it can discover. +The challenge for the circumventor +is to use proxy addresses and proxy protocols +that are difficult for the censor to detect or block. + +The way of organizing censorship and circumvention techniques +that I have presented is not the only way. +Köpsell and Hillig divide detection into +``content'' and ``circumstances''~\cite[\S~4]{Koepsell2004a}; +their circumstances include addresses and also what I would consider more content-like: +timing, data transfer characteristics, and protocols. +Philipp Winter divides circumvention into three problems: +bootstrapping, endpoint blocking, and traffic obfuscation~\cite[\S~1.1]{Winter2014c}. +Endpoint blocking and traffic obfuscation correspond to my +detection by address and detection by content; +bootstrapping is the challenge of getting a copy of circumvention software +and discovering initial proxy addresses. +I tend to fold bootstrapping in with address-based detection, +though for details on one aspect of the problem, +that of discovering bridge addresses, +see \autoref{sec:bridge-distribution}. 
+Khattak, Elahi, et~al., +in their 2016 survey and systematization of circumvention systems, +break detection into four aspects: +destinations, +content, +flow properties, +and protocol semantics~\cite[\S~2.4]{Khattak2016a}. +I think of their ``content,'' ``flow properties,'' and ``protocol semantics'' +as all fitting under the heading of content. +Tschantz et~al.\ identify ``setup'' and ``usage''~\cite[\S~V]{Tschantz2016a-local}, +and Khattak, Elahi, et~al.\ identify +``communication establishment'' and ``conversation''~\cite[\S~3.1]{Khattak2016a}, +as targets of obfuscation; +these mostly correspond to address and content. +What I call ``detection'' and ``blocking,'' +Khattak, Elahi, et~al.\ call ``fingerprinting'' and ``direct censorship''~\cite[\S~2.3]{Khattak2016a}, +and Tschantz et~al.\ call ``detection'' and ``action''~\cite[\S~II]{Tschantz2016a-local}. -\dragons - -Detection ranges from almost trivial to very complicated. - - -detection can be trivial or complicated -can be precomputed -limits on scale may constrin what censors can do - - -``obfuscation'' term is apt. -not reflecting a mindset of security through obscurity; -rather a recognition that it's about making the classification more difficult, -and forcing the censor to trade false positives for false negatives. - -The censor can block direct access to any destination, -so circumvention typically uses, at minimum, -some kind of indirect access, such as connecting -through a proxy server. - - -% paper on costs of shutdowns. - -Cite Pfitzmann + Hansen~\cite{Pfitzmann2010a}: undetectability, -unobservability, -unblockability. -Houmansadr?: entanglement. -I prefer to think of it in terms of costs. - -eavesdropper's dilemma~\cite{eavesdroppersdilemma} -(as an example of having an empty sphere of visibility?) -reach exceeds grasp -The sphere of influence is a subset of the sphere of visibility. 
-It is usual, -when evaluating circumvention designs, -to assume (conservatively) -that the sphere of influence and sphere of visibility are equal: -wherever the censor can observe, it can act. -But there are real-world cases where the censor -might observe traffic it would rather block, -and yet lack the ability to stop it. -Ignoring the Great Firewall of China~\cite{Clayton2006a}: -detection succeeds but not blocking. -Flakiness of firewalls, etc. -``blocking'' include throttling, disruption more generally -detection can include preprocessing - -I find it helpful to break detection into two classes: -detection by content and detection by address. -... -The first is blocking by content; that is, -by what you say. -HTTP request keyword filtering and blocking based on deep packet inspection -fall into this category. -The second is blocking by address; that is, -by whom you talk to. -IP address blocking and DNS tampering fall into this category. -The third is active probing, -in which the censor imitates a client in order to discover proxy servers. -Active probing is usually used as input for an address-blocking mechanism. -Of these challenges, address blocking is probably the hardest, -because it is efficient to implement in firewall hardware, -and because network addresses are a scarcer resource -than protocol variations. - -Appendix~\ref{sec:list-of-circ} contains a summary of -censorship circumvention systems and how they have changed over time -in response to changing censorship threats. - -This taxonomy of censorship techniques is not the only one possible. -Philipp Winter divides it into three problems~\cite[\S~1.1]{Winter2014c}: -the bootstrapping problem; -the endpoint blocking problem; -and the traffic obfuscation problem. 
-Khattak, Elahi, et~al.~\cite{Khattak2016a} call these two tasks -``fingerprinting'' and ``direct censorship''; -Tschantz et~al.~\cite{Tschantz2016a} call them -``detection'' and ``action.'' +\section{Collateral damage} +What's to prevent the censor from +shutting down all connectivity within its network, +trivially preventing the client from reaching the destination? +The answer is that the censor derives some kind of benefit +from allowing network connectivity, +other than that which it tries to censor. +Or to put it another way: +the censor \emph{incurs a cost} +whenever it commits a false positive +(also called overblocking: inadvertently blocking something it would +have preferred to allow). +Because it wants to block some things +and allow others, +the censor is forced to run as a classifier. +In order to avoid harm to itself, +the censor permits some measure of circumvention traffic. + +The cost of false positives is of so central importance to circumvention +that researchers have a special term for it: \emph{collateral damage}. +The term is a bit unfortunate, +evoking as it does negative connotations from other contexts. +It helps to focus more on the ``collateral'' than the ``damage'': +collateral damage is any cost +\emph{experienced by the censor} +as a result of incidental blocking done in the course of censorship. +It must trade its desire to block forbidden communications +against its desire to avoid harm to itself, +balance underblocking with overblocking. +Ideally, we force the censor into a dilemma: +unable to distinguish between circumvention and other traffic, +it must choose either to allow circumvention along with everything else, +or else block everything and suffer maximum collateral damage. +It is not necessary to fully reach this ideal before +circumvention becomes possible. +Better obfuscation drives up the censor's error rate +and therefore the cost of any blocking. 
+ +Collateral damage, as with other aspects of censorship, +cannot be understood in isolation, +but only in relation to a particular censor. +Suppose that blocking one web site +results in the collateral blocking of a hundred more. +Is that a large amount of collateral damage? +It depends. +Are those other sites likely to be visited by clients +in the censor's network? +Are they in the local language? +Do professionals and officials rely on them +to get their job done? +Is someone in the censorship bureau likely to get fired +as a result of their blocking? +If the answer to these questions is yes, +then yes, the collateral damage is likely to be high. +But if not, then the censor could take or leave those hundred sites---it +doesn't matter. -\section{Collateral damage} +\dragons -The cost of false positives is so important to circumvention -that researchers have a specialized term for it: collateral damage. -Collateral damage encompasses all the harm suffered by the censor -through inadvertent, ancillary blocking done in the course of censorship. -The term is a bit unfortunate, because it is easily misunderstood. If circumventors do things right, the potential ``damage'' is never realized, because the censor sees the cost as being too great. @@ -382,10 +608,6 @@ Circumventors try to make false positives so expensive that the censor has no choice but to allow false negatives; that is, to permit circumvention traffic. -collateral damage -not a nice name -means the same as ``making the classification problem difficult'' -if you think of the censor as a classifier. false positive and false negative costs---circumventor's tactic is to bind FPs and FNs tightly together. 
underlies all circumvention according to the usual threat models @@ -393,17 +615,6 @@ underlies all circumvention according to the usual threat models even look-like-something, stego transports ultimately depend on collateral damage (lengthy explanation and examples) -There are some forms of circumvention that do not rely -on collateral damage; they are those in which the -censor's sphere of influence\index{sphere of influence (of a censor)} is nil. -That it, they rely on a channel that the censor is willing to block, -but somehow actually unable to block. -A hypothetical example might be a radio broadcast that the censor -cannot jam because it lacks the necessary equipment. -This is an example of a censor having an empty sphere of influence\index{sphere of influence (of a censor)} -and a nonempty sphere of visibility\index{sphere of visibility (of a censor)}: it can look, but not touch. -A real-life example is Toosheh~\cite{toosheh}\ldots (also has receiver anonymity) - Don't need to be vague, saying that there is some communication the censor is unwilling to block. Make it concrete: this is what collateral damage the censor would have to incur to block this. If that collateral damage is large, then you win. @@ -412,13 +623,150 @@ turn your assumptions into testable or quantifiable hypotheses don't say, "the censor cannot do X"; say, "in order to do X, the censor would have to..." make the threat models falsifiable: not just assumptions but hypotheses about how the world works (or will work) +real shutdowns not a paradox +paper on costs of shutdowns. + +I believe that collateral damage provides a more productive way +to think about the limitations of censors. +...what's more, it is defined relative to a specific censor's +resources and motivations, +rather than being ``unblockable'' in absolute terms. +Cite Pfitzmann + Hansen~\cite{Pfitzmann2010a}: undetectability, +unobservability, +unblockability. +Houmansadr?: entanglement. 
+someone?: deniability +I prefer to think of it in terms of costs. + + +\section{Spheres of influence and visibility} + +\begin{itemize} +\item Deniable Liaisons~\cite{Narain2014a} +\end{itemize} + +It is usual to assume (conservatively) +that whatever the censor can detect, +it also can block. +That is, to ignore blocking per~se +and focus only on the detection problem. +We know from experience, however, +that there are cases in practice +where a censor's reach exceeds its grasp: +where it is able to detect circumvention +but not block it. +Sometimes it is useful to consider this possibility when modeling. +Khattak, Elahi, et~al.~\cite{Khattak2016a} +express it nicely by subdividing the censor's network into a +\emph{sphere of influence}\index{sphere of influence (of a censor)} +within which the censor has active control, +and a potentially larger \emph{sphere of visibility}\index{sphere of visibility (of a censor)} +within which the censor may only observe, not act. + +A landmark example of this kind of thinking +is the 2006 research on +``Ignoring the Great Firewall of China'' by Clayton et~al.~\cite{Clayton2006a}. +They found that the firewall would block connections by injecting +phony TCP RST\index{RST (TCP flag)} packets +(which cause the connection to be torn down) +or SYN/ACK\index{SYN (TCP flag)}\index{ACK (TCP flag)}\index{SYN/ACK} packets +(which cause the client to become unsynchronized), +and that simply ignoring the anomalous packets +rendered blocking ineffective. +(Why, then, did the censor choose to \emph{inject} its own packets, +rather than \emph{drop} the client's or server's? +The answer is probably that injection is technically +easier to achieve, highlighting a limit on the censor's power.) +One can think of this ignoring as shrinking the censor's +sphere of influence: it can still technically act within this sphere, +but not in a way that actually effects blocking. 
+Additionally, intensive measurements revealed many failures +to block, and blocking rates that changed over time, +suggesting that even when the firewall intends +a general policy of blocking, it does not always succeed. + +Another fascinating example of ``look, but don't touch'' +communication is the ``filecasting'' technique used by Toosheh~\cite{toosheh}, +a file distribution service based on satellite TV broadcasts. +Clients tune their satellite receivers to a certain channel +and record the broadcast to a USB flash drive. +Later, they run a program on the recording +that decodes the information and extracts a bundle of files. +The system is unidirectional: clients +can only receive the files that the Toosheh operators choose to provide. +The censor can easily see that Toosheh is in use---it's +a broadcast, after all---but cannot identify users, +or block the signal +in any way short of continuous radio jamming or +tearing down satellite dishes. +% But news reports say that the government of Iran does jam? +% http://www.pbs.org/wgbh/pages/frontline/tehranbureau/2012/11/briefing-satellite-wars-why-iran-keeps-jamming.html +% https://www.wired.com/2016/04/ingenious-way-iranians-using-satellite-tv-beam-banned-data/ "Yahsat’s satellite hovers over the Middle East, making it harder for the Iranian government to jam the satellite’s signal as it’s broadcast directly down to Iranian dishes" + +There are parallels between the study of Internet censorship +and that of network intrusion detection. +One is that a censor's detector may be implemented as a +network intrusion detection system or monitor, +a device ``on the side'' of a communication link +that receives a copy of the packets that flow over the link, +but that, unlike a router, +is not responsible for forwarding the packets onward. +Another parallel is that censors are susceptible to the same kinds +of evasion and obfuscation attacks that affect network monitors more generally. 
+In 1998, Ptacek and Newsham~\cite{Ptacek1998a} +and Paxson~\cite[\S~5.3]{Paxson1999a} outlined various attacks +against network intrusion detection systems---such as +manipulating the IP time-to-live field +or sending overlapping IP fragments---that +cause a monitor either to accept what the receiver will reject, +or reject what the receiver will accept. +A basic problem is that a monitor's position in the middle of the network +does not enable it to predict exactly how each packet will be interpreted +by the endpoints. +Cronin et~al.~\cite{Cronin2005a} posit that the monitor's +conflicting goals +of sensitivity (recording all that is relevant) +and selectivity (recording \emph{only} what is relevant) +give rise to an unavoidable ``eavesdropper's dilemma.'' + +Monitor evasion techniques can be used to reduce +a censor's sphere of visibility---eliminating certain +traffic features from its consideration. +Crandall et~al.~\cite{Crandall2007a} in 2007 suggested +using IP fragmentation to prevent keyword matching +(splitting keywords across fragments). +In 2008 and 2009, Park and Crandall~\cite{Park2010a} explicitly characterized +the Great Firewall as a network intrusion detection system +and found that a lack of TCP reassembly allowed evading keyword matching. +Winter and Lindskog~\cite{Winter2012a} found that +the Great Firewall still did not do TCP segment reassembly in 2012, +in the course of studying the firewall's proxy-discovery probes. +(Such probes are the subject of \autoref{chap:active-probing}.) +They released a tool, brdgrd~\cite{brdgrd}, +that, by manipulating the TCP window size, +prevented the censor's scanners from receiving a full response +in the first packet, thereby foiling active probing. +They reported that the tool stopped working in 2013. +Anderson~\cite{Anderson2012splinternet} gave technical information +on the implementation of the Great Firewall as it existed in 2012, +and observed that it is implemented as an ``on-the-side'' monitor.
+% https://www.cs.kau.se/philwint/gfw/ +Khattak et~al.~\cite{Khattak2013a} applied a wide array +of evasion experiments to the Great Firewall in 2013, +identifying classes of working evasions and +estimating the cost to counteract them. + \section{Bridge distribution} +\label{sec:bridge-distribution} + +\dragons Resistance to blocking by address; obfuscated protocol then prevents blocking by content. \begin{itemize} +\item Untrusted Messenger Discovery~\cite{Feamster2003a} \item Kaleidoscope~\cite{Sovran2008a,Sovran2008b} \item Mahdian~\cite{Mahdian2010a} \item Proximax~\cite{McCoy2011a} @@ -441,10 +789,6 @@ that can inject, but not drop, packets. the very strong threat model may be appropriate for e.g. whitelisting corporate or university censors -address blocking -content blocking -(could also separate out e.g. timing (and something else? check Khattak2016a)) -The harder problem is address blocking: bridge distribution and rendezvous The mass censors we know are weak if you are not being specifically targeted Pick a proxy server used by you and no one else @@ -467,9 +811,6 @@ Denali infrastructure-based, decoy routing and domain fronting -Packet fragmentation tricks, etc. -Cite brdgrd, \cite{Khattak2013a} - pluggable transports Tying questions of ethics\index{ethics} to questions about censor behavior, motivation: @@ -481,6 +822,8 @@ Censors may come to conclusions different than what we expect \section{Early censorship and circumvention} +\dragons + Early censors (around the time of the late 1990s and early 2000s) would be considered weak by today's standards. They were mostly easy to circumvent by simple countermeasures, @@ -507,56 +850,25 @@ HTML rewriting proxies changing dns servers -\section{Open problems in censor modeling} - -Ongoing, longitudinal measurement of censorship -remains a challenge. -Studies tend to be limited to one geographical region -and one period of time. 
-Dedicated measurement platforms such as -OONI~\cite{Filasto2012a} and ICLab~\cite{iclab} -are starting to make a dent in this problem, -by providing regular measurements from many locations worldwide. -Even with these, there are challenges around -getting probes into challenging locations -and keeping them running. - -Apart from a few reports of, for example, -per annum spending on filtering hardware, -not much is known about how much censorship costs to implement. -In general, contemporary threat models tend to ignore -resource limitations on the part of the censor. - +relationship of censorship to network monitoring/NIDS +risks of flow blocking (Telex/TapDance~\cite{Frolov2017a}) +% http://www.icir.org/vern/papers/activemap-oak03.pdf +% http://www.icir.org/vern/papers/norm-usenix-sec-01.pdf -\chapter{Measurement studies and measurement platforms} +The early development \emph{was} an arms race +or cat-and-mouse game, +but there is no reason to assume it will always be so. -Analyzing Internet Censorship in Pakistan\cite{Aceto2016a} - -informing our threat models - -censors' capabilities---presumed and actual -e.g. ip blocking (reaction time?) -active probing - -Khattak and Elahi et~al.~\cite{Khattak2016a} -put it nicely with the terms -``sphere of influence''\index{sphere of influence (of a censor)} -and ``sphere of visibility\index{sphere of visibility (of a censor)}.'' -proxy-probe +\chapter{Censor capabilities} -Internet curfews (Gabon), limited time of shutdowns shows sensitivity to collateral damage. - -commercial firewalls (Citizen Lab) and bespoke systems - - -\chapter{Studies of censors} +\dragons This section surveys past measurement studies in order to draw specific and general conclusions about censor models. The objects of this survey are based on those in the evaluation study done by -me and others in 2016~\cite[\S IV.A]{Tschantz2016a}. +me and others in 2016~\cite[\S IV.A]{Tschantz2016a-local}. 
The main tool we have to build relevant threat models is the natural study of censors. @@ -1028,14 +1340,106 @@ blocked it, and only sporadically. % \cite{Dalek2013a} % \cite{Gill2015a} +\cite{Gwagwa_a_study_of_internet-based_information_controls_in_rwanda} +and other OONI. + +Analyzing Internet Censorship in Pakistan\cite{Aceto2016a} + +informing our threat models + +censors' capabilities---presumed and actual +e.g. ip blocking (reaction time?) +active probing + +Internet curfews (Gabon), limited time of shutdowns shows sensitivity to collateral damage. + +commercial firewalls (Citizen Lab) and bespoke systems + +\section{Open problems in censor modeling} + +\dragons + +Ongoing, longitudinal measurement of censorship +remains a challenge. +Studies tend to be limited to one geographical region +and one period of time. +Dedicated measurement platforms such as +OONI~\cite{Filasto2012a} and ICLab~\cite{iclab} +are starting to make a dent in this problem, +by providing regular measurements from many locations worldwide. +Even with these, there are challenges around +getting probes into challenging locations +and keeping them running. + +Apart from a few reports of, for example, +per annum spending on filtering hardware, +not much is known about how much censorship costs to implement. +In general, contemporary threat models tend to ignore +resource limitations on the part of the censor. + + +\chapter{Circumvention systems} + +\dragons + +Evaluating the quality of circumvention systems is tricky, +whether they are only proposed or actually deployed. +The problem of evaluation is directly tied to threat modeling. +Circumvention is judged according to how well it works +under a given model; +the evaluation is therefore meaningful only as far as +the threat model reflects reality. +Without grounding in reality, researchers +risk running an imaginary arms race +that evolves independently of the real one. 
+ +This kind of work is rather different than +the direct evaluations of circumvention tools +that have happened before, for example those done by +the Berkman Center~\cite{Berkman2011} +and Freedom House~\cite{FreedomHouse2011} in 2011. +Rather than testing tools against censors, we evaluated +how closely calibrated designers' own models were to +models derived from actual observations of censors. + +This research was partly born out of +frustration with some typical assumptions made +in academic research on circumvention, +which we felt placed undue emphasis +on steganography and obfuscation of traffic streams, +while not paying enough attention to +the perhaps more important problems of bridge distribution and rendezvous. +Indeed, in our survey of over 50 circumvention tools, +we found that academic designs tended to be concerned +with detection in the steady state after a connection +is established, +while actually deployed systems cared more about +how the connection is established initially. +We wanted to help bridge the gap by laying out a research agenda +to align the incentives of researchers with those of circumventors. +This work was built on extensive surveys +of circumvention tools, measurement studies, +and known censorship events against Tor. + +This work on evaluation appeared in the 2016 research paper +``Towards Grounding Censorship Circumvention in Empiricism''~\cite{Tschantz2016a-local}, +which I coauthored with +Michael Carl Tschantz, Sadia Afroz, and Vern Paxson. + +Do they check the right things? + +what's used and what's not used + \section{Summary of circumvention systems} \label{sec:list-of-circ} +\dragons + % (BIFSO article predicting failure of censorship, leading to CGIProxy?) Many circumvention systems have been proposed or deployed. 
-My survey with Tschantz, Afroz, and Paxson~\cite{Tschantz2016a} +My survey with Tschantz, Afroz, and Paxson~\cite{Tschantz2016a-local} covered 54 systems; a later one by Khattak, Elahi, et~al.~\cite{Khattak2016a} covered 73. @@ -1108,9 +1512,10 @@ Cirripede~\cite{Houmansadr2011a}. % shadowsocks, whatever +\chapter{Active probing} +\label{chap:active-probing} - -\chapter{Empirically testing real-world censors} +\dragons In 2015 I helped study the phenomenon of ``active probing'' by the Great Firewall to discover hidden proxy servers. @@ -1147,6 +1552,11 @@ The work on active probing appeared in the 2015 research paper which I coauthored with Roya Ensafi, Philipp Winter, Nick Feamster, Nicholas Weaver, Vern Paxson. + +\chapter{Time delays in censors' reactions} + +\dragons + I am interested in understanding censors at a deeper level. To that end, I am working on a project to measure @@ -1155,7 +1565,7 @@ So far, our technique has been to monitor the reachability of newly added Tor Browser bridges, to see how long after they are introduced they get blocked. Portions of this work have already appeared in the 2016 research paper -``Censors' Delay in Blocking Circumvention Proxies''~\cite{Fifield2016a}, +``Censors' Delay in Blocking Circumvention Proxies''~\cite{Fifield2016a-local}, which I coauthored with Lynn Tsai. We discovered some interesting, previously undocumented behaviors of the Great Firewall of China. @@ -1185,6 +1595,8 @@ of censors' priorities with respect to circumvention. \chapter{Domain fronting} \label{chap:domain-fronting} +\dragons + My most influential contribution to the world of circumvention is my research on domain fronting. @@ -1272,6 +1684,14 @@ which I coauthored with Chang Lan, Rod Hynes, Percy Wegmann, and Vern Paxson. \section{An unvarnished history of meek deployment} +\begin{itemize} +\item First release of Orbot that had meek? 
+\item Funding/grant timespans +\item cost table +\item ``Seeing Through Network-Protocol Obfuscation''~\cite{Wang2015a} October 2015 +\item ``Towards Measuring Unobservability in Anonymous Communication Systems''~\cite{Tan2015} October 2015 +\end{itemize} + \begin{figure} \centering \includegraphics{figures/metrics-clients-meek} @@ -1293,17 +1713,11 @@ of meek over its entire existence. I hope to share the benefit of my experience by commentating the history with surprises and lessons learned. -Figure~\ref{fig:metrics-clients-meek} +\autoref{fig:metrics-clients-meek} shows the estimated concurrent number of users of meek over its entire existence. The counts come from Tor Metrics~\cite{tor-tr-2012-10-001}. -\begin{itemize} -\item First release of Orbot that had meek? -\item Funding/grant timespans -\item cost table -\end{itemize} - \subsection*{2013: Precursors; prototypes} The prehistory of meek begins in 2013 with flash proxy. @@ -1877,138 +2291,25 @@ Between July~29 and August~17, meek-amazon had another outage due to an expired TLS certificate. -\chapter{Building circumvention systems} - -Over the past five years I have been involved in the development -of four noteworthy circumvention designs: -\begin{itemize} -\item Flash proxy~\cite{Fifield2012a}, based on temporary proxies running in web browsers. -\item OSS~\cite{Fifield2013a}, using third-party web scanning services. -\item Domain fronting~\cite{Fifield2015a-local}, using popular web services for cover. -\item Snowflake~\cite{snowflake-wiki,FifieldGilEpnerWebRTC} (in progress), based on peer-to-peer proxies in web browsers; flash proxy redux. -\end{itemize} -These have evolved according to the needs of the time -and my growing understanding of how censorship should be modeled. - -My main interest is resistance to address blocking, -which I regard as more difficult to achieve than resistance to content blocking. 
-The first two systems, flash proxy and OSS, -made no special effort to avoid their content being blocked, -leaving content obfuscation to be done by another layer. -My later designs have taken the threats of content blocking -and active probing more integrally into account. - -\section{Flash proxy, a circumvention system} - -I began working on censorship circumvention with flash proxy in 2011. -Flash proxy is targeted at the difficult problem -of proxy address blocking: -it is designed against a censor model -in which the censor can block any IP address it chooses, -but only on a relatively slow timeline of several hours. - -Flash proxy works by running tiny JavaScript proxies in -ordinary users' web browsers. -The mini-proxies serve as temporary stepping stones -to a full-fledged proxy, such as a Tor relay. -The idea is that the flash proxies are too numerous, -diverse, and quickly changing to block effectively. -A censored user may use a particular proxy for -only seconds or minutes before switching to another. -If the censor manages to block the IP address of one proxy, -there is little harm, -because many other temporary proxies are ready to take its place. - -The flash proxy system was designed under interesting constraints -imposed by being partly implemented in JavaScript in the browser. -The proxies sent and received data using the WebSocket protocol, -which allows for socket-like -persistent TCP connections in browsers, but with a catch: -the browser can only -make outgoing connections, not receive incoming ones as a traditional proxy would. -The censored client must somehow inform the system of its own public address, -and then the proxy connects \emph{back} to the client. -This architectural constraint was probably -the biggest impediment to the usability of flash proxy, -because it required users to configure their local router -to permit incoming connections. 
-(Removing this impediment is the main reason -for the development of Snowflake, described later.) -Flash proxy does not itself try to obfuscate patterns -in the underlying traffic; -it only provides address diversity. - -For the initial ``rendezvous'' step in which a client advertises -its address and a request for proxy service, -flash proxy uses a neat idea: -a low-capacity, but highly covert channel bootstraps -the high-capacity, general-purpose WebSocket channel. -For example, we implemented an automated email-based rendezvous, -in which the client would send its address in an encrypted email to a special address. -While it is difficult to build a useful low-latency bidirectional channel -on top of email, -email is difficult to block -and it is only needed once, at the beginning of a session. -We later replaced the email-based rendezvous with one based on domain fronting, -which would later inspire -meek, described below. - -I was the leader of the flash proxy project and the main developer of its code. -Flash proxy was among the first circumvention systems built for Tor---only -obfs2 is older. -It was first deployed in Tor Browser in January 2013, -and was later retired in January 2016 -after it ceased to see appreciable use. -Its spirit lives on in Snowflake, now under development. - -Flash proxy appeared in the 2012 research paper -``Evading Censorship with Browser-Based Proxies''~\cite{Fifield2012a}, -which I coauthored with -Nate Hardison, Jonathan Ellithorpe, Emily Stark, Roger Dingledine, Phil Porras, and Dan Boneh. - -\section{OSS, a circumvention prototype} +\chapter{Snowflake} +\label{chap:snowflake} -OSS, for ``online scanning service,'' -is a design for circumvention based on the use of -third-party web services that issue HTTP requests -to user-specified destinations, -such as an online translation service. 
-OSS is designed against the model of a censor that -is unwilling to block useful web services that are used for circumvention, -because of the useful service they provide. +\dragons -In OSS, the client sends messages to a censored destination -by bouncing them through a third-party scanning service. -The key idea is a deliberate conflation of address and content. -The client asks the scanning service to scan a long URL -that is constructed to encode both the destination host and a data payload. -The destination receives the HTTP request and decodes its payload. -The destination sends data downstream by abusing HTTP redirection, -instructing the scanning service to send another -HTTP request back to the client, with a different payload. -The resistance to blocking of the OSS system hinges -on the abundance of online scanning services that exist. +\begin{figure} +\centering +\includegraphics{figures/snowflake} +\caption{ +Diagram of Snowflake. +} +\label{fig:snowflake} +\end{figure} -% https://trac.torproject.org/projects/tor/ticket/7559 -OSS was never deployed to users. -I judged its overhead and potential to annoy webmasters -to be too great to be practical. -The core idea, however, did see use -as a rendezvous method for flash proxy. -In this method, a helper program -would encode the client's IP address -into a URL\@. The user would then copy and paste the URL into any online scanning service, -which would then forward the information to the flash proxy system. -In fact, this URL encoding was used internally by -the domain fronting--based rendezvous as well, -using a URL as a convenient vehicle for data transfer. +Flash proxy revisited -OSS appeared in the 2013 research paper -``OSS: Using Online Scanning Services for Censorship Circumvention''~\cite{Fifield2013a}, -which I coauthored with -Gabi Nakibly and Dan Boneh. 
+WebRTC fingerprinting -\section{Snowflake, a circumvention system} +Engineering challenges I am working on a new circumvention system, a transport for Tor called Snowflake. @@ -2049,66 +2350,6 @@ the project's wiki page~\cite{snowflake-wiki}. Mia Gil Epner and I wrote a preprint on the fingerprinting hazards of WebRTC~\cite{FifieldGilEpnerWebRTC}. - -\chapter{How circumvention technologies are evaluated} - -Evaluating the quality of circumvention systems is tricky, -whether they are only proposed or actually deployed. -The problem of evaluation is directly tied to threat modeling. -Circumvention is judged according to how well it works -under a given model; -the evaluation is therefore meaningful only as far as -the threat model reflects reality. -Without grounding in reality, researchers -risk running an imaginary arms race -that evolves independently of the real one. - -This kind of work is rather different than -the direct evaluations of circumvention tools -that have happened before, for example those done by -the Berkman Center~\cite{Berkman2011} -and Freedom House~\cite{FreedomHouse2011} in 2011. -Rather than testing tools against censors, we evaluated -how closely calibrated designers' own models were to -models derived from actual observations of censors. - -This research was partly born out of -frustration with some typical assumptions made -in academic research on circumvention, -which we felt placed undue emphasis -on steganography and obfuscation of traffic streams, -while not paying enough attention to -the perhaps more important problems of bridge distribution and rendezvous. -Indeed, in our survey of over 50 circumvention tools, -we found that academic designs tended to be concerned -with detection in the steady state after a connection -is established, -while actually deployed systems cared more about -how the connection is established initially. 
-We wanted to help bridge the gap by laying out a research agenda -to align the incentives of researchers with those of circumventors. -This work was built on extensive surveys -of circumvention tools, measurement studies, -and known censorship events against Tor. - -This work on evaluation appeared in the 2016 research paper -``Towards Grounding Censorship Circumvention in Empiricism''~\cite{Tschantz2016a}, -which I coauthored with -Michael Carl Tschantz, Sadia Afroz, and Vern Paxson. - -Do they check the right things? - -what's used and what's not used - -\section{Flash proxy} - -\section{Domain fronting and meek} - - -\section{Snowflake: flash proxy revisited} - -WebRTC fingerprinting - % \section{How does it end?} % Probably the circumstances of the world change