diff --git a/censor.bib b/censor.bib index 1b0a3fc..053e814 100644 --- a/censor.bib +++ b/censor.bib @@ -1,3 +1,48 @@ +@inproceedings{Nasr2017a, + author = {Milad Nasr and Hadi Zolfaghari and Amir Houmansadr}, + title = {The Waterfall of Liberty: Decoy Routing Circumvention that Resists Routing Attacks}, + booktitle = {Computer and Communications Security}, + publisher = {ACM}, + year = {2017}, + url = {https://acmccs.github.io/papers/p2037-nasrA.pdf}, +} + +@inproceedings{McLachlan2009a, + author = {Jon McLachlan and Nicholas Hopper}, + title = {On the risks of serving whenever you surf: Vulnerabilities in {Tor}'s blocking resistance design}, + booktitle = {Workshop on Privacy in the Electronic Society}, + publisher = {ACM}, + year = {2009}, + url = {https://www-users.cs.umn.edu/~hopper/surf_and_serve.pdf}, +} + +@inproceedings{Wang2017a, + author = {Zhongjie Wang and Yue Cao and Zhiyun Qian and Chengyu Song and Srikanth V. Krishnamurthy}, + title = {Your State is Not Mine: A Closer Look at Evading Stateful {Internet} Censorship}, + booktitle = {Internet Measurement Conference}, + publisher = {ACM}, + year = {2017}, + url = {http://www.cs.ucr.edu/~krish/imc17.pdf}, +} + +@inproceedings{Li2017a, + author = {Fangfan Li and Abbas Razaghpanah and Arash Molavi Kakhki and Arian Akhavan Niaki and David Choffnes and Phillipa Gill and Alan Mislove}, + title = {lib$\cdot$erate, (n): A library for exposing (traffic-classification) rules and avoiding them efficiently}, + booktitle = {Internet Measurement Conference}, + publisher = {ACM}, + year = {2017}, + url = {https://people.cs.umass.edu/~phillipa/papers/imc2017_liberate_paper.pdf}, +} + +@inproceedings{Morshed2017a, + author = {Mehrab Bin Morshed and Michaelanne Dye and Syed Ishtiaque Ahmed and Neha Kumar}, + title = {When the {Internet} Goes Down in {Bangladesh}}, + booktitle = {Computer-Supported Cooperative Work and Social Computing}, + publisher = {ACM}, + year = {2017}, + url = 
{https://nehakumardotorg.files.wordpress.com/2014/03/p1591-bin-morshed.pdf}, +} + @inproceedings{Singh2017a, author = {Rachee Singh and Rishab Nithyanand and Sadia Afroz and Paul Pearce and Michael Carl Tschantz and Phillipa Gill and Vern Paxson}, title = {Characterizing the Nature and Dynamics of {Tor} Exit Blocking}, @@ -103,7 +148,7 @@ booktitle = {Symposium on Security \& Privacy}, publisher = {IEEE}, year = {2017}, - url = {http://www.ieee-security.org/TC/SP2017/papers/586.pdf}, + url = {https://www.ieee-security.org/TC/SP2017/papers/586.pdf}, } @article{Heydari2017a, @@ -313,7 +358,7 @@ booktitle = {European Symposium on Security \& Privacy}, publisher = {IEEE}, year = {2016}, - url = {http://www3.cs.stonybrook.edu/~phillipa/papers/castle.pdf}, + url = {https://people.cs.umass.edu/~phillipa/papers/castle.pdf}, } @inproceedings{Ellard2015a, @@ -456,7 +501,7 @@ booktitle = {Ethics in Networked Systems Research}, publisher = {ACM}, year = {2015}, - url = {http://ensr.oii.ox.ac.uk/wp-content/uploads/2015/07/Forgive-Us-Our-SYNs-Technical-and-Ethical-Considerations-for-Measuring-Internet-Censorship.pdf} + url = {http://ensr.oii.ox.ac.uk/wp-content/uploads/2015/07/Forgive-Us-Our-SYNs-Technical-and-Ethical-Considerations-for-Measuring-Internet-Censorship.pdf}, } @inproceedings{Smits2011a, @@ -755,7 +800,7 @@ booktitle = {Conference on Online Social Networks}, publisher = {ACM}, year = {2013}, - url = {http://www.ccs.neu.edu/home/cbw/pdf/weibo-cosn13.pdf}, + url = {https://cbw.sh/static/pdf/weibo-cosn13.pdf}, } @inproceedings{Clayton2006a, @@ -829,7 +874,7 @@ title = {An anomaly-based censorship-detection system for {Tor}}, institution = {The Tor Project}, year = {2011}, - url = {https://research.torproject.org/techreports/detector-2011-09-09.pdf} + url = {https://research.torproject.org/techreports/detector-2011-09-09.pdf}, } @inproceedings{Detal2013a, @@ -957,7 +1002,7 @@ publisher = {ACM}, title = {{Cirripede}: Circumvention Infrastructure using Router Redirection 
with Plausible Deniability}, year = {2011}, - url = {http://hatswitch.org/~nikita/papers/cirripede-ccs11.pdf}, + url = {https://hatswitch.org/~nikita/papers/cirripede-ccs11.pdf}, } @inproceedings{Houmansadr2013a, @@ -1066,7 +1111,7 @@ journal = {American Political Science Review}, title = {How Censorship in {China} Allows Government Criticism but Silences Collective Expression}, year = {2012}, - url = {http://gking.harvard.edu/files/censored.pdf}, + url = {https://gking.harvard.edu/files/censored.pdf}, } @inproceedings{Knockel2011a, @@ -1112,7 +1157,7 @@ publisher = {IEEE}, title = {Extensive Analysis and Large-Scale Empirical Evaluation of {Tor} Bridge Discovery}, year = {2012}, - url = {http://www.cs.uml.edu/~xinwenfu/paper/Bridge.pdf}, + url = {https://www.cs.uml.edu/~xinwenfu/paper/Bridge.pdf}, } @inproceedings{Liu2011a, @@ -1129,7 +1174,7 @@ title = {The Great {DNS} Wall of {China}}, institution = {New York University}, year = {2007}, - url = {https://cs.nyu.edu/~pcw216/work/nds/final.pdf}, + url = {https://censorbib.nymity.ch/pdf/Lowe2007a.pdf}, } @inproceedings{Luchaup2014a, @@ -1366,7 +1411,7 @@ booktitle = {Foundations \& Practice of Security}, publisher = {Springer}, year = {2013}, - url = {http://grothoff.org/christian/fps2013wachs.pdf}, + url = {https://grothoff.org/christian/fps2013wachs.pdf}, } @inproceedings{Wachs2014a, @@ -1384,7 +1429,7 @@ booktitle = {USENIX Security Symposium}, publisher = {USENIX}, year = {2000}, - url = {http://www.eecs.harvard.edu/~mema/courses/cs264/papers/waldman00publius.pdf}, + url = {https://www.eecs.harvard.edu/~mema/courses/cs264/papers/waldman00publius.pdf}, } @inproceedings{Waldman2001a, @@ -1394,7 +1439,7 @@ publisher = {ACM}, year = {2001}, pages = {126--135}, - url = {http://www.cs.nyu.edu/~waldman/tangler.ps}, + url = {https://www.cs.nyu.edu/~waldman/tangler.ps}, } @inproceedings{Wang2012a, @@ -1403,7 +1448,7 @@ booktitle = {Computer and Communications Security}, publisher = {ACM}, year = {2012}, - url = 
{http://hatswitch.org/~nikita/papers/censorspoofer.pdf}, + url = {https://hatswitch.org/~nikita/papers/censorspoofer.pdf}, } @inproceedings{Wang2013a, @@ -1430,7 +1475,7 @@ booktitle = {Computer and Communications Security}, publisher = {ACM}, year = {2012}, - url = {http://www.frankwang.org/papers/ccs2012.pdf}, + url = {https://www.frankwang.org/files/papers/ccs2012.pdf}, } @techreport{Wiley2011a, diff --git a/local.bib b/local.bib index b50377a..6585596 100644 --- a/local.bib +++ b/local.bib @@ -248,7 +248,7 @@ @misc{sixfour, author = {{Mixter}}, - title = {The {Six/Four} System}, + title = {The {Six/Four System}}, year = 2003, month = feb, url = {https://web.archive.org/web/20030630201226/http://mixter.warrior2k.com:80/sixfourdocs/intro.html}, @@ -256,14 +256,34 @@ % https://en.wikipedia.org/wiki/Hacktivismo#The_Six.2FFour_System % https://web.archive.org/web/20031002125618/http://www.hacktivismo.com:80/news/modules.php?name=Content&pa=showpage&pid=19 -@techreport{ten-ways-discover-tor-bridges, +@techreport{tor-techreport-2006-11-001, + title = {Design of a blocking-resistant anonymity system}, + author = {Roger Dingledine and Nick Mathewson}, + institution = {The Tor Project}, + number = {2006-11-001}, + month = nov, + year = 2006, + url = {https://research.torproject.org/techreports/blocking-2006-11.pdf}, +} + +@techreport{tor-techreport-2011-05-001, + author = {Roger Dingledine}, + title = {Strategies for getting more bridge addresses}, + institution = {The Tor Project}, + number = {2011-05-001}, + month = may, + year = 2011, + url = {https://research.torproject.org/techreports/strategies-getting-more-bridge-addresses-2011-05-13.pdf}, +} + +@techreport{tor-techreport-2011-10-002, title = {Ten ways to discover Tor bridges}, author = {Roger Dingledine}, institution = {The Tor Project}, number = {2011-10-002}, month = oct, year = 2011, - note = {\url{https://research.torproject.org/techreports/ten-ways-discover-tor-bridges-2011-10-31.pdf}}, + url = 
{https://research.torproject.org/techreports/ten-ways-discover-tor-bridges-2011-10-31.pdf}, } @article{NeumannWeinsteinRisks, @@ -444,6 +464,14 @@ url = {https://lists.torproject.org/pipermail/tor-dev/2014-March/006356.html}, } +@misc{tor-talk-bridge-announce, + author = {Roger Dingledine}, + title = {Please run a bridge relay! (was {Re}: {Tor} 0.2.0.13-alpha is out)}, + month = dec, + year = 2007, + url = {https://lists.torproject.org/pipermail/tor-talk/2007-December/003854.html}, +} + @misc{tor-trac-8860, author = {Arlo Breault and David Fifield and George Kadianakis}, title = {Registration over {App Engine}}, @@ -858,11 +886,90 @@ url = {https://wikileaks.org/sony/docs/05/docs/Anti-Piracy/CDSA/EANTC-Survey-1.5-unsecured.pdf}, } -@inproceedings{McLachlan2009a, - author = {Jon McLachlan and Nicholas Hopper}, - title = {On the risks of serving whenever you surf: Vulnerabilities in {Tor}'s blocking resistance design}, - booktitle = {Workshop on Privacy in the Electronic Society}, - publisher = {ACM}, - year = {2009}, - url = {https://www-users.cs.umn.edu/~hoppernj/surf_and_serve.pdf}, +@misc{SafeWebTriangleBoy, + author = {{SafeWeb}}, + title = {{TriangleBoy} Whitepaper}, + url = {http://www.webrant.com/safeweb_site/html/www/tboy_whitepaper.html}, +} + +@inproceedings{Martin2002a, + author = {David Martin and Andrew Schulman}, + title = {Deanonymizing Users of the {SafeWeb} Anonymizing Service}, + booktitle = {USENIX Security Symposium}, + publisher = {USENIX}, + year = 2002, + url = {https://www.usenix.org/legacy/publications/library/proceedings/sec02/martin.html}, +} + +@misc{ReQrypt, + author = {basil00}, + title = {{ReQrypt}}, + url = {https://reqrypt.org/reqrypt.html}, +} + +@misc{BridgeDB, + author = {{The Tor Project}}, + title = {{BridgeDB}}, + url = {https://bridges.torproject.org/}, +} + +@article{wired-china-3, + author = {Geremie R. 
Barme and Ye Sang}, + title = {The Great Firewall of {China}}, + month = jun, + year = {1997}, + journal = {Wired}, + url = {https://archive.wired.com/wired/archive/5.06/china_pr.html}, +} + +@misc{Meeks1996a, + author = {Brock N. Meeks and Declan B. McCullagh}, + title = {Jacking in from the \enquote{Keys to the Kingdom} Port}, + month = jul, + year = {1996}, + howpublished = {CyberWire Dispatch}, + url = {https://cyberwire.com/cwd/cwd.96.07.03.html}, +} + +@misc{Peacefire-censorware, + author = {Bennett Haselton}, + title = {{Peacefire} Censorware Pages}, + howpublished = {Peacefire}, + url = {http://www.peacefire.org/censorware/}, +} + +@misc{Peacefire-circumventor, + author = {Bennett Haselton}, + title = {{Circumventor}}, + howpublished = {Peacefire}, + url = {http://peacefire.org/circumventor/}, +} + +@misc{CGIProxy, + author = {James Marshall}, + title = {{CGIProxy}}, + url = {https://jmarshall.com/tools/cgiproxy/}, +} + +@misc{Psiphon1.0, + title = {{Psiphon}}, + author = {{The Citizen Lab}}, + month = oct, + year = {2006}, + url = {https://web.archive.org/web/20061026081356/http://psiphon.civisec.org/}, +} + +@article{theguardian-how-to-get-around-turkeys-twitter-ban, + author = {Elena Cresci}, + title = {How to get around {Turkey's} {Twitter} ban}, + journal = {The Guardian}, + month = mar, + year = {2014}, + url = {https://www.theguardian.com/world/2014/mar/21/how-to-get-around-turkeys-twitter-ban}, +} + +@misc{refraction-network, + title = {Refraction Networking}, + key = {Refraction networking}, + url = {https://refraction.network/}, } diff --git a/thesis.tex b/thesis.tex index d8e4ee6..c4ce977 100644 --- a/thesis.tex +++ b/thesis.tex @@ -47,7 +47,7 @@ \usepackage{yfonts} -\newcommand{\dragons}{\bigskip\noindent\textfrak{\Large here be dragons:}\bigskip} +\newcommand{\dragons}{\bigskip\noindent\textfrak{\Large here be dragons:}\bigskip\noindent} \begin{document} @@ -229,7 +229,7 @@ Other forms of censorship that are \emph{not} in scope include: \item 
anything that takes place entirely within the censor's network and does not cross the border \item forum moderation and deletion of social media posts -\item deletion-resistant publishing like +\item deletion-resistant publishing in the vein of the Eternity Service~\cite{Anderson1996a} (what Köpsell and Hillig call ``censorship resistant publishing systems''), except @@ -544,6 +544,34 @@ What I call ``detection'' and ``blocking,'' Khattak, Elahi, et~al.\ call ``fingerprinting'' and ``direct censorship''~\cite[\S~2.3]{Khattak2016a}, and Tschantz et~al.\ call ``detection'' and ``action''~\cite[\S~II]{Tschantz2016a-local}. +A major difficulty in developing circumvention systems is that +however much you model and try to predict the reactions of a censor, +real-world stress testing is expensive. +If you really want to test a design against a censor, +not only must you write and deploy an implementation, +integrate it with client-facing software like a web browser, +and work out details of distribution---you +must also attract enough users to +merit a censor's attention. +Any system, even a fundamentally broken one, +will work to circumvent most censors, +as long as it is used only by one or a few clients. +The true test arises only after the system has begun to scale +and the censor to fight back. +This phenomenon may have contributed to the unfortunate +characterization of censorship and circumvention as a cat-and-mouse game: +deploying a weak circumvention system, +watching it get blocked as it becomes popular, +and starting over again with another similarly weak system. +In my opinion, the cat-and-mouse game is not inevitable. +It is possible to develop systems that resist blocking---not +absolutely, but quantifiably in terms of costs to the blocker---even +after they have become popular.
+We should think of the honeymoon period +while a system is too small to be worth noticing, +not as the beginning and end of a system's useful life, +but as a time to work out growing pains. + \section{Collateral damage} \label{sec:collateral-damage} @@ -954,76 +982,230 @@ it is not necessarily useful for detecting other server instances. \section{Address blocking resistance strategies} \label{sec:address-strategies} -\dragons - -Resistance to blocking by address; -obfuscated protocol then prevents blocking by content. - -\begin{itemize} -\item Untrusted Messenger Discovery~\cite{Feamster2003a} -\item Kaleidoscope~\cite{Sovran2008a,Sovran2008b} -\item Mahdian~\cite{Mahdian2010a} -\item Proximax~\cite{McCoy2011a} -\item rBridge~\cite{Wang2013a} -\item Salmon~\cite{Douglas2016a} -\item Hyphae~\cite{LovecruftDeValence2017a} -\item Enemy at the Gateways~\cite{Nasr2017a} -\end{itemize} - -GFW enumerated HTTPS- and email-sourced bridges -\cite{ten-ways-discover-tor-bridges} - -In the usual threat models, though, the censor is assumed to be quite powerful, -capable of dropping, replaying, and forging arbitrary packets, -of \dots -there is usually a concession to the censor needing to operate at line rate, -or of needing to protect important communications (which is an argument about collateral damage), -which provides the weakness that the circumvention system in question exploits. -we already know that such a strong censor model is a fiction for national censors, -for example the GFW acts like an ``on-path'' network monitor -that can inject, but not drop, packets. -the very strong threat model may be -appropriate for e.g. 
whitelisting corporate or university censors - - -The mass censors we know are weak if you are not being specifically targeted -Pick a proxy server used by you and no one else -Do any silly thing for obfuscation, it will work, because who cares -There are true challenges in making it scale to large numbers of users -and an adaptive adversary -The cat-and-mouse game is not inevitable---don't think of it as -``circumvention works until it gets popular, then it gets blocked'' -rather as -``you get a free ride until you get popular, after that your thing has to actually work.'' - -Generic rendezvous: BridgeDB and others - -Mass scanning for bridges -Durumeric et~al.~\cite[\S~4.4]{Durumeric2013a} found about 80\% -of Tor bridges by scanning TCP ports 443 and 9001 on IPv4. - -depending on physical aspects of networks -Denali - -infrastructure-based, decoy routing and domain fronting - -Tying questions of ethics\index{ethics} to questions about censor behavior, motivation: -\cite{Wright2011a} (also mentions ``organisational requirements, administrative burden'') -\cite{Jones2015a} -\cite{Crandall2015a} -Censors may come to conclusions different than what we expect -(have a clue or not). - -``Decoy routing'' systems put proxies at the middle of network paths. -A special cooperating router lies between the client and the apparent destination of a TCP stream. -The router looks for a special cryptographic ``tag'' that is undetectable to the censor. -On finding a tag, the router begins to redirect the client's traffic -away from its declared destination and towards a censored destination instead. -There are several decoy routing proposals, each with advantages and disadvantages; -those that began the line of research are called -Curveball~\cite{Karlin2011a}, -Telex~\cite{Wustrow2011a}, and -Cirripede~\cite{Houmansadr2011a}. +The first-order solution for reaching a destination +whose address is blocked is to instead route through a proxy. 
+But a single, static proxy is not much better than direct access, +from a circumvention point of view---a censor can block +the proxy just as easily as it can block the destination. +Circumvention systems must come up with ways +of addressing this problem. + +There are two reasons why resistance to blocking by address +is challenging. +The first is due to the nature of network routing: +the client must, somehow, encode +the address of the destination into what it sends, +where it can be observed by the censor, +if the encoding is sufficiently transparent. +The second is the insider attack: +legitimate clients must have some way to discover +addresses of, e.g., proxies. +By pretending to be a legitimate client, +the censor can learn those addresses in the same way. + +Compared to content obfuscation, +there are relatively few strategies for +resistance to blocking by address. +They are basically five: +private proxies shared by only a few clients; +having a large population of secret proxies and +distributing them carefully; +having a very large population of proxies and +treating them as disposable; +proxying through a service with high collateral damage; +and address spoofing. + +The simplest proxy infrastructure is no infrastructure at all: +require every client to set up and maintain a proxy +for their own personal use, or for a few of their friends. +As long as the use of any single address remains low, +it may escape the censor's notice~\cite[\S~4.2]{tor-techreport-2006-11-001}. +The problem with this strategy, of course, is usability and scalability. +If it were easy for everyone to set up their own proxy +on an unblocked address, they would do it, +and blocking by address would not be a concern. +The challenge is making such techniques general +so they are usable by more than experts. +uProxy~\cite{uproxy} is now working on just that: +automating the process of setting up a proxy on a server. 
+ +What Köpsell and Hillig call the ``many access points'' model +has been adopted in some form by many circumvention systems. +In this model, there are many proxies in operation. +They may be full-fledged general-purpose proxies, +or only simple forwarders to a more capable proxy. +They may be operated by volunteers or coordinated centrally. +In any case, the success of the system hinges on +being able to sustain a population of proxies, and +distribute information about them to legitimate users, +without revealing them all to the censor. +Both of these considerations pose challenges. + +Tor's blocking resistance design~\cite{tor-techreport-2006-11-001}, +based on secret proxies called ``bridges,'' was of this kind. +Volunteers run bridges, which report themselves to a central database +called BridgeDB~\cite{BridgeDB}. +Clients contact BridgeDB through some unblocked out-of-band channel +(HTTPS, email, or word of mouth) in order to learn bridge addresses. +The BridgeDB server takes steps to prevent easy enumeration of the entire database. +Each request returns only a small set of bridges, +and repeated requests by the same client +return the same small set +(keyed by a hash of the client's IP address prefix or email address). +Requests through the HTTPS interface require the client +to solve a captcha, and email requests are permitted only +from the domains of email providers that are known to +limit the rate of account creation. +The population of bridges is partitioned into ``pools''---one +pool for HTTPS distribution, one for email, and so on---so that +an exploit allowing enumeration of one distribution method +does not affect the others.
+But even these defenses may not be enough: +despite public appeals for volunteers to run bridges +(see for example Dingledine's initial call in 2007~\cite{tor-talk-bridge-announce}), +there have never been more than a few thousand of them, +and Dingledine reported in 2011 that the Great Firewall of China +had managed to enumerate both the HTTPS and email distribution +pools~\cite[\S~1]{tor-techreport-2011-05-001}\cite[\S~1]{tor-techreport-2011-10-002}, +presumably taking advantage of its greater resources. +% (A curious fact, though, is that nearly But nearly all clients use the default bridges~\cite{Matic2017a}. +% I will cover this seeming paradox in more detail in +% \autoref{chap:proxy-probe}.) + +Tor relies on BridgeDB to provide address blocking resistance +for all its transports that otherwise only have content obfuscation. +And that is a great strength of such a system. +It enables, to some extent, content obfuscation to be developed independently, +and rely on an existing generic proxy distribution mechanism +in order to produce an overall plausibly working system. +There is a whole line of research, in fact, +on the question of how best to distribute information +about an existing population of proxies, +which is known as the ``bridge distribution problem'' +or ``proxy discovery problem.'' +I will give just a summary of various proposals. 
+\todo{Short summaries of proxy distribution papers.} +\todo{Better understanding of Kaleidoscope.} +\todo{Enemy at the Gateways~\cite{Nasr2017a}} +% Keyspace hopping~\cite{Feamster2003a} has each client switch +% between a small number of proxies according to a pseudorandom schedule; +% Kaleidoscope~\cite{Sovran2008b,Sovran2008a} +% leverages real-world trust connections in order to inhibit sybils; +% Mahdian~\cite{Mahdian2010a} +% treats algorithmically a simplified version of the problem +% and shows how to isolate malicious client nodes; +% Proximax~\cite{McCoy2011a} +% rBridge~\cite{Wang2013a} +% Salmon~\cite{Douglas2016a} +% Hyphae~\cite{LovecruftDeValence2017a} + +A way to make proxy distribution more robust against censors +(but at the same time less usable by clients) +is to ``poison'' the set of proxy addresses +with the addresses of important servers, +blocking which would result in high collateral damage. +VPN Gate employed this idea~\cite[\S~4.2]{Nobori2014a}, +mixing into their public proxy list +the addresses of root DNS servers +and Windows Update servers. + +Apart from ``in-band'' discovery of bridges +via subversion of a proxy distribution system, +one must also worry about ``out-of-band'' discovery, +for example by mass scanning~\cite[\S~6]{tor-techreport-2011-10-002}\cite[\S~9.3]{tor-techreport-2006-11-001}. +Durumeric et~al.\ found about 80\% of existing (unobfuscated) +Tor bridges~\cite[\S~4.4]{Durumeric2013a} +by scanning all of IPv4 on a handful of common bridge ports. +% surf and serve~\cite{McLachlan2009a} (didn't actually scan) +% extensive analysis~\cite{Ling2012a} (didn't scan) +Matic et~al.\ had similar results in 2017~\cite[\S~V.D]{Matic2017a}, +using public search engines in lieu of active scanning. +The best solution to the scanning problem is to +do as ScrambleSuit and obfs4 do, +and associate with each proxy a secret, +without which a client cannot initiate a connection.
+The critical part is that the +IP address and port must not constitute +the whole of the information needed to connect to the proxy. +Scanning for bridges is closely related to +active probing, the topic of \autoref{chap:active-probing}. + +An alternative way of achieving address blocking resistance +is to treat proxies as temporary and disposable, +rather than permanent and valuable. +This is the idea underlying +flash proxy~\cite{Fifield2012a-local} and Snowflake~\cite{snowflake-wiki}. +(Snowflake is the topic of \autoref{chap:snowflake}.) +Even proxy distribution strategies that take churn into account +have in mind proxies that last on the order of at least days. +In contrast, disposable proxies may last only minutes or hours. +Setting up a Tor bridge or even something lighter-weight +like a SOCKS proxy still requires installing some software +on a server somewhere. +Flash proxy and Snowflake proxies have a low set-up and tear-down cost: +you can run one just by visiting a web page. +These designs do not need a sophisticated proxy distribution strategy +as long as the rate of proxy creation is kept higher than the censor's +rate of discovery. + +The logic behind diffusing many proxies widely +is that a censor would have to block large swaths of the Internet +in order to effectively block them. +However, it also makes sense to take the opposite tack: +have just one or a few proxies, +but choose them to have such high collateral damage +that the censor does not dare block them. +% Pudd'nhead Wilson: Put all your eggs in the one basket and---watch that basket! +Refraction networking~\cite{refraction-network}, +also called decoy routing, +puts proxy capability into network routers---in +the middle of paths, rather than at the end. +Clients tag certain flows in a way that is invisible +to the censor but detectable to a refraction-capable router, +which redirects the flow from its apparent destination to some other, +covert destination.
+The censor has to induce routes that avoid the special routers~\cite{Schuchard2012a}, +which is costly~\cite{Houmansadr2014a}. +Domain fronting~\cite{Fifield2015a-local} +has similar properties. +Rather than a router, it uses another kind of +network intermediary: a content delivery network. +Using properties of HTTPS, a client may request one site +while appearing (to the censor) to request another. +Domain fronting is the topic of \autoref{chap:domain-fronting}. + +The final strategy for address blocking resistance is address spoofing. +The notable design in this category is +CensorSpoofer~\cite{Wang2012a}. +A CensorSpoofer client never communicates directly with a proxy. +It sends upstream data +through a low-bandwidth, indirect channel such as email or instant messaging, +and downstream data through a simulated VoIP conversation, +spoofed to appear as if it were coming from some unrelated dummy IP address. +The asymmetric design is feasible because of the nature +of web browsing: typical clients send much less than they receive. +The client never even needs to know the actual address of the proxy, +meaning that CensorSpoofer has high resistance to insider attack: +even running the same software as a legitimate client, +the censor does not learn enough information to effect a block. +The idea of address spoofing goes back farther; +as early as 2001 +TriangleBoy~\cite{SafeWebTriangleBoy} +employed lighter-weight intermediate proxies that +would simply forward client requests +to a long-lived proxy at a static, easily blockable address. +% But http://nms.csail.mit.edu/papers/disc-pet2003.pdf footnote 3 says: "TriangleBoy nodes must be trusted (since they are intermediaries in the SSL handshake with the Safeweb server). +In the downstream direction, the long-lived proxy would, +rather than route back through the intermediate proxy, +spoof its responses so they appeared to originate from the intermediate proxy. 
+TriangleBoy did not match CensorSpoofer's resistance to insider attack, +because clients still needed to find and communicate directly with a proxy, +so the whole system basically reduced to the proxy discovery problem, +despite the use of address spoofing. + +% ReQrypt~\cite{ReQrypt}, introduced in 2017, +% proxies only in one direction. +% [no spoofing]. \section{Spheres of influence and visibility} @@ -1115,6 +1297,10 @@ conflicting goals of of sensitivity (recording all that is relevant) and selectivity (recording \emph{only} what is relevant) give rise to an unavoidable ``eavesdropper's dilemma.'' +% risks of flow blocking (Telex/TapDance~\cite{Frolov2017a}) +% http://www.icir.org/vern/papers/activemap-oak03.pdf +% http://www.icir.org/vern/papers/norm-usenix-sec-01.pdf + Monitor evasion techniques can be used to reduce a censor's sphere of visibility---eliminating certain @@ -1144,49 +1330,110 @@ identifying classes of working evasions and estimating the cost to counteract them. -\section{Active probing} - -\dragons - - \section{Early censorship and circumvention} -\dragons - -Early censors (around the time of the late 1990s and early 2000s) -would be considered weak by today's standards. -They were mostly easy to circumvent by simple countermeasures, +Internet censorship and circumvention began to rise to importance +in the mid-1990s, coinciding with the popularization of the World Wide Web. +At that time, online censorship focused mainly on the web. +Computer security companies were developing technology +for IP address, URL, and web page filtering. +Even before national-level censorship by governments +became an issue, researchers investigated +the blocking policies of personal firewall products---those +intended, for example, for parents to install on the family computer. +Meeks and McCullagh~\cite{Meeks1996a} reported in 1996 +on the secret blocking lists of several programs.
+Bennett Haselton and Peacefire~\cite{Peacefire-censorware} +found many cases of programs blocking more than they claimed, +including web sites related to politics and health. + +% Tools for Rendering Censorship Firewalls Ineffective +% http://cypherpunks.venona.com/date/1996/09/msg02561.html + +Governments were not far behind in building legal +and technical structures to control the flow of information +on the web. +The term ``Great Firewall of China'' first appeared in an article in +\textsl{Wired} magazine~\cite{wired-china-3} in 1997. +In some cases, governments adapted the same +technology originally developed for personal firewalls. +In the wake of the first signs of blocking by ISPs, % DFN/Radikal? +people were thinking about how to bypass filters. +The circumvention systems of that era were largely +HTML-rewriting web proxies: +essentially a form on a web page into which a client would enter a URL. +The server would fetch the desired URL on behalf of the client, +and before returning the response, rewrite all the links +and external references in the page to make them relative +to the proxy. +CGIProxy~\cite{CGIProxy}, +SafeWeb~\cite{Martin2002a}, +Circumventor~\cite{Peacefire-circumventor}, +and the first version of Psiphon~\cite{Psiphon1.0} +were all of this kind. + +These systems were effective against the censors of their day---at +least with respect to destination blocking. +And they had the major advantage of requiring no +special client-side software other than a web browser. +The difficulty they faced was second-order blocking +as censors discovered and blocked the proxies themselves. +Circumvention designers deployed some countermeasures; +for example Circumventor had a mailing list~\cite[\S~7.4]{tor-techreport-2006-11-001} +which would send out fresh proxy addresses every few days. +A 1996 article by Rich Morin~\cite{Morin1996Rover} +presented a prototype HTML-rewriting proxy called Rover, +which eventually became CGIProxy.
+The article predicted the failure of censorship +based on URL or IP address, +as long as a significant fraction of web servers +ran such proxies. +That vision clearly did not come to pass. +Accumulating a sufficient number of proxies +and communicating their addresses securely to clients---in +short, the proxy distribution problem---turned +out not to follow automatically, +but to be a major sub-problem of its own. + +% Around 2001, Peekabooty, Six/Four. +% Peekabooty whitepaper, uses "piggybacking" for collateral damage. + +Threat models had to evolve along with +censor capabilities. +The first censors +would be considered weak by today's standards, +mostly easy to circumvent by simple countermeasures, such as tweaking a protocol or using an alternative DNS server. -But as censors become more capable, -our models have to evolve to match. -Indeed, my interest in threat modeling -might be described as a sort of meta-modeling, -learning about how threat models change -over time and according to circumstances. - -\cite{Clayton2006a} -\cite{Clayton2006b} -Thailand (1996, first?) - -\cite{peacefire-list-of-possible-weaknesses} -\cite{dit-hj} first report on DNS hijacking? -Freedom House Freedom on the Net -\index{Freedom on the Net} - -anonymizer, dialectizer sites -HTML rewriting proxies -(BIFSO article predicting failure of censorship, leading to CGIProxy?)~\cite{Morin1996Rover} - -changing dns servers - -relationship of censorship to network monitoring/NIDS -risks of flow blocking (Telex/TapDance~\cite{Frolov2017a}) -% http://www.icir.org/vern/papers/activemap-oak03.pdf -% http://www.icir.org/vern/papers/norm-usenix-sec-01.pdf - -The early development \emph{was} an arms race -or cat-and-mouse game, -but there is no reason to assume it will always be so. 
+(We see the same progression play out again +when countries begin to experiment with censorship, +such as in Turkey in 2014, where alternative DNS servers +briefly sufficed to circumvent a block of Twitter~\cite{theguardian-how-to-get-around-turkeys-twitter-ban}.) +Not only were censors changing---the world around them +was changing as well. +In this field that is so heavily affected by concerns +about collateral damage, the milieu in which +censors operate is as important as the censors themselves. +A good example of this is the paper on Infranet, +the first academic circumvention design I am aware of. +Its authors argued, in 2001, +that TLS would not suffice as a cover protocol~\cite[\S~3.2]{Feamster2002a}, +because the relatively few TLS-using services at that time +could \emph{all} be blocked without much harm. +Certainly the circumstances are different today---domain +fronting and all refraction networking schemes require +the censor to permit TLS. +As long as circumvention remains relevant, +it will have to change along with changing times, +just as censors do. + +% Anonymizer, Zero Knowledge Freedom WebSecure (according to Feamster2003a) +% Gpass, Freegate? + +% http://www.peacefire.org/circumventor/list-of-possible-weaknesses.html +% 'The "human shield" fallacy': seems to discount collateral damage, but then says 'the Chinese would probably never block [Web traffic and email], at least not without rendering the Internet essentially useless' +% Also discounts possibility of SSL. + +% \cite{dit-hj} first report on DNS hijacking? \chapter{Censor capabilities} @@ -1706,6 +1953,13 @@ not much is known about how much censorship costs to implement. In general, contemporary threat models tend to ignore resource limitations on the part of the censor.
+Tying questions of ethics\index{ethics} to questions about censor behavior, motivation: +\cite{Wright2011a} (also mentions ``organisational requirements, administrative burden'') +\cite{Jones2015a} +\cite{Crandall2015a} +Censors may come to conclusions different than what we expect +(have a clue or not). + \chapter{Circumvention systems} @@ -1795,8 +2049,6 @@ I helped analyze the network ``fingerprints'' of active probes and how they might be distinguished from connections by legitimate clients. -surf and serve\cite{McLachlan2009a} - The work on active probing appeared in the 2015 research paper ``Examining How the Great Firewall Discovers Hidden Circumvention Servers''~\cite{Ensafi2015b}, which I coauthored with @@ -1804,6 +2056,7 @@ Roya Ensafi, Philipp Winter, Nick Feamster, Nicholas Weaver, Vern Paxson. \chapter{Time delays in censors' reactions} +\label{chap:proxy-probe} \dragons @@ -1931,6 +2184,8 @@ Domain fronting appeared in the 2015 research paper ``Blocking-resistant communication through domain fronting''~\cite{Fifield2015a-local}, which I coauthored with Chang Lan, Rod Hynes, Percy Wegmann, and Vern Paxson. +CloudTransport~\cite{Brubaker2014a}, + \section{An unvarnished history of meek deployment}