diff --git a/thesis.tex b/thesis.tex index 90579ed..88b9e2d 100644 --- a/thesis.tex +++ b/thesis.tex @@ -15,7 +15,6 @@ \usepackage{relsize} \usepackage[table,x11names]{xcolor} \usepackage{tocloft} -\usepackage{todonotes} % http://grad.berkeley.edu/academic-progress/dissertation/: % "Margins: For the manuscript material, including headers, footers, tables, @@ -25,7 +24,7 @@ \usepackage{makeidx} \makeindex -\usepackage{idxlayout} +\usepackage[font=small]{idxlayout} % biblatex manual: % "When using the hyperref package, it is preferable to load it after biblatex." @@ -122,96 +121,81 @@ \index{appspot.com!zzz@\gobblecomma|seealso {Google App Engine}} \index{APT29|see {Cozy Bear}} \index{AS|see {autonomous system}} +\index{authentication!zzz@\gobblecomma|seealso {integrity}} \index{Azure|see {Microsoft Azure}} -\index{bridges|see {Tor bridges}} -\index{ciphersuite|see {TLS fingerprinting}} -\index{CDN|see {content delivery network}} +\index{bridge|see {Tor bridge}} +\index{certificate!zzz@\gobblecomma|seealso {common name (X.509; TLS)}} +\index{ciphersuite|see {TLS ciphersuite}} +\index{content delivery network|see {CDN}} +\index{classification!zzz@\gobblecomma|seealso {detection; false positive; false negative}} \index{CN|see {China; common name (X.509)}} \index{China!zzz@\gobblecomma|seealso {Great Firewall of China}} \index{CloudFront|see {Amazon CloudFront}} \index{Datagram TLS|see {DTLS}} \index{decoy routing|see {refraction networking}} -\index{default Tor bridge|see {Tor bridges, default}} -\index{domain fronting!zzza@\gobblecomma|seealso {front domain}} -\index{domain fronting!zzzb@\gobblecomma|seealso {meek}} +\index{default bridge|see {Tor bridge, default}} +\index{domain fronting!zzz@\gobblecomma|seealso {front domain; meek}} \index{Domain Name System|see {DNS}} \index{DPI|see {deep packet inspection}} +\index{DTLS!zzz@\gobblecomma|seealso {TLS}} \index{end-to-middle proxying|see {refraction networking}} \index{entropy!zzz@\gobblecomma|seealso {Kullback--Leibler divergence}} -\index{false positives!zzz@\gobblecomma|seealso {collateral damage}} +\index{false positive!zzz@\gobblecomma|seealso {collateral damage}} +\index{fingerprinting!zzz@\gobblecomma|seealso {TLS/DTLS fingerprinting}} \index{format-transforming encryption|see {FTE}} \index{GFW|see {Great Firewall of China}} -\index{Google App Engine!zzz@\gobblecomma|seealso {meek-google}} -\index{Hypertext Transfer Protocol|see {HTTP}} +\index{Google!App Engine!zzz@\gobblecomma|seealso {meek-google}} +\index{idle scan|see {hybrid idle scan}} \index{Interactive Connectivity Establishment|see {ICE}} +\index{indistinguishability|see {distinguishability}} +\index{integrity!zzz@\gobblecomma|seealso {authentication}} \index{injection|see {packet injection}} -\index{ISP|see {Internet service provider}} +\index{Internet service provider|see {ISP}} +\index{keyword filtering!zzz@\gobblecomma|seealso {blocking by content}} \index{microblogging!zzz@\gobblecomma|seealso {Twitter; Sina Weibo}} -\index{Microsoft Azure!zzz@\gobblecomma|seealso {meek-azure}} +\index{Microsoft!Azure!zzz@\gobblecomma|seealso {meek-azure}} \index{MITM|see {man in the middle}} \index{NDSS|see {Network and Distributed System Security Symposium}} -\index{NAT|see {network address translation}} +\index{network address translation|see {NAT}} \index{NIDS|see {intrusion detection}} \index{network intrusion detection system|see {intrusion detection}} -\index{nickname|see {Tor bridges, nickname}} +\index{nickname|see {Tor bridge, nickname}} \index{OpenSSH|see 
{obfuscated-openssh}} \index{overblocking|see {false positive}} \index{PETS|see {Privacy Enhancing Technologies Symposium}} \index{pluggable transports!zzz@\gobblecomma|seealso {flash proxy; FTE; meek; obfs2; obfs3; obfs4; ScrambleSuit; Snowflake}} -\index{port scanning!zzza@\gobblecomma|seealso {active probing}} -\index{port scanning!zzzb@\gobblecomma|seealso {hybrid idle scan}} -\index{precision|see {false positives}} -\index{proxy discovery problem|see {proxy distribution problem}} -\index{recall|see {false negatives}} +\index{port scanning!zzz@\gobblecomma|seealso {active probing; hybrid idle scan}} +\index{precision|see {false positive}} +\index{recall|see {false negative}} \index{Server Name Indication|see {SNI}} -\index{Secure Real-time Transport Protocol|see {SRTP}} \index{Secure Sockets Layer|see {TLS}} -\index{Session Traversal Utilities for NAT|see {STUN}} -\index{SRTP with Security Descriptions|see {SDES}} +\index{spoofing|see {address spoofing}} \index{SSL|see {TLS}} \index{TCP!flags|see {ACK; SYN; RST}} +\index{threat modeling|see {modeling}} \index{time to live|see {TTL}} \index{TLS!zzz@\gobblecomma|seealso {DTLS}} -\index{Transport Layer Security|see {TLS}} -\index{Transmission Control Protocol|see {TCP}} \index{type~I error|see {false positive}} \index{type~II error|see {false negative}} -\index{Traversal Using Relays around NAT|see {TURN}} -\index{User Datagram Protocol|see {UDP}} \index{virtual private network|see {VPN}} -\index{VoIP|see {voice over IP}} +\index{voice over IP|see {VoIP}} \index{relative entropy|see {Kullback--Leibler divergence}} \index{reset|see {RST}} -\index{Secure Shell|see {SSH}} -\index{Uniform Resource Locator|see {URL}} \index{U.S.|see {United States of America}} \index{web browser!zzz@\gobblecomma|seealso {Chrome; Firefox; Tor Browser}} -\index{World Wide Web!zzz@\gobblecomma|seealso {HTTP; HTTPS}} +\index{World Wide Web!zzz@\gobblecomma|seealso {HTTP; HTTPS; web browser}} -\index{Tor!bridges!zzz@\gobblecomma|seealso {Azadi, cymrubridge31, cymrubridge33, fdctorbridge01, GreenBelt, JonbesheSabz, LeifEricson, Lisbeth, MaBishomarim, Mosaddegh, ndnop3, ndnop4, ndnop5, noether, NX01, riemann}} +\index{Tor!bridge!zzz@\gobblecomma|seealso {Azadi; cymrubridge31; cymrubridge33; fdctorbridge01; GreenBelt; JonbesheSabz; LeifEricson; Lisbeth; MaBishomarim; Mosaddegh; ndnop3; ndnop4; ndnop5; noether; NX01; riemann}} \begin{document} \begin{CJK}{UTF8}{gbsn} \frontmatter -% Avoid warnings "destination with the same identifier (name{page.1}) has been already used, duplicate ignored". -% We'll set pageanchor=true again right after \mainmatter. -% https://tex.stackexchange.com/questions/18924/pdftex-warning-ext4-destination-with-the-same-identifier-nam-epage-1-has#comment35713_18927 -\hypersetup{pageanchor=false} - \include{frontmatter} -\makeatletter -\renewcommand{\@oddfoot}{Draft of \today\hfill\url{https://www.bamsoftware.com/papers/thesis/}} -\makeatother - -\setlength{\cftbeforetoctitleskip}{-.5em} -\tableofcontents - \mainmatter -\hypersetup{pageanchor=true} % Quals notes: % Mention of route injection – any idea how much? @@ -306,7 +290,7 @@ The censor does not control the network outside its border; it may send messages to the outside world, but it cannot control them after they have traversed the border. -This abstract model\index{models} is a good starting point, +This abstract model\index{modeling} is a good starting point, but it is not the whole story. 
We will have to adapt it to fit different situations, sometimes relaxing and sometimes strengthening assumptions. @@ -324,15 +308,15 @@ The destination may knowingly cooperate with the client's circumvention effort, or may not. There are many possible complications, reflecting the messiness and diversity of dealing with real censors. -Adjusting the basic model\index{models} to reflect real-world actors' -motivations and capabilities is the heart of \emph{threat modeling}\index{threat modeling}. +Adjusting the basic model\index{modeling} to reflect real-world actors' +motivations and capabilities is the heart of \emph{threat modeling}\index{modeling}. In particular, what makes circumvention possible at all is the censor's motivation to block only some, but not all, of the incoming and outgoing communications---this assumption will be a major focus of the next chapter. -It is not hard to see how the border firewall model\index{models} +It is not hard to see how the border firewall model\index{modeling} relates to censorship in practice. In a common case, the censor is the government of a country, and the limits of its controlled network correspond to @@ -347,8 +331,8 @@ Content restrictions may vary across geographic locations, even within the same country---Wright et~al.~\indexauthors{\cite{Wright2011a}} identified some reasons why this might be. -A~good model\index{models} for some places is not a single unified regime, -but rather several autonomous service providers, +A~good model\index{modeling} for some places is not a single unified regime, +but rather several autonomous service providers\index{ISP}, each controlling and censoring its own portion of the network, perhaps coordinating with others about what to block and perhaps not. Another important case is that of a university or corporate network, @@ -367,10 +351,10 @@ Here are examples of forms of censorship that are in scope: \begin{itemize} \item blocking IP addresses\index{blocking!by address} \item blocking specific network protocols\index{blocking!by content} -\item blocking DNS resolution for certain domains\index{DNS!blocking}\index{blocking!by address} +\item blocking DNS resolution for certain domains\index{blocking!by address} \item blocking keywords in URLs\index{URL}\index{blocking!by content} \item parsing application-layer data (``deep packet inspection'')\index{deep packet inspection} -\item statistical and probabilistic traffic classification +\item statistical and probabilistic traffic classification\index{classification} \item bandwidth throttling\index{throttling} \item active scanning to discover the use of circumvention \end{itemize} @@ -382,14 +366,14 @@ Some other censorship-related topics that are \emph{not} in scope include: \item anything that takes place entirely within the censor's network and does not cross the border \item deletion-resistant publishing in the vein of - the Eternity Service~\cite{Anderson1996a}\index{Eternity Service, The} + the Eternity Service~\cite{Anderson1996a}\index{Eternity Service} (what Köpsell and Hillig call ``censorship resistant publishing systems''~\indexauthors{\cite[\S 1]{Koepsell2004a}}), except insofar as access to such services may be blocked \end{itemize} -Parts of the abstract model\index{models} are deliberately +Parts of the abstract model\index{modeling} are deliberately left unspecified, to allow for the many variations that arise in practice. 
The precise nature of ``blocking'' can take many forms, from packet dropping\index{packet dropping}, to injection of false responses\index{packet injection}, @@ -428,12 +412,12 @@ is the thesis of the thesis, in which I~lay out opinionated general principles of the field of circumvention. The remaining chapters are split between -the topics of modeling\index{models} and circumvention. +the topics of modeling\index{modeling} and circumvention. One's point of view is colored by experience. I~will therefore briefly describe the background to my research. I~owe much of my experience to collaboration -with the Tor Project\index{Tor Project, The}, +with the Tor Project\index{Tor!Project}, producers of the Tor anonymity network, which has been the vehicle for the deployment of my circumvention systems. @@ -479,28 +463,28 @@ by somehow resisting the censor's blocking action. A censor is, then, essentially a traffic classifier\index{classification} coupled with -a blocking mechanism\index{blocking}. +a blocking mechanism. Though the design space is large, and many complications are possible, at its heart a censor must decide, for each communication, whether to block or allow, and then effect blocks as appropriate. -Like any classifier, a censor is liable to make mistakes. +Like any classifier\index{classification}, a censor is liable to make mistakes. When the censor fails to block something that it would have preferred to block, it is an error called a \emph{false negative}\index{false negative|textbf}; when the censor accidentally blocks something that it would have preferred to allow, it is a \emph{false positive}\index{false positive|textbf}. Techniques for avoiding detection are often called -``obfuscation,''\index{obfuscation} +``obfuscation,'' and the term is an appropriate one. It reflects not an attitude of security through obscurity\index{security through obscurity}, but rather a recognition that avoiding detection\index{detection} is about -making the censor's classification problem more difficult, +making the censor's classification\index{classification} problem more difficult, and therefore more costly. -Forcing the censor to trade false positives\index{false positives} -for false negatives\index{false negatives} +Forcing the censor to trade false positives\index{false positive} +for false negatives\index{false negative} is the core of all circumvention that is based on avoiding detection. -The costs\index{costs} of misclassifications cannot be understood +The costs of misclassifications\index{classification} cannot be understood in absolute terms: they only have meaning relative to a specific censor and its resources and motivations. @@ -508,7 +492,7 @@ Understanding the relative importance that a censor assigns to classification errors---knowing what it prefers to allow and to block---is key to knowing what kind of circumvention will be successful. -Through good modeling\index{models}, +Through good modeling\index{modeling}, we can make the tradeoffs less favorable for the censor and more favorable for the circumventor. @@ -519,12 +503,12 @@ I like to divide detection techniques into two classes: and \emph{detection by address}\index{detection!by address}. Detection by content is based on the content or topic\index{content|textbf} of the message: -keyword\index{keywords} filtering and protocol identification fall into this class. +keyword\index{keyword filtering} filtering and protocol identification\index{classification} fall into this class.
Detection by address is based on the sender or recipient of the message: -IP address blacklists\index{blacklist} +IP address blacklists\index{blacklist}\index{blocking!by address} and DNS response tampering\index{DNS!poisoning} fall into this class. -An ``address''\index{address|textbf} may be any kind of identifier: +An ``address'' may be any kind of identifier: an IP address, a domain name, or an email\index{email} address. Of these two classes, my experience is that detection by address is harder to defeat. @@ -568,7 +552,7 @@ but with access to a proxy, which transitively gives access to the destination. The censor attempts to detect and block communication with proxies using the same tools it would use for any other communication. -Just as it may look for forbidden keywords\index{keywords} in text, +Just as it may look for forbidden keywords\index{keyword filtering} in text, it may look for distinctive features of proxy protocols; just as it may block politically sensitive web sites, it may block the addresses of any proxies it can discover. @@ -586,7 +570,7 @@ I~consider more content-like: timing\index{packet size and timing}, data transfer characteristics, and protocols. Winter~\indexauthors{\cite[\S 1.1]{Winter2014c}} divides circumvention into three problems: -bootstrapping, endpoint blocking, and traffic obfuscation\index{obfuscation}. +bootstrapping, endpoint blocking, and traffic obfuscation. Endpoint blocking and traffic obfuscation correspond to my detection by address and detection by content; bootstrapping is the challenge of getting a copy of circumvention software @@ -609,7 +593,7 @@ Khattak, Elahi, et~al.\ call ``fingerprinting'' and ``direct censorship''~\index and Tschantz et~al.\ call ``detection'' and ``action''~\indexauthors{\cite[\S II]{Tschantz2016a-local}}. A major difficulty in developing circumvention systems is that -however much you model\index{models} +however much you model\index{modeling} and try to predict the reactions of a censor, real-world testing is expensive. If you really want to test a design against a censor, @@ -641,13 +625,13 @@ after they have become popular. \index{collateral damage|(textbf} What prevents the censor from -shutting down all connectivity\index{shutdown} within its network, +shutting down all connectivity\index{shutdowns} within its network, trivially preventing the client from reaching any destination? The answer is that the censor derives benefits from allowing network connectivity, other than the communications which it wants to censor. Or to put it another way: -the censor incurs a cost\index{costs} +the censor incurs a cost when it overblocks\index{false positive}; that is, when it accidentally blocks something it would have preferred to allow. @@ -675,11 +659,11 @@ or else block everything and suffer maximum collateral damage. It is not necessary to reach this ideal fully before circumvention becomes possible. Better obfuscation drives up the censor's error rate -and therefore the cost\index{costs} of any blocking. +and therefore the cost of any blocking. Ideally, the potential ``damage'' is never realized, -because the censor sees the cost\index{costs} as being too great. +because the censor sees the cost as being too great. -Collateral damage, being an abstract ``cost,''\index{costs} can take many forms. +Collateral damage, being an abstract ``cost,'' can take many forms. It may come in the form of civil discontent, as people try to access web sites and get annoyed with the government when unable to do so.
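To make the cost of overblocking\index{false positive} concrete, here is a toy calculation; the rates are assumptions chosen for illustration, not measurements of any real censor. When circumvention traffic is a tiny fraction of all traffic, even a very accurate classifier\index{classification} spends most of its blocks on innocent flows:

\begin{verbatim}
# Toy base-rate illustration: all numbers are assumed.
circumvention_rate = 1e-4   # fraction of flows that are circumvention
false_positive_rate = 1e-3  # innocent flows wrongly flagged
false_negative_rate = 0.05  # circumvention flows missed

blocked_innocent = (1 - circumvention_rate) * false_positive_rate
blocked_circumvention = circumvention_rate * (1 - false_negative_rate)
precision = blocked_circumvention / (blocked_circumvention + blocked_innocent)
print(f"{1 - precision:.1%} of blocked flows are collateral damage")
# Prints about 91.3%: nearly every block falls on traffic the
# censor would have preferred to allow.
\end{verbatim}

This base-rate effect is one way to read the claims above: better obfuscation raises the censor's error rate, and with it the price of any blocking.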
@@ -776,11 +760,11 @@ the essential element ``deniability\index{deniability},'' meaning that a client can plausibly claim to have been doing something other than circumventing when confronted with a log of their network activity. -Khattak, Elahi, et~al.~\cite[\S 4]{Khattak2016a} consider +Khattak, Elahi, et~al.~\indexauthors{\cite[\S 4]{Khattak2016a}} consider ``deniability''\index{deniability} separately from ``unblockability\index{unblockability}.'' % \cite{Narain2014a} also says ``deniability'' % \cite{Houmansadr2011a} also says ``deniability'' -Houmansadr et~al.~\cite{Houmansadr2011a,Houmansadr2013a,Houmansadr2013b} +Houmansadr et~al.~\indexauthors{\cite{Houmansadr2011a,Houmansadr2013a,Houmansadr2013b}} used the term ``unobservability,''\index{unobservability} which I feel fails to capture the censor's essential function of distinguishing\index{distinguishability}, not merely observing. @@ -795,7 +779,7 @@ Collateral damage provides a way to make statements about censorship resistance quantifiable, at least in a loose sense. Rather than saying, ``the censor cannot block $X$,''\index{unblockability} or even, ``the censor is unwilling to block $X$,'' -it is better to say ``in order to block $X$, the censor would have to do $Y$,''\index{costs} +it is better to say ``in order to block $X$, the censor would have to do $Y$,'' where $Y$ is some action bearing a cost for the censor. A statement like this makes it clear that some censors may be able to afford the cost of blocking and others may not; @@ -808,7 +792,7 @@ If a circumvention system becomes blocked, it may simply mean that the circumventor overestimated the collateral damage or underestimated the censor's capacity to absorb it. -\index{shutdown|(} +\index{shutdowns|(} We have observed that the risk of collateral damage is what prevents the censor from shutting down the network completely---and yet, censors \emph{do} occasionally enact shutdowns or daily ``curfews.'' @@ -821,7 +805,7 @@ estimated that shutdowns cost millions of dollars per day per 10~million population, the amount depending on a country's level of connectivity. This does not necessarily contradict -the theory of collateral damage. +the theory of collateral damage\index{collateral damage}. It is just that, in some cases, a censor reckons that the benefits of a shutdown @@ -832,7 +816,7 @@ don't have as much to lose by blocking it. The fact that shutdowns are limited in duration shows that even censors that can afford a shutdown cannot afford to keep it up forever. -\index{shutdown|)} +\index{shutdowns|)} Complicating everything is the fact that censors are not bound to act rationally. @@ -844,7 +828,7 @@ to make decisions that cause more harm than good. The imposition of censorship in the first place, I~suggest, is exactly such an irrational action, -retarding progress at the greater societal level\index{costs!of censorship}. +retarding progress at the greater societal level. \index{collateral damage|)} @@ -857,13 +841,13 @@ There are two general strategies to counter content-based detection. The first is to mimic some content that the censor allows, like HTTP\index{HTTP} or email\index{email}. -The second is to randomize the content, +The second is to randomize\index{randomization} the content, making it dissimilar to anything that the censor specifically blocks.
Tschantz et~al.~\indexauthors{\cite{Tschantz2016a-local}} call these two strategies ``steganography''\index{steganography} and ``polymorphism''\index{polymorphism} respectively. -It is not a strict classification---any +It is not a strict categorization---any real system will incorporate a bit of both. The two strategies reflect differing conceptions of censors. @@ -885,9 +869,9 @@ and in any case its effectiveness can only be judged against a censor's sensitivity to collateral damage\index{collateral damage}. Whitelisting\index{whitelist}, by its nature, tends to cause more collateral damage than blacklisting\index{blacklist}. -And just as obfuscation\index{obfuscation} protocols are +And just as obfuscation protocols are not purely steganographic or polymorphic, -real censors are not purely whitelisting or blacklisting. +real censors are not purely whitelisting\index{whitelist} or blacklisting\index{blacklist}. Houmansadr et~al.~\indexauthors{\cite{Houmansadr2013b}} exhibited weaknesses in ``parrot''\index{dead-parrot attacks} circumvention systems that imperfectly mimic a cover protocol. @@ -903,10 +887,10 @@ Geddes et~al.~\indexauthors{\cite{Geddes2013a}} showed that even perfect imitation may leave vulnerabilities due to mismatches between the cover protocol and the carried protocol. -For instance, randomly dropping packets +For instance, randomly dropping packets\index{packet dropping} may disrupt circumvention more than normal use of the cover protocol. It's worth noting, though, that apart from active probing\index{active probing} and -perhaps entropy measurement, most of the attacks proposed +perhaps entropy\index{entropy} measurement, most of the attacks proposed in academic research have not been used by censors in practice. Some systematizations @@ -951,16 +935,16 @@ has seen substantial deployment. \index{steganography|)} \index{polymorphism|(} -There are many examples of the randomized\index{randomization+}, polymorphic strategy. +There are many examples of the randomized\index{randomization}, polymorphic strategy. An important subclass of these comprises the so-called -look-like-nothing systems\index{look-like-nothing transport} that encrypt\index{encryption+} a stream +look-like-nothing systems\index{look-like-nothing transport} that encrypt\index{encryption} a stream without any plaintext header or framing information, -so that it appears to be a uniformly random\index{randomness+} byte sequence. +so that it appears to be a uniformly random\index{randomization} byte sequence. A pioneering design was the obfuscated-openssh\index{obfuscated-openssh} of Bruce Leidl~\indexauthors{\cite{Leidl-obfuscated-openssh}}, which aimed to hide the plaintext packet metadata in the SSH protocol\index{SSH}. obfuscated-openssh worked, in essence, -by first sending an encryption key, and +by first sending an encryption\index{encryption} key, and then sending ciphertext encrypted with that key. The encryption of the obfuscation layer was an additional layer, independent of SSH's\index{SSH} ordinary encryption. @@ -969,7 +953,7 @@ the protocol by recovering the key and using it to decrypt the rest of the stream. obfuscated-openssh could optionally incorporate a pre-shared password\index{authentication} into the key derivation function, which would protect against this attack.
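The key-recovery attack just described is easy to make concrete. The sketch below is modeled on the published obfs2 specification (a later protocol, discussed below, that reused the same construction of a key derivable from the first bytes of the stream); the string and numeric constants follow my reading of that specification, and the details of obfuscated-openssh itself may differ:

\begin{verbatim}
# Sketch of passive detection of a "key in the stream" handshake,
# following the published obfs2 spec (and assuming the optional
# shared secret is not in use); an illustration, not a reference
# implementation.
import hashlib
from cryptography.hazmat.primitives.ciphers import (
    Cipher, algorithms, modes)

MAGIC = 0x2BF5CA7E   # obfs2 MAGIC_VALUE
MAX_PADDING = 8192   # obfs2 MAX_PADDING

def mac(s: bytes, x: bytes) -> bytes:
    # obfs2's MAC(s, x) = SHA256(s | x | s).
    return hashlib.sha256(s + x + s).digest()

def looks_like_obfs2(client_bytes: bytes) -> bool:
    # The seed is sent in the clear, so an observer can re-derive
    # the key and look for the magic number: no false negatives.
    if len(client_bytes) < 24:
        return False
    seed = client_bytes[:16]
    km = mac(b"Initiator obfuscation padding", seed)
    aes = Cipher(algorithms.AES(km[:16]), modes.CTR(km[16:32]))
    plain = aes.decryptor().update(client_bytes[16:24])
    magic = int.from_bytes(plain[:4], "big")
    padlen = int.from_bytes(plain[4:8], "big")
    return magic == MAGIC and padlen <= MAX_PADDING
\end{verbatim}

Folding a pre-shared password into the key derivation defeats exactly this check: the observer can no longer re-derive the key from the seed alone.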
Dust\index{Dust}~\cite{Wiley2011a}, -similarly randomized\index{randomness} bytes +similarly randomized\index{randomization} bytes (at least in its v1 version---later versions permitted fitting to distributions other than uniform). It was not susceptible to passive deobfuscation @@ -988,7 +972,7 @@ censors and circumventors over several years. obfs2\index{obfs2}~\cite{obfs2}, which debuted in 2012 in response to blocking in Iran\index{Iran}~\cite{tor-blog-obfsproxy-next-step-censorship-arms-race}, uses very simple obfuscation inspired by obfuscated-openssh: -it is essentially equivalent to sending an encryption key, +it is essentially equivalent to sending an encryption\index{encryption} key, then the rest of the stream encrypted with that key. obfs2 is detectable, with no false negatives\index{false negative} @@ -997,7 +981,7 @@ by even a passive censor who knows how it works; and it is vulnerable to active probing\index{active probing} attacks, where the censor speculatively connects to servers to see what protocols they use. However, it sufficed against the -keyword-\index{keywords} and pattern-based censors of its era. +keyword-\index{keyword filtering} and pattern-based censors of its era. obfs3\index{obfs3}~\cite{obfs3}---first available in 2013 but not really released to users until 2014~\cite{tor-blog-tor-browser-36-released}---was designed @@ -1021,7 +1005,7 @@ When a client connects to a ScrambleSuit proxy, it must demonstrate knowledge of the out-of-band secret before the proxy will respond, which prevents active probing. -obfs4\index{obfs4|textbf}~\cite{obfs4}, first available in 2014~\cite{tor-blog-tor-browser-45-released}, +obfs4\index{obfs4}~\cite{obfs4}, first available in 2014~\cite{tor-blog-tor-browser-45-released}, is an incremental advancement on ScrambleSuit that uses more efficient cryptography, and additionally authenticates\index{authentication} the key exchange @@ -1033,7 +1017,7 @@ which is that every proxy can potentially have its own characteristics. ScrambleSuit and obfs4, in addition to randomizing packet contents, also shape packet sizes and timing\index{packet size and timing} -to fit random\index{randomness+} distributions. +to fit random\index{randomization} distributions. Crucially, the chosen distributions are consistent within each proxy, but vary across proxies. That means that even if a censor is able to build a profile @@ -1107,7 +1091,7 @@ distribute information about them to legitimate users, without revealing too many to the censor. Both of these considerations pose challenges. -\index{Tor bridge|(textbf} +\index{Tor!bridge|(} Tor's blocking resistance design~\cite{tor-techreport-2006-11-001}, based on secret proxies called ``bridges,'' was of this kind. Volunteers run bridges, which report themselves to a central database @@ -1134,9 +1118,9 @@ there have never been more than a few thousand of them, and Dingledine reported in 2011 that the Great Firewall of China\index{Great Firewall of China} managed to enumerate both the HTTPS and email pools~\cites[\S 1]{tor-techreport-2011-05-001}[\S 1]{tor-techreport-2011-10-002}. -\index{Tor bridge|)} +\index{Tor!bridge|)} -\index{proxy distribution problem|(} +\index{proxy distribution|(} Tor relies on BridgeDB\index{BridgeDB} to provide address blocking resistance for all its transports that otherwise have only content obfuscation. And that is a great strength of such a system. @@ -1165,13 +1149,13 @@ and the unblocked lifetimes of proxies. 
% and shows how to isolate malicious client nodes; % Hyphae~\cite{LovecruftDeValence2017a} % Enemy at the Gateways~\cite{Nasr2017a} -\index{proxy distribution problem|)} +\index{proxy distribution|)} A~way to make proxy distribution more robust against censors (but at the same time less usable\index{usability} by clients) is to ``poison'' the set of proxy addresses with the addresses of important servers, -the blocking of which would result in high collateral damage. +the blocking of which would result in high collateral damage\index{collateral damage}. VPN Gate\index{VPN Gate} employed this idea~\cite[\S 4.2]{Nobori2014a}, mixing into their public proxy list the addresses of root DNS servers\index{DNS} @@ -1183,7 +1167,7 @@ via subversion of a proxy distribution system, one must also worry about ``out-of-band'' discovery, for example by mass scanning~\cites[\S 6]{tor-techreport-2011-10-002}[\S 9.3]{tor-techreport-2006-11-001}. Durumeric et~al. found about 80\% of existing (unobfuscated) -Tor bridges\index{Tor bridge}~\indexauthors{\cite[\S 4.4]{Durumeric2013a}} +Tor bridges\index{Tor!bridge}~\indexauthors{\cite[\S 4.4]{Durumeric2013a}} by scanning all of IPv4 on a handful of common bridge ports. % surf and serve~\cite{McLachlan2009a} (didn't actually scan) % extensive analysis~\cite{Ling2012a} (didn't scan) @@ -1228,7 +1212,7 @@ that the censor does not dare block them. Refraction networking\index{refraction networking}~\cite{refraction-network} puts proxy capability into network routers---in the middle of paths, rather than at the end. -Clients cryptographically tag certain flows in a way that is invisible +Clients cryptographically tag\index{authentication} certain flows in a way that is invisible to the censor but detectable to a refraction-capable router, which redirects from its apparent destination to some other, covert destination. @@ -1273,7 +1257,7 @@ only spoof its responses to look as if they came from the proxy. TriangleBoy\index{TriangleBoy} did not match CensorSpoofer's\index{CensorSpoofer} resistance to insider attack\index{insider attack}, because clients still needed to find and communicate directly with a proxy, -so the whole system basically reduced to the proxy discovery problem\index{proxy distribution problem}, +so the whole system basically reduced to the proxy discovery problem\index{proxy distribution}, despite the use of address spoofing. \index{address spoofing|)} @@ -1286,7 +1270,7 @@ \section{Spheres of influence and visibility} -\index{spheres of influence/visibility|(} +\index{sphere of influence/visibility|(} \index{blocking!versus detection} \index{detection!versus blocking} @@ -1313,14 +1297,14 @@ is the 2006 research on ``Ignoring the Great Firewall of China''\index{Great Firewall of China} by Clayton et~al.~\indexauthors{\cite{Clayton2006a}}. They found that the firewall would block connections by injecting\index{packet injection} -phony TCP\index{TCP} RST\index{RST (TCP flag)} packets +phony TCP\index{TCP} RST\index{RST} packets (which cause the connection to be torn down) -or SYN/ACK\index{SYN (TCP flag)}\index{ACK (TCP flag)}\index{SYN/ACK (TCP flags)} packets +or SYN/ACK\index{SYN}\index{ACK}\index{SYN/ACK} packets (which cause the connection to become unsynchronized), and that simply ignoring the anomalous packets rendered blocking ineffective. (Why did the censor choose to \emph{inject} its own packets, -rather than \emph{drop}\index{packet dropping+} those of the client or server?
+rather than \emph{drop}\index{packet dropping} those of the client or server? The answer is probably that injection is technically easier to achieve, highlighting a limit on the censor's power.) One can think of this ignoring as shrinking the censor's @@ -1386,7 +1370,7 @@ Monitor evasion techniques can be used to reduce a censor's sphere of visibility---remove certain traffic features from its consideration. Crandall et~al.~\indexauthors{\cite{Crandall2007a}} in 2007 suggested -using IP fragmentation\index{fragmentation} to prevent keyword\index{keywords} matching. +using IP fragmentation\index{fragmentation} to prevent keyword\index{keyword filtering} matching. In 2008 and 2009, Park and Crandall~\indexauthors{\cite{Park2010a}} explicitly characterized the Great Firewall\index{Great Firewall of China} as a network intrusion detection system\index{intrusion detection} and found that a lack of TCP reassembly\index{TCP!reassembly} allowed evading keyword matching. Winter and Lindskog~\indexauthors{\cite{Winter2012a}} found that the Great Firewall still did not do TCP segment reassembly in 2012. They released a tool, brdgrd\index{brdgrd}~\cite{brdgrd}, -that by manipulating the TCP window size\index{TCP!window size}, +that, by manipulating the TCP window size\index{TCP!window}, prevented the censor's scanners from receiving a full response in the first packet, thereby foiling active probing\index{active probing}. Anderson~\indexauthors{\cite{Anderson2012splinternet}} gave technical information @@ -1411,7 +1395,7 @@ a few years later, finding that the firewall had evolved to prevent some previous evasion techniques, and discovering new ones. -\index{spheres of influence/visibility|)} +\index{sphere of influence/visibility|)} \section{Early censorship and circumvention} @@ -1442,7 +1426,7 @@ technology originally developed for personal firewalls. The term ``Great Firewall of China\index{Great Firewall of China}'' first appeared in an article in \textsl{Wired}\index{Wired@\textsl{Wired}}~\cite{wired-china-3} in 1997. -In the wake of the first signs of blocking by ISPs\index{Internet service provider}, % DFN/Radikal? +In the wake of the first signs of blocking by ISPs\index{ISP}, % DFN/Radikal? people were thinking about how to bypass filters. The circumvention systems of that era were largely HTML-rewriting web proxies\index{HTML-rewriting proxy}: @@ -1476,7 +1460,7 @@ ran such proxies. That vision has not come to pass. Accumulating a sufficient number of proxies and communicating their addresses securely to clients---in -short, the proxy distribution problem\index{proxy distribution problem}---turned +short, the proxy distribution problem\index{proxy distribution}---turned out not to follow automatically, but to be a major sub-problem of its own. @@ -1492,7 +1476,7 @@ such as tweaking a protocol or using an alternative DNS\index{DNS} server. (We see the same progression play out again when countries first begin to experiment with censorship, such as in Turkey\index{Turkey} in 2014, where alternative DNS servers\index{DNS} -briefly sufficed to circumvent a block of Twitter\index{Twitter}\index{social media}~\cite{theguardian-how-to-get-around-turkeys-twitter-ban}.\index{DNS!blocking}) +briefly sufficed to circumvent a block of Twitter\index{Twitter}\index{social media}~\cite{theguardian-how-to-get-around-turkeys-twitter-ban}.) Not only censors were changing---the world around them was changing as well.
In the field of circumvention, which is so heavily affected by concerns @@ -1552,15 +1536,15 @@ set of capabilities that many censors have in common: \begin{itemize} \item blocking of specific IP addresses and ports\index{blocking!by address} \item control of default DNS servers\index{DNS} -\item blocking DNS queries\index{DNS!blocking} +\item blocking DNS queries -\item injection of false DNS responses\index{DNS!poisoning} +\item injection of false DNS responses\index{DNS!poisoning}\index{packet injection} -\item injection of TCP\index{TCP} RSTs\index{RST (TCP flag)} +\item injection of TCP\index{TCP} RSTs\index{RST} \item keyword\index{keyword filtering} filtering in unencrypted\index{encryption} contents \item application protocol parsing (``deep packet inspection''\index{deep packet inspection}) \item participation in a circumvention system as a client\index{insider attack} \item scanning to discover proxies\index{port scanning}\index{active probing} \item throttling connections\index{throttling} -\item temporary total shutdowns\index{shutdown} +\item temporary total shutdowns\index{shutdowns} \end{itemize} Not all censors will be able---or motivated---to do all of these. As the amount of traffic to be handled increases, @@ -1568,7 +1552,7 @@ in-path attacks such as throttling\index{throttling} become relatively more expensive. Whether a particular act of censorship even makes sense will depend on a local cost--benefit analysis, a weighing of the expected gains against the potential collateral damage\index{collateral damage}. -Some censors may be able to tolerate a brief total shutdown\index{shutdown}, +Some censors may be able to tolerate a brief total shutdown\index{shutdowns}, while for others the importance of Internet connectivity is too great for such a blunt instrument. @@ -1593,7 +1577,7 @@ Suppose the software accesses a destination that violates local law. Could the volunteer be held liable for the access? Quantifying the degree of risk depends on modeling\index{modeling} -how a censor will react to a given stimulus~\cite[\S 2.2]{Crandall2015a}. +how a censor will react to a given stimulus~\cite[\S 2.2]{Crandall2015a-local}. \index{ethics|)} % Past measurement studies have done well at @@ -1648,13 +1632,13 @@ one-time experiments and generic measurement platforms. One of the earliest technical studies of censorship occurred in a place you might not expect, the German\index{Germany} state of North Rhine-Westphalia\index{North Rhine-Westphalia}. -Dornseif~\indexauthors{\cite{Dornseif2003a}} tested ISPs'\index{Internet service provider} implementation +Dornseif~\indexauthors{\cite{Dornseif2003a}} tested ISPs'\index{ISP} implementation of a controversial legal order to block web sites circa 2002. While there were many possible ways to implement the block, none was trivial, nor free of overblocking side effects. The most popular implementation used DNS tampering,\index{DNS!poisoning} which is -returning (or injecting) false responses to DNS requests +returning (or injecting\index{packet injection}) false responses to DNS requests for the blocked sites. An in-depth survey of DNS tampering found a variety of implementations, some blocking more @@ -1674,10 +1658,10 @@ DNS poisoning\index{DNS!poisoning}, and keyword filtering\index{keyword filtering}.
Clayton~\indexauthors{\cite{Clayton2006b}} in 2006 studied a ``hybrid'' blocking system, -CleanFeed\index{CleanFeed} by the British\index{United Kingdom} ISP\index{Internet service provider} BT\index{BT}, +CleanFeed\index{CleanFeed} by the British\index{United Kingdom} ISP\index{ISP} BT\index{BT}, that aimed for a better balance of costs and benefits: a ``fast path'' IP address and port matcher -acted as a prefilter for the ``slow path,'' a full HTTP proxy\index{HTTP proxy}. +acted as a prefilter for the ``slow path,'' a full HTTP proxy\index{HTTP!proxy}. The system, in use since 2004, was designed to block access to any of a secret list of web sites. The system was vulnerable to a number of evasions, @@ -1685,7 +1669,7 @@ such as using a proxy, using an alternate IP address or port, and obfuscating URLs\index{URL}. The two-level nature of the blocking system unintentionally made it an oracle -that could reveal the IP addresses of sites in the secret blocking list. +that could reveal the IP addresses of sites in the secret blocking list\index{blacklist}. In 2006, Clayton, Murdoch, and Watson~\indexauthors{\cite{Clayton2006a}} further studied the technical aspects of the Great Firewall of China\index{Great Firewall of China}. @@ -1694,14 +1678,14 @@ treating incoming and outgoing traffic equally. By sending web requests from outside the firewall to a web server inside, they could provoke the same blocking behavior that someone on the inside would see. -They sent HTTP\index{HTTP} requests containing forbidden keywords\index{keywords} -that caused the firewall to inject RST packets\index{RST (TCP flag)}\index{packet injection} +They sent HTTP\index{HTTP} requests containing forbidden keywords\index{keyword filtering} +that caused the firewall to inject RST packets\index{RST}\index{packet injection} towards both the client and server. -Simply ignoring RST\index{RST (TCP flag)} packets (on both ends) +Simply ignoring RST\index{RST} packets (on both ends) rendered the blocking mostly ineffective. The injected packets had inconsistent TTLs\index{TTL} and other anomalies that enabled their identification. -Rudimentary countermeasures, such as splitting keywords +Rudimentary countermeasures, such as splitting keywords\index{fragmentation} across packets, were also effective in avoiding blocking. The authors brought up an important point that would become a major theme of future censorship modeling\index{modeling}: @@ -1718,7 +1702,7 @@ Contemporaneous studies of the Great Firewall\index{Great Firewall of China} by Wolfgarten~\indexauthors{\cite{Wolfgarten2006a}} and Tokachu~\indexauthors{\cite{Tokachu2006a}} found cases of DNS tampering\index{DNS!poisoning}, -search engine filtering, and RST injection\index{RST (TCP flag)}\index{packet injection+} +search engine filtering, and RST injection\index{RST}\index{packet injection} caused by keyword\index{keyword filtering} detection. In 2007, Lowe, Winters, and Marcus~\indexauthors{\cite{Lowe2007a}} did detailed experiments on DNS tampering in China\index{China}.\index{DNS!poisoning} @@ -1735,7 +1719,7 @@ Hong Kong\index{Hong Kong}, and the U.S.\index{United States of America} By manipulating the IP time-to-live field\index{TTL}, the authors found that the false responses -were injected by an intermediate router, +were injected\index{packet injection} by an intermediate router, evidenced by the fact that the authentic response would be received as well, only later.
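A minimal sketch of this double-response measurement, assuming scapy, raw-socket privileges, and placeholder values for the resolver address and query name: capture begins before the query is sent, so that both an injected answer and the later authentic one are recorded along with their IP TTLs.

\begin{verbatim}
# Sketch only: RESOLVER and QNAME are hypothetical placeholders.
import time
from scapy.all import DNS, DNSQR, IP, UDP, AsyncSniffer, send

RESOLVER = "203.0.113.1"  # a resolver on the far side of the censor
QNAME = "example.com"     # a name suspected of being poisoned
TXID = 0x1234

sniffer = AsyncSniffer(lfilter=lambda p: p.haslayer(DNS)
                       and p[DNS].id == TXID and p[DNS].qr == 1)
sniffer.start()
send(IP(dst=RESOLVER) / UDP(sport=34567, dport=53) /
     DNS(id=TXID, rd=1, qd=DNSQR(qname=QNAME)), verbose=False)
time.sleep(5)  # wait for late (authentic) answers, not just the first
for r in sniffer.stop():
    answer = r[DNS].an.rdata if r[DNS].ancount else None
    print(r[IP].src, "IP TTL =", r[IP].ttl, "answer =", answer)
\end{verbatim}

An injected response typically arrives first, with an IP TTL inconsistent with the path to the real server; receiving two different answers to a single query is itself strong evidence of tampering.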
A more comprehensive survey~\cite{Anonymous2014a} @@ -1747,7 +1731,7 @@ IP ID and TTL\index{TTL} analysis showed that each node was a cluster of several hundred processes that collectively injected censored responses. They found 174 bogus IP addresses, more than previously documented, -and extracted a blacklist of about 15,000 keywords. +and extracted a blacklist\index{blacklist} of about 15,000 keywords. \index{Great Firewall of China|(} The Great Firewall, because of its unusual sophistication, @@ -1761,15 +1745,15 @@ by latent semantic analysis, using the Chinese-language\index{Chinese language} Wikipedia\index{Wikipedia} as a corpus. They found limited statefulness in the firewall: sending a naked HTTP\index{HTTP} request -without a preceding SYN\index{SYN (TCP flag)} resulted in no blocking. +without a preceding SYN\index{SYN} resulted in no blocking. In 2008 and 2009, Park and Crandall~\indexauthors{\cite{Park2010a}} further tested keyword filtering of HTTP\index{HTTP} responses. -Injecting RST packets\index{RST (TCP flag)} into responses is more difficult +Injecting RST packets\index{RST} into responses is more difficult than doing the same to requests, because of the greater uncertainty in predicting -TCP\index{TCP!sequence numbers+} sequence numbers +TCP\index{TCP!sequence numbers} sequence numbers once a session is well underway. -In fact, RST injection\index{RST (TCP flag)} into responses was hit or miss, +In fact, RST injection\index{RST}\index{packet injection} into responses was hit or miss, succeeding only 51\% of the time, with some, apparently diurnal, variation. They also found inconsistencies in the statefulness of the firewall. @@ -1793,14 +1777,14 @@ gave a detailed description of the design of the Great Firewall in 2012. He described IP address blocking\index{blocking!by address} by null routing, -RST\index{RST (TCP flag)} injection\index{packet injection}, +RST\index{RST} injection\index{packet injection}, and DNS poisoning\index{DNS!poisoning}, and documented cases of collateral damage affecting clients inside and outside China. \index{Great Firewall of China|)} Dainotti et~al.~\indexauthors{\cite{Dainotti2011a}} -reported on the total Internet shutdowns\index{shutdown} +reported on the total Internet shutdowns\index{shutdowns} that took place in Egypt\index{Egypt} and Libya\index{Libya} in the early months of 2011. They used multiple measurements to document @@ -1837,12 +1821,12 @@ DNS tampering\index{DNS!poisoning}, and throttling. The most usual method was HTTP request filtering; DNS tampering (directing to a blackhole IP address) affected only the three domains -\nolinkurl{facebook.com}\index{facebook.com@\nolinkurl{facebook.com}}, -\nolinkurl{youtube.com}\index{youtube.com@\nolinkurl{youtube.com}}, and -\nolinkurl{plus.google.com}\index{plus.google.com@\nolinkurl{plus.google.com}}. +\nolinkurl{facebook.com}\index{Facebook}, +\nolinkurl{youtube.com}\index{YouTube}, and +\nolinkurl{plus.google.com}\index{Google!Plus}. SSH\index{SSH} connections were throttled down to about 15\% of the link capacity, -while randomized protocols\index{look-like-nothing transport} +while randomized\index{randomization} protocols\index{look-like-nothing transport} were throttled almost down to zero, 60~seconds into a connection's lifetime. Throttling seemed to be achieved by dropping packets\index{packet dropping}, @@ -1860,7 +1844,7 @@ They found that the firewall was stateful, but only in the client-to-server direction. 
The firewall was vulnerable to a variety of TCP-\index{TCP} and HTTP-based\index{HTTP} evasion techniques, such as overlapping fragments\index{fragmentation}, TTL-limited\index{TTL} packets, -and URL\index{URL} encodings. +and URL\index{URL!encoding} encodings. Nabi~\indexauthors{\cite{Nabi2013a}} investigated web censorship in Pakistan\index{Pakistan} in 2013, using a publicly available @@ -1914,8 +1898,8 @@ The cannon was responsible for denial-of-service\index{denial of service} attacks on Amazon CloudFront\index{Amazon CloudFront} and GitHub\index{GitHub}. The unwitting participants in the attack were web browsers\index{web browser} located \emph{outside} of China, -who began their attack when the cannon injected -malicious JavaScript\index{JavaScript} into certain HTTP responses +which began their attack when the cannon injected\index{packet injection} +malicious JavaScript\index{JavaScript} into certain HTTP responses\index{HTTP} originating inside of China. The new attack tool was noteworthy because it demonstrated previously unseen in-path behavior, @@ -1938,12 +1922,12 @@ The authors of the study found evidence of IP address blocking\index{blocking!by address}, DNS\index{DNS} blocking, and HTTP\index{HTTP} request keyword blocking\index{keyword filtering}; and also evidence of users circumventing censorship by downloading circumvention software or using the cache feature of Google\index{Google} search. -All subdomains of .il\index{.il}, the top-level domain for Israel\index{Israel}, +All subdomains of .il\index{.il (top-level domain of Israel)}, the top-level domain for Israel\index{Israel}, were blocked, as were many IP address ranges in Israel. -Blocked URL keywords included +Blocked URL\index{URL!filtering} keywords\index{keyword filtering} included ``proxy'', which resulted in collateral damage\index{collateral damage} to the Google Toolbar\index{Google} -and the Facebook like button\index{Facebook} because they included the string ``proxy'' in HTTP\index{HTTP} requests. +and the Facebook like button\index{Facebook!like button} because they included the string ``proxy'' in HTTP\index{HTTP} requests. Tor\index{Tor!protocol} was only lightly censored: only one of several proxies blocked it, and only sporadically. @@ -1964,12 +1948,12 @@ such as Tibet\index{Tibet} and Taiwan\index{Taiwan}. In some cases, entire domains were blocked\index{blocking!by address}; in others, only specific URLs within the domain were blocked\index{URL!filtering}. There were cases of overblocking\index{false positive}: apparently inadvertently blocked sites -that happened to share an IP address or URL keyword\index{keyword filtering} +that happened to share an IP address or URL\index{URL!filtering} keyword\index{keyword filtering} with an intentionally blocked site. The firewall terminated connections by injecting\index{packet injection} -a TCP\index{TCP} RST packet\index{RST (TCP flag)}, -then injecting a zero-sized TCP window\index{TCP!window size}, +a TCP\index{TCP} RST packet\index{RST}, +then injecting a zero-sized TCP window\index{TCP!window}, which would prevent any communication with the same server for a short time. Using technical tricks, the authors inferred @@ -1979,7 +1963,7 @@ but did not return them in search results~\cite{oni-bulletin-005}. Censorship of blogs included keyword blocking\index{keyword filtering} by domestic blogging\index{blog} services, and blocking of external domains such as -\nolinkurl{blogspot.com}\index{blogspot.com@\nolinkurl{blogspot.com}}~\cite{oni-bulletin-008}.
+\nolinkurl{blogspot.com}~\cite{oni-bulletin-008}. Email\index{email} filtering was done by the email providers themselves, not by an independent network firewall. Email providers seemed to implement their filtering rules @@ -2028,7 +2012,7 @@ to examine two case studies of censorship: Turkey's\index{Turkey} ban on social media sites in March 2014 and Russia's\index{Russia} blocking of certain LiveJournal\index{LiveJournal}\index{social media} blogs\index{blog} in March 2014. Atlas allows four types of measurements: ping, traceroute, DNS resolution\index{DNS}, -and X.509\index{X.509} certificate fetching. +and TLS certificate\index{certificate} fetching. In Turkey\index{Turkey}, they found at least six shifts in policy during two weeks of site blocking. They observed an escalation in blocking in Turkey: @@ -2085,9 +2069,9 @@ This research was partly born out of frustration with some typical assumptions made in academic research on circumvention, which we felt placed undue emphasis -on steganography and obfuscation of traffic streams, +on steganography\index{steganography} and obfuscation of traffic streams, while not paying enough attention to -the perhaps more important problems of proxy distribution\index{proxy distribution problem} +the perhaps more important problems of proxy distribution\index{proxy distribution} and initial rendezvous\index{rendezvous} between client and proxy. We wanted to help bridge the gap by laying out a research agenda to align the incentives of researchers with those of circumventors. @@ -2105,7 +2089,7 @@ how the connection is established initially (related to detection by address). Designers tend to misperceive the censor's weighting of false positives and false negatives---assuming -a whitelist rather than a blacklist, say. +a whitelist\index{whitelist} rather than a blacklist\index{blacklist}, say. Real censors care greatly about the cost of running detection, and prefer cheap, passive, stateless solutions whenever possible. It is important to guard against these modes of detection @@ -2126,14 +2110,14 @@ In active probing, the censor pretends to be a legitimate client, making its own connections to suspected addresses to see whether they speak a proxy protocol. Any addresses that are found to be proxies -are added to a blacklist\index{blocking!by address} +are added to a blacklist\index{blacklist}\index{blocking!by address} so that access to them will be blocked in the future. The input to the active probing subsystem, a set of suspected addresses, comes from passive observation\index{detection!by content} of the content of client connections. The censor sees a client connect to a destination and tries to determine, by content inspection, what protocol is in use. -When the censor's content classifier is unsure +When the censor's content classifier\index{classification} is unsure whether the protocol is a proxy protocol, it passes the destination address to the active probing subsystem. An active prober then checks, @@ -2153,15 +2137,15 @@ the censor passes the destination's address to an active prober. The active prober attempts connections using various proxy protocols. If any of the proxy connections succeeds, the censor adds the destination -to an address blacklist\index{blocking!by address}. +to an address blacklist\index{blacklist}\index{blocking!by address}.
} \label{fig:active-probing} \end{figure} Active probing makes good sense for the censor, -whose main restriction is the risk of false-positive\index{false positives} classifications +whose main restriction is the risk of false-positive\index{false positive} classifications that result in collateral damage\index{collateral damage}. -Any classifier based purely on passive content inspection\index{detection!by content} +Any classifier\index{classification} based purely on passive content inspection\index{detection!by content} must be very precise (have a low rate of false positives). Active probing increases precision by blocking only those servers that are determined, through active inspection, @@ -2176,7 +2160,7 @@ separate from the firewall's other responsibilities that require a low response time. \index{port scanning|(} -\index{active probing!reactive vs.\ proactive|(} +\index{active probing!proactive versus reactive|(} Active probing, as I~use the term in this chapter, is distinguished from other types of active scans by being reactive, driven by observation of client connections. @@ -2187,7 +2171,7 @@ The potential for the latter kind of scanning has been appreciated for over a decade. Dingledine and Mathewson~\indexauthors{\cite[\S 9.3]{tor-techreport-2006-11-001}} raised scanning resistance as a consideration -in the design document for Tor bridges\index{Tor bridges}. +in the design document for Tor bridges\index{Tor!bridge}. McLachlan and Hopper~\indexauthors{\cite[\S 3.2]{McLachlan2009a}} observed that the bridges' tendency to run on a handful of popular ports @@ -2201,13 +2185,13 @@ was then just beginning to use. Durumeric et~al.~\indexauthors{\cite[\S 4.4]{Durumeric2013a}} demonstrated the effectiveness of Internet-wide scanning, targeting only two ports to -discover about 80\% of public Tor bridges\index{Tor bridges} in only a few hours, +discover about 80\% of public Tor bridges\index{Tor!bridge} in only a few hours. Tsyrklevich~\indexauthors{\cite{tor-dev-internet-wide-bridge-scanning}} and Matic et~al.~\indexauthors{\cite[\S V.D]{Matic2017a}} later showed how existing public repositories of Internet scan data could reveal bridges, without even the necessity of running one's own scan. -\index{active probing!reactive vs.\ proactive|)} +\index{active probing!proactive versus reactive|)} \index{port scanning|)} The Great Firewall of China\index{Great Firewall of China} @@ -2265,11 +2249,11 @@ the censor can tell that circumvention is taking place but cannot block the proxy without unacceptable collateral damage\index{collateral damage}. In Snowflake\index{Snowflake} (\autoref{chap:snowflake}), proxies are web browsers\index{web browser} running ordinary peer-to-peer protocols, -authenticated using a per-connection shared secret. +authenticated\index{authentication} using a per-connection shared secret. Even if a censor discovers one of Snowflake's proxies, it cannot verify whether the proxy is running Snowflake or something else, without having first negotiated a shared secret -through Snowflake's broker\index{broker (snowflake)} mechanism.\index{rendezvous!of Snowflake} +through Snowflake's broker\index{broker (Snowflake)} mechanism.\index{rendezvous!of Snowflake} \section{History of active probing research} @@ -2292,7 +2276,7 @@ Nixon's random-looking probes are temporarily replaced by TLS\index{TLS} probes before changing back again~\indexauthors{\cite{Nixon-sshprobes}}.
\\ 2011 October & -hrimfaxi\index{hrimfaxi} reports that Tor bridges\index{Tor bridges} are quickly detected by the GFW~\cite{tor-trac-4185}\index{Great Firewall of China}. +hrimfaxi\index{hrimfaxi} reports that Tor bridges\index{Tor!bridge} are quickly detected by the GFW~\cite{tor-trac-4185}\index{Great Firewall of China}. \\ 2011 November & Nixon publishes observations and hypotheses about the strange SSH connections~\indexauthors{\cite{Nixon-sshprobes}}. @@ -2370,7 +2354,7 @@ For a few weeks in May and June 2011, the probes did not look random, but instead looked like TLS\index{TLS}. In October 2011, Tor user hrimfaxi\index{hrimfaxi} reported that -a newly set up, unpublished Tor bridge\index{Tor bridges} +a newly set up, unpublished Tor bridge\index{Tor!bridge} would be blocked within 10~minutes of its first being accessed from China~\cite{tor-trac-4185}. Moving the bridge to another port @@ -2423,11 +2407,11 @@ sometimes found a live host, though usually with a different IP TTL\index{TTL} than was used during the probing, which the authors suggested may be a sign of -address spoofing by the probing infrastructure. +address spoofing\index{address spoofing} by the probing infrastructure. % diurnal pattern in scanning delay Because probing was triggered by patterns in the TLS client handshake\index{TLS!fingerprinting}, they developed a server-side tool, brdgrd~\cite{brdgrd}\index{brdgrd}, -that rewrote the TCP window\index{TCP!window size} so that +that rewrote the TCP window\index{TCP!window} so that the client's handshake would be split across packets\index{fragmentation}. The tool sufficed, at the time, to prevent active probing, but stopped working in 2013~\indexauthors{\cite[\S Software]{Winter2012a-webpage}}. @@ -2465,10 +2449,10 @@ but were still distinctive. The ScrambleSuit\index{ScrambleSuit} transport, designed to be immune to active-probing attacks, -first shipped with Tor Browser~4.0\index{Tor Browser} +first shipped with Tor Browser~4.0\index{Tor!Browser} in October~2014~\cite{tor-blog-tor-browser-40-released}. The successor transport obfs4\index{obfs4}, similarly immune, -shipped in Tor Browser~4.5\index{Tor Browser} in +shipped in Tor Browser~4.5\index{Tor!Browser} in April 2015~\cite{tor-blog-tor-browser-45-released}. In August 2015, @@ -2532,7 +2516,7 @@ We found probing of the Tor protocol\index{Tor!protocol}, as expected. The probes we observed in 2015, however, differed from those Wilde\index{Wilde, Tim} described in 2011, -which proceeded as far as building a circuit. +which proceeded as far as building a circuit\index{Tor!circuit}. The ones we saw used less of the Tor protocol: after the TLS\index{TLS} handshake they only queried the server's version and disconnected. @@ -2543,7 +2527,7 @@ not batched to a multiple of 15~minutes. \item[obfs2] The obfs2\index{obfs2} protocol -is meant to look like a random stream\index{look-like-nothing transports}, +is meant to look like a random stream\index{look-like-nothing transport}, but it has a weakness that makes it trivial to identify, passively and retroactively\index{detection!by content}, needing only the first 20 bytes sent by the client. @@ -2555,11 +2539,11 @@ could belong only to legitimate circumventors or to active probers. \item[obfs3] The obfs3\index{obfs3} protocol is also meant -to look like a random stream\index{look-like-nothing transports}; +to look like a random stream\index{look-like-nothing transport}; but unlike obfs2\index{obfs2}, it is not trivially identifiable passively.
It is not possible to retroactively recognize obfs3 connections
(from, say, a packet capture) with certainty:
-sure classification requires active participation in the protocol.
+definitive classification\index{classification} requires active participation in the protocol.
In some of our experiments,
we ran an obfs3 server that was able to participate in the handshake
and so confirm that the protocol really was obfs3.
@@ -2620,11 +2604,11 @@ User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like
where the `\texttt{XX}' is a number that varies.
The intent of this probe seems to be the discovery of
servers that are capable of domain fronting\index{domain fronting} for Google\index{Google} services,
-including Google App Engine\index{Google App Engine}, which runs at \nolinkurl{appspot.com}\index{appspot.com}.
+including Google App Engine\index{Google!App Engine}, which runs at \nolinkurl{appspot.com}\index{appspot.com}.
% \cite[Table~2(a)]{Anonymous2014a} has "appspot.com" as a top-10 DNS poisoning pattern.
(See \autoref{chap:domain-fronting} for more on domain fronting.)
At one time, there were simple proxies running at
-\nolinkurl{webncsproxyXX.appspot.com}\index{Google App Engine}.
+\nolinkurl{webncsproxyXX.appspot.com}\index{Google!App Engine}.

\item[urllib]
This probe type is new since our 2015 paper.
@@ -2697,7 +2681,7 @@ The server log analysis, however, unexpectedly turned up
the other probe types.
The server log data set consisted of
application-layer logs from my personal web and mail\index{email} server,
-which was also a Tor bridge\index{Tor bridge}.
+which was also a Tor bridge\index{Tor!bridge}.
Application-layer logs lack much of the fidelity
you would normally want in a measurement experiment;
they do not have precise timestamps
@@ -2767,13 +2751,13 @@ the address 202.108.181.70\index{202.108.181.70 (active prober)},
which by itself accounted for 2\% of the probes.
(Even this substantial fraction stands in contrast to previous studies,
where that single IP address accounted for
roughly half the probes~\cite[\S 4.5.1]{Winter2012a}.)
-Among the address ranges are ones belonging to residential ISPs\index{Internet service provider}.
+Among the address ranges are ones belonging to residential ISPs\index{ISP}.

Despite the many source addresses,
the probes seem to be managed by
only a few underlying processes.
The evidence for this lies in shared patterns in metadata:
-TCP\index{TCP} initial sequence numbers
+TCP\index{TCP!sequence numbers} initial sequence numbers
and TCP timestamps\index{TCP!timestamps}.
\autoref{fig:active-probing-tsval} shows clear patterns in TCP timestamps,
@@ -2819,7 +2803,7 @@ to effectively exclude active probers,
they do hint at how the probing is implemented.
The active probers have an unusual TLS fingerprint\index{TLS!fingerprinting},
-TLSv1.0\index{TLS} with a peculiar list of ciphersuites.
+TLSv1.0\index{TLS} with a peculiar list of ciphersuites\index{TLS!ciphersuite}.
Tor probes sent only a VERSIONS cell~\cite[\S 4.1]{tor-spec}\index{VERSIONS (Tor cell)},
waited for a response, then closed the connection.
@@ -2835,7 +2819,7 @@ and later switched to a custom implementation.
The obfs2\index{obfs2} probes were conformant with the protocol specification,
and unremarkable except for the fact that
sometimes payloads were duplicated.
-obfs2 clients are supposed to use fresh randomness for each connection,
+obfs2 clients are supposed to use fresh randomness\index{randomization} for each connection,
but a small fraction, about 0.65\%, of obfs2 probes
shared an identical payload with one other probe.
The two probes in a pair came from different source IP addresses
@@ -2844,7 +2828,7 @@ The apparently separate probers must therefore
share some state---at least a shared pseudorandom number generator.

The obfs3\index{obfs3} protocol calls for the client to send
-a random amount of random bytes as padding.
+a random\index{randomization} number of random bytes as padding.
The active probers' implementation of the protocol
gets the probability distribution wrong,
half the time sending too much padding.
@@ -2884,9 +2868,9 @@ offers hints about their internal decision making.
In this chapter I~describe the results of an experiment
that is designed to shed light on the actions of censors;
namely, a test of how quickly they react to and block
-a certain kind of Tor bridge\index{Tor!bridges!default}.
+a certain kind of Tor bridge\index{Tor!bridge!default}.

-\index{Tor!bridges|(}
+\index{Tor!bridge|(}
Tor bridges are secret proxies that help clients get around censorship.
The effectiveness of bridges depends on their secrecy---a censor
that learns a bridge's address can simply block its IP address\index{blocking!by address}.
@@ -2895,9 +2879,9 @@ envisioned that users would learn of bridges
through covert or social channels~\cite[\S 7]{tor-techreport-2006-11-001},
in order to prevent any one actor from
learning about and blocking a large number of them.
-\index{Tor!bridges|)}
+\index{Tor!bridge|)}

-\index{Tor!bridges!default|(}
+\index{Tor!bridge!default|(}
But as it turns out, most users do not use bridges
in the way envisioned.
Rather, most users who use bridges use one of a
@@ -2918,7 +2902,7 @@ why is it that censors do not take blocking steps that we find obvious?
There must be some quality of censors' internal dynamics
that we do not understand adequately.
-\index{Tor!bridges!default|)}
+\index{Tor!bridge!default|)}

The purpose of this chapter is to begin to go
beneath the surface of censorship
@@ -2931,7 +2915,7 @@ with perhaps conflicting goals;
this project is a small step towards better understanding
what lies under the face that censors present.
The main vehicle for the exploration of this subject
-is the observation of default Tor bridges\index{Tor!bridges!default}
+is the observation of default Tor bridges\index{Tor!bridge!default}
to find out how quickly they are blocked
after they first become discoverable by a censor.
I~took part in this project
@@ -2948,17 +2932,17 @@ Aase, Crandall, Díaz, Knockel, Ocaña Molinero, Saia, Wallach, and Zhu~\indexau
looked into case studies of censorship
with a focus on understanding censors'
motivation, resources, and time sensitivity.
-They ``had assumed that censors are fully motivated to block content\index{blocking}
+They ``had assumed that censors are fully motivated to block content\index{blocking!by content}
and the censored are fully motivated to disseminate it,''
but some of their observations challenged that assumption,
with varied and seemingly undirected censorship
hinting at behind-the-scenes resource limitations.
They describe an apparent ``intern effect\index{intern effect},''
-by which keyword lists seem to have been compiled by
+by which keyword lists\index{keyword filtering} seem to have been compiled by
a bored and unmotivated worker, without much guidance.
Knockel et~al.~\indexauthors{\cite{Knockel2017a}} looked into
-censorship of keywords\index{keywords+} in Chinese\index{China} mobile games,
+censorship of keywords\index{keyword filtering} in Chinese\index{China} mobile games,
finding that censorship enforcement in that context
is similarly decentralized,
different from the centralized control we
@@ -3004,34 +2988,34 @@ high-collateral-damage\index{collateral damage} servers into their proxy list,
the firewall stopped blocking for two days,
then resumed again, with an additional check that an IP address
-really was a VPN~Gate proxy before blocking\index{blocking+}.
+really was a VPN~Gate proxy before blocking it.

Wright et~al.~\indexauthors{\cite[\S 2]{Wright2011a}}
motivated a desire for fine-grained censorship measurement
by highlighting limitations that tend to prevent a censor
from being equally effective everywhere in its controlled network.
-Not only resource limitations\index{resources+},
+Not only resource limitations,
but also administrative and logistical requirements,
make it difficult to manage a system
as complex as a national censorship apparatus.

-\index{Tor!bridges!default|(}
+\index{Tor!bridge!default|(}
There has been no prior long-term study dedicated to measuring
time delays in the blocking of default bridges.
There have, however, been a couple of point measurements
that put bounds on what blocking delays in the past must have been.
-Tor Browser\index{Tor Browser} first shipped with default
+Tor Browser\index{Tor!Browser} first shipped with default
obfs2\index{obfs2} bridges
on February~11, 2012~\cite{tor-blog-obfsproxy-next-step-censorship-arms-race};
Winter and Lindskog tested them 41~days later~\indexauthors{\cite[\S 5.1]{Winter2012a}}
and found all~13 of them blocked.
-(The bridges then were blocked by RST injection\index{RST (TCP flag)}\index{injection},
+(The bridges then were blocked by RST injection\index{RST}\index{packet injection},
a different blocking technique than the timeouts
we have seen more recently.)
In 2015 I~used public reports of blocking and non-blocking of
the first batch of default obfs4\index{obfs4} bridges
to infer a blocking delay of
not less than~15 and not more than 76~days~\indexauthors{\cite{tor-dev-censorship-lag}}.
-\index{Tor!bridges!default|)}
+\index{Tor!bridge!default|)}

As security researchers, we are accustomed to making
conservative assumptions when building threat models\index{modeling}.
@@ -3073,7 +3057,7 @@ from actual blocking.
The script only tested whether it was possible
to make a TCP\index{TCP} connection,
which is a necessary but not sufficient precondition
-to actually establishing a Tor circuit\index{Tor!circuit+}
+to actually establishing a Tor circuit\index{Tor!circuit}
through the bridge.
In Kazakhstan\index{Kazakhstan}, we deployed an additional script
that attempted to establish a full Tor-in-obfs4\index{obfs4} connection,
@@ -3082,22 +3066,22 @@ we discovered there.
The experiment was opportunistic in nature:
we ran from China\index{China}, Iran\index{Iran}, and Kazakhstan\index{Kazakhstan} not only because
-they are likely suspects for Tor\index{Tor!protocol} blocking\index{blocking},
+they are likely suspects for Tor\index{Tor!protocol} blocking\index{blocking!by content},
but because we happened to have access to a site in each
from which we could run probes over some period of time.
Therefore the measurements cover different dates in different countries.
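At its core, the measurement is nothing more than a timestamped TCP
connection attempt, repeated on a schedule from each vantage point and
from a control site outside the censored networks. A~sketch of the
idea follows; the bridge address, probing interval, and log format
here are placeholders, not the exact script we ran.
\begin{verbatim}
# Timestamped TCP reachability probe; 192.0.2.1:24215 is a
# placeholder bridge address (TEST-NET-1).
import socket
import time

def probe(host, port, timeout=20.0):
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return "open"
    except socket.timeout:
        return "timeout"   # connection timeouts are the blocking
                           # behavior we have seen more recently
    except OSError as err:
        return "error %s" % err

while True:
    now = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    print(now, "192.0.2.1:24215", probe("192.0.2.1", 24215), flush=True)
    time.sleep(20 * 60)    # e.g., every 20 minutes
\end{verbatim}
Comparing the same log from inside and outside the country is what
distinguishes blocking from an ordinary outage.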
-We began at a time when Tor\index{Tor Project} was building up
+We began at a time when Tor\index{Tor!Project} was building up
its stock of default bridges.
We began monitoring each new bridge as it was added,
-coordinating with the Tor Browser\index{Tor Browser} developers to get advance notice
+coordinating with the Tor Browser\index{Tor!Browser} developers to get advance notice
of their addition when possible.
Additionally we had the developers run
certain more controlled experiments for us---such
as adding a bridge to the source code but commenting it out---that
are further detailed below.
-We were only concerned with default bridges, not secret ones\index{Tor!bridges!default}.
-Our goal was not to estimate the difficulty of the proxy discovery problem\index{proxy discovery problem},
+We were only concerned with default bridges, not secret ones\index{Tor!bridge!default}.
+Our goal was not to estimate the difficulty of the proxy discovery problem\index{proxy discovery},
but to better understand how censors deal with what should be an easy task.
We focused on bridges using the obfs4 pluggable transport~\cite{obfs4}\index{obfs4}\index{pluggable transports},
which not only is the most-used transport and the one
@@ -3111,11 +3095,11 @@ because whether default or not,
active probing would cause them to be blocked
shortly after their first use.

-Bridges are identified by a nickname and a port number\index{Tor!bridges!nickname}.
+Bridges are identified by a nickname and a port number\index{Tor!bridge!nickname}.
The nickname is an arbitrary identifier, chosen by the bridge operator.
So, for example, ``ndnop3:24215''\index{ndnop3 (Tor bridge)} is one bridge,
and ``ndnop3:10527''\index{ndnop3 (Tor bridge)} is another on the same IP address.
-We pulled the list of bridges from Tor Browser\index{Tor Browser}
+We pulled the list of bridges from Tor Browser\index{Tor!Browser}
and Orbot\index{Orbot}, which is the port of Tor for Android\index{Android}.
Tor Browser and Orbot\index{Orbot} mostly had the same bridges,
though there were a few Orbot-only bridges.
@@ -3185,10 +3169,10 @@ Port numbers are in chronological order of release.
}
\index{obfs4}
\index{FTE}
-\index{Tor Browser}
+\index{Tor!Browser}
\index{Orbot}
-\index{Tor!bridges!default}
-\index{Tor!bridges!nickname}
+\index{Tor!bridge!default}
+\index{Tor!bridge!nickname}
\index{ndnop3 (Tor bridge)}
\index{ndnop5 (Tor bridge)}
\index{riemann (Tor bridge)}
@@ -3208,7 +3192,7 @@ Port numbers are in chronological order of release.
\label{tab:proxy-probe-destinations}
\end{table}

-\index{Tor!bridges!default|(}
+\index{Tor!bridge!default|(}
There are four stages in the process of deploying a new default bridge.
At the beginning, the bridge is secret,
@@ -3217,23 +3201,23 @@ Each successive stage of deployment makes the bridge more public,
increasing the number of places where a censor may look to discover it.
The whole process takes a few days to a few weeks,
-mostly depending on Tor Browser's\index{Tor Browser} release schedule.
+mostly depending on Tor Browser's\index{Tor!Browser} release schedule.
\begin{description}
\item[Ticket filed]
-The process begins with the filing of a ticket in Tor's\index{Tor Project} public issue tracker.
+The process begins with the filing of a ticket in Tor's\index{Tor!Project} public issue tracker.
The ticket includes the bridge's IP address.
A~censor that pays attention to the issue tracker
could discover bridges as early as this stage.
\item[Ticket merged] After review, the ticket is merged and the new bridge -is added to the source code of Tor Browser\index{Tor Browser}. +is added to the source code of Tor Browser\index{Tor!Browser}. From there it will begin to be included in nightly builds. A~censor that reads the bridge configuration file\index{bridge configuration file} from the source code repository, or downloads nightly builds, could discover bridges at this stage. \item[Testing release] -\index{Tor Browser!releases} +\index{Tor!Browser!releases} Just prior to a public release, Tor Browser developers send candidate builds to a public mailing list\index{tor-qa mailing list} to solicit @@ -3245,15 +3229,15 @@ Occasionally the developers skip the testing period, such as in the case of an urgent security release. \item[Public release] After testing, the releases are made public -and announced on the Tor Blog\index{Tor Blog}\index{blog}. +and announced on the Tor Blog\index{Tor!Blog}\index{blog}. A~censor could learn of bridges at this stage by reading the blog and downloading executables. This is also the stage at which the new bridges begin to have an appreciable number of users. -There are two release tracks of Tor Browser: stable\index{Tor Browser!stable release} and alpha\index{Tor Browser!alpha release}. +There are two release tracks of Tor Browser: stable\index{Tor!Browser!releases} and alpha. Alpha releases are distinguished by an `a' in their version number, for example 6.5a4. -According to Tor Metrics~\cite{tor-metrics-webstats-tb}\index{Tor Metrics}, +According to Tor Metrics~\cite{tor-metrics-webstats-tb}\index{Tor!Metrics}, stable downloads outnumber alpha downloads by a factor of about 30~to~1. \end{description} @@ -3266,7 +3250,7 @@ and that the bridges did not expose any transports other than obfs4\index{obfs4} We wanted to ensure that any blocking of bridges could only be the result of their status as default bridges, and not a side effect of some other detection system. -\index{Tor!bridges!default|)} +\index{Tor!bridge!default|)} \section{Results from China} @@ -3347,12 +3331,12 @@ More details and evidence appear in the following subsections. \subsection{Per-port blocking} \label{sec:china-perport} -In the first few release batches, the censor blocked individual ports\index{port+}, -not an entire IP address\index{IP address+}. +In the first few release batches, the censor blocked individual ports, +not an entire IP address. For example, see point~\cnref{a} in \autoref{fig:proxy-probe-timelines-china1}: -after ndnop3:24215\index{ndnop3 (Tor bridge} was blocked, +after ndnop3:24215\index{ndnop3 (Tor bridge)} was blocked, we opened ndnop3:10527 on the same IP address. -The alternate port\index{port} remained reachable +The alternate port remained reachable until it, too, was blocked in the next release batch. We used this technique of rotating @@ -3365,7 +3349,7 @@ in addition to their obfs4\index{obfs4} ports. After riemann:443\index{riemann (Tor bridge)} (obfs4) was blocked (point~\cnref{c} in \autoref{fig:proxy-probe-timelines-china1}), riemann:22 (SSH) remained reachable for a further nine months, until it was finally blocked at point~\cnref{m}. -Per-port blocking would give way to whole-IP blocking +Per-port blocking would give way to whole-IP blocking\index{blocking!by address} in October 2016. @@ -3379,7 +3363,7 @@ and blocked the bridges in an earlier stage. 
In the 5.5.5/6.0a5/6.0 batch,
the censor even seems to have missed the 5.5.5 and 6.0a5 releases
(point~\cnref{e} in \autoref{fig:proxy-probe-timelines-china1}),
-only blocking\index{blocking} after the 6.0 release, 36 days later.
+only blocking after the 6.0 release, 36 days later.
This observation hints that, before October 2016 anyway,
the censor was somehow extracting bridge addresses
from the release packages themselves.
@@ -3482,7 +3466,7 @@ A similar pattern appears across all bridges
for nine hours starting on June~28, 2016
at 17:40~UTC.

-After the switch to whole-IP\index{IP address} blocking\index{blocking!by address},
+After the switch to whole-IP blocking\index{blocking!by address},
there are further instances of spotty and inconsistent censorship,
though of a different nature.
Several cases are visible near point~\cnref{j}
@@ -3538,7 +3522,7 @@ by inspecting the bridge configuration file\index{bridge configuration file} in
because if it had been, it would not have missed
two of the bridges in the list.
Rather, we suspect that the censor used some kind of
network-level analysis---perhaps
-running a release of Tor Browser\index{Tor Browser} in a black-box fashion,
+running a release of Tor Browser\index{Tor!Browser} in a black-box fashion,
and making a record of all addresses it connected to.
This would explain why GreenBelt:5881\index{GreenBelt (Tor bridge)} was not blocked
(it couldn't be connected to while the censor was harvesting bridge addresses)
@@ -3657,6 +3641,7 @@ to have become active

\subsection{The onset of whole-IP blocking}
\label{sec:china-allports}
+\index{blocking!by address|(}
The blocking event of October~20, 2016 was noteworthy not only because
it occurred before a release, but also because it affected more than one
port on some bridges.
See point~\cnref{h} in \autoref{fig:proxy-probe-timelines-china1}.
@@ -3722,6 +3707,7 @@ The same pattern seems to happen with LeifEricson\index{LeifEricson (Tor bridge)
The newly opened ports 50000, 50001, and 50002
share brief periods of reachability in September and October 2016,
but port 41213 during the same time remained solidly down.
+\index{blocking!by address|)}

\subsection{No discovery of Orbot bridges}

@@ -3732,7 +3718,7 @@ but port 41213 during the same time remained solidly down.
Orbot, the version of Tor\index{Tor} for Android\index{Android},
also includes default bridges.
It has its own bridge configuration file\index{bridge configuration file},
-similar to Tor Browser's\index{Tor Browser}, but in a different format.
+similar to Tor Browser's\index{Tor!Browser}, but in a different format.
Most of Orbot's bridges are borrowed from Tor Browser,
so when a bridge gets blocked,
it is blocked for users of both Orbot and Tor Browser.
@@ -3744,14 +3730,14 @@ but they remained unblocked for over eight months,
even as the ports used by Tor Browser were blocked one by one.
The Orbot-only bridges were finally blocked---see point~\cnref{k}
in \autoref{fig:proxy-probe-timelines-china1}---as a side effect
-of the whole-IP blocking that began in October 2016 (\autoref{sec:china-allports}).
+of the whole-IP blocking\index{blocking!by address} that began in October 2016 (\autoref{sec:china-allports}).
(All of the Orbot bridges suffered outages,
as \autoref{fig:proxy-probe-timelines-china1} shows,
but they were the result of temporary misconfigurations, not blocking.
They were unreachable during those outages from the control site as well.)
These results show that whatever mechanism the censor had
-for discovering and blocking the default bridges of Tor Browser\index{Tor Browser},
+for discovering and blocking the default bridges of Tor Browser\index{Tor!Browser},
it lacked for discovering and blocking those of Orbot.
Again we have a case of our assumptions not matching reality---blocking
that should be easy to do, and yet is not done.
@@ -3777,8 +3763,8 @@ As expected, they were already blocked at the beginning, and remained so
\label{sec:china-unused}

As a control measure, we reserved a bridge in secret.
-ndnop4:27668\index{ndnop4 (Tor Bridge} (see point~\cnref{n} in \autoref{fig:proxy-probe-timelines-china1})
-was not published, neither in Tor Browser's\index{Tor Browser} bridge configuration file\index{bridge configuration file},
+ndnop4:27668\index{ndnop4 (Tor bridge)} (see point~\cnref{n} in \autoref{fig:proxy-probe-timelines-china1})
+was published neither in Tor Browser's\index{Tor!Browser} bridge configuration file\index{bridge configuration file}
nor in BridgeDB\index{BridgeDB}.
As expected, it was never blocked.
@@ -3886,7 +3872,7 @@ most of the bridges were in fact effectively blocked.
\caption{
Default bridge bootstrap progress from a site in Kazakhstan.
In contrast to \autoref{fig:proxy-probe-timelines-kazakhstan}, above,
-this experiment built a full obfs4 connection and Tor circuit,
+this experiment built a full obfs4 connection and Tor circuit\index{Tor!circuit},
revealing blocking beyond the TCP\index{TCP} handshake.
Tor reports its connection progress as a percentage;
so here, ``success'' is on a continuum from 0\% to~100\%,
@@ -3992,7 +3978,7 @@ Domain fronting is a general-purpose circumvention technique
based on HTTPS\index{HTTPS}.
It disguises the true destination of a client's messages
by routing them through a large web server or
-content delivery network\index{content delivery network} that hosts
+content delivery network\index{CDN} that hosts
many web sites.
From the censor's point of view,
messages appear to go not to their actual (presumably blocked) destination,
@@ -4019,7 +4005,7 @@ that are visible to the censor:
\item the server's TLS\index{TLS} certificate\index{certificate}~\cite[\S 7.4.2]{rfc5246}\index{common name (X.509)}
\end{itemize}
and in one place that is not visible to the censor,
-because it is encrypted:
+because it is encrypted\index{encryption}:
\begin{itemize}
\item the HTTP\index{HTTP} Host header~\cite[\S 5.4]{rfc7230}\index{Host (HTTP header)}
\end{itemize}
@@ -4041,7 +4027,7 @@ of a client making a domain-fronted request.
\caption{
Domain fronting uses different names at different protocol layers.
-The forbidden destination domain is encrypted within
+The forbidden destination domain is encrypted\index{encryption} within
the TLS\index{TLS} layer.
The censor sees only a front domain\index{front domain},
one chosen to be expensive to block\index{collateral damage}.
@@ -4077,12 +4063,12 @@ resolves the deadlock by
sending the domain name in plaintext in the TLS layer.
Domain fronting takes advantage of
decoupling the two normally coupled values.
-It relies on the server decrypting the TLS layer
+It relies on the server decrypting\index{encryption} the TLS layer
and throwing it away,
then routing requests according to the Host header\index{Host (HTTP header)}.

Virtual hosting\index{virtual hosting},
-in the form of content delivery networks\index{content delivery network} (CDNs), is now common.
+in the form of content delivery networks\index{CDN} (CDNs), is now common.
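Concretely, a fronted request requires nothing exotic on the client's
part: an ordinary HTTPS connection to the front domain, with the Host
header naming the covert destination. A~minimal sketch, using
placeholder domain names (for the routing to work, both names must be
served by the same intermediary):
\begin{verbatim}
# Domain-fronted request with placeholder names. DNS and the
# TLS SNI see only front.example; the Host header, naming the
# true destination, travels encrypted inside the TLS layer.
import http.client

conn = http.client.HTTPSConnection("front.example")
conn.request("GET", "/", headers={"Host": "hidden.example"})
resp = conn.getresponse()
print(resp.status, resp.reason)
\end{verbatim}
A~server that terminates TLS and then routes on the Host header
delivers the request to \nolinkurl{hidden.example}, while everything
the censor can observe points at \nolinkurl{front.example}.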
A~CDN works by placing an ``edge server''\index{edge server} between the client and the destination, called an ``origin server''\index{origin server} in this context. When the edge server receives an HTTP\index{HTTP} request, @@ -4095,8 +4081,8 @@ but only through the intermediary CDN, which foils address-based blocking\index{blocking!by address} of the destination the censor may have imposed. Domain fronting also works on application hosting services -like Google App Engine\index{Google App Engine}, -because one can upload a simple application that emulates a CDN. +like Google App Engine\index{Google!App Engine}, +because one can upload a simple application that emulates a CDN\index{CDN}. The contents of the client's messages, as well as the domain name of the true destination, are protected by TLS\index{TLS} encryption\index{encryption}. @@ -4108,17 +4094,17 @@ non-circumvention-related traffic to those addresses, with whatever collateral damage\index{collateral damage} that entails. Domain fronting may be an atypical use of HTTPS\index{HTTPS}, -but it is not a way to get free CDN\index{content delivery network} service. +but it is not a way to get free CDN\index{CDN} service. A~CDN does not forward requests to arbitrary domains, only to domains belonging to one of its customers. Setting up domain fronting requires -becoming a customer of a CDN\index{content delivery network} +becoming a customer of a CDN\index{CDN} and paying for service---and the cost can be high\index{domain fronting!costs of}, as \autoref{sec:meek-history} shows. It may seem at first that domain fronting is only useful for accessing HTTPS\index{HTTPS} web sites, -and then only when they are hosted on a CDN\index{content delivery network}. +and then only when they are hosted on a CDN\index{CDN}. But extending the idea to work with arbitrary destinations only requires the minor additional step @@ -4129,15 +4115,15 @@ then forwards to the destination. Domain fronting shields the address of the proxy, which does not pose enough risk of collateral damage\index{collateral damage}, on its own, to resist blocking\index{blocking!by address}. -Exactly this sort of HTTPS tunneling underlies meek\index{meek}, +Exactly this sort of HTTPS tunneling\index{tunneling} underlies meek\index{meek}, a circumvention system based on domain fronting that is discussed further in \autoref{sec:meek-impl}. One of the best features of domain fronting is that it does not require any secret information, -completely bypassing the proxy distribution problem\index{proxy distribution problem} +completely bypassing the proxy distribution problem\index{proxy distribution} (\autoref{sec:address-strategies}). -The address of the CDN\index{content delivery network} edge server\index{edge server}, +The address of the CDN\index{CDN} edge server\index{edge server}, the address of the proxy hidden behind it, the fact that some fraction of traffic to the edge server\index{edge server} is circumvention---all of these may be known by the censor, @@ -4174,16 +4160,16 @@ is omitted, rather than being faked. % merge 2.0 code Earlier in 2012, Bryce Boe wrote a blog post~\indexauthors{\cite{Boe2012a}} -outlining how to use Google App Engine\index{Google App Engine} as a proxy, +outlining how to use Google App Engine\index{Google!App Engine} as a proxy, and suggested that sending a false SNI\index{SNI} -could bypass SNI whitelisting\index{whitelisting}. +could bypass SNI whitelisting\index{whitelist}. 
Even farther back, in 2004, -when HTTPS\index{HTTPS} and CDNs\index{content delivery network} were less common, +when HTTPS\index{HTTPS} and CDNs\index{CDN} were less common, Köpsell and Hillig~\indexauthors{\cite[\S 5.2]{Koepsell2004a}} foresaw the possibilities of a situation such as exists today: ``Imagine that all web pages of the United States\index{United States of America} are only -retrievable (from abroad) by sending encrypted requests to +retrievable (from abroad) by sending encrypted\index{encryption} requests to one and only one special node. Clearly this idea belongs to the `all or nothing' concept because a blocker has to block @@ -4204,7 +4190,7 @@ rather than at the ends. The client ``tags'' its messages in a way that the censor cannot detect (analogously to the way the Host header\index{Host (HTTP header)} -is encrypted in domain fronting). +is encrypted\index{encryption} in domain fronting). When the router finds a tagged message, it shunts the message away from its nominal destination and towards some other, covert destination. @@ -4223,7 +4209,7 @@ is like domain fronting in many respects. It uses HTTPS\index{HTTPS} to a shared server (in this case a cloud storage server). The specific storage area being accessed---what -the censor would like to know---is encrypted, +the censor would like to know---is encrypted\index{encryption}, so the censor cannot block CloudTransport\index{CloudTransport} without blocking the storage service completely. @@ -4251,25 +4237,27 @@ in different protocols. and my understanding of it is based on an imperfect translation.) Wang et~al.~\indexauthors{\cite{Wang2015a}} -built classifiers for meek\index{meek} +built classifiers\index{classification} for meek\index{meek} among other protocols using entropy\index{entropy}, timing\index{packet size and timing}, and transport-layer features. They emphasized practical classifiers -and tested their false-classification\index{false positives}\index{false negatives} rates +and tested their misclassification\index{false positive}\index{false negative} rates against real traffic traces. \section{A pluggable transport for Tor} \label{sec:meek-impl} +\index{meek|(} + I~am the main author and maintainer of meek\index{meek}, a pluggable transport\index{pluggable transports} for Tor based on domain fronting. meek uses domain-fronted HTTP\index{HTTP} POST\index{POST (HTTP method)} requests as the primitive operation to send or receive chunks of data up to a few kilobytes in size. -The intermediate CDN\index{content delivery network} -receives domain-fronted requests and forwards them to a Tor bridge\index{Tor bridge}. +The intermediate CDN\index{CDN} +receives domain-fronted requests and forwards them to a Tor bridge\index{Tor!bridge}. Auxiliary programs on the client and the bridge convert the sequence of HTTP requests to the byte stream expected by Tor. @@ -4284,17 +4272,17 @@ and protocol layers interact. \caption{ Putting it together: how to build a circumvention system around domain fronting. -The CDN\index{content delivery network} acts as a limited proxy, +The CDN\index{CDN} acts as a single-purpose proxy, only capable of forwarding to destinations within its own network---one of which is a bridge, which we control. The bridge acts as a general-purpose proxy, -capable of reaching access to any destination. +capable of reaching any destination. Fronting through the CDN hides the bridge's address, -which is presumably blocked\index{blocking!by address}. 
+which the censor would otherwise block\index{blocking!by address}. } -\index{Tor bridge} +\index{Tor!bridge} \index{SNI} \index{Host (HTTP header)} \index{meek} @@ -4420,7 +4408,7 @@ a local headless browser (which is completely separate from the browser that the user interacts with). \index{TLS!fingerprinting|)} -meek\index{meek} first appeared in Tor Browser\index{Tor Browser} +meek\index{meek} first appeared in Tor Browser\index{Tor!Browser} in October 2014~\cite{tor-blog-tor-browser-40-released}, and continues in operation to the present. It is Tor's\index{Tor} second-most-used transport @@ -4444,10 +4432,10 @@ This graph is an updated version of Figure~5 from the 2015 paper ``Blocking-resistant communication through domain fronting''~\cite{Fifield2015a-local}; the vertical blue stripe divides old and new data. -The user counts come from Tor Metrics~\cite{tor-tr-2012-10-001}. +The user counts come from Tor Metrics\index{Tor!Metrics}. } \index{meek} -\index{Tor Metrics} +\index{Tor!Metrics} \label{fig:metrics-clients-meek} \end{figure} @@ -4465,7 +4453,7 @@ The notation `{\color{gray} ---}' means meek wasn't deployed on that service in that month; for example, we stopped using App Engine after May 2016 following the suspension of the service -(see discussion on p.~\pageref{para:meek-suspension}). +(see discussion on page~\pageref{para:meek-suspension}). The notation `{\color{gray} ?}' marks the months after I~stopped handling the invoices personally. I~don't know the costs for those months, @@ -4475,9 +4463,9 @@ than the values shown. } \index{meek!costs of|textbf} \index{domain fronting!costs of} -\index{Google App Engine} +\index{Google!App Engine} \index{Amazon CloudFront} -\index{Microsoft Azure} +\index{Microsoft!Azure} \label{tab:meek-costs} \end{table} @@ -4502,12 +4490,12 @@ a circumvention system built around web browser\index{web browser}--based proxies. Flash proxy clients need a secure rendezvous\index{rendezvous!of flash proxy}, a way to register their address -to a central facilitator\index{flash proxy!facilitator}, +to a central facilitator, so that flash proxies may connect back to them. Initially there were only two means of registration: -flashproxy-reg-http\index{flash proxy!flashproxy-reg-http}, +flashproxy-reg-http, which sent client registrations as HTTP\index{HTTP} requests; -and flashproxy-reg-email\index{flash proxy!flashproxy-reg-email}, +and flashproxy-reg-email, which sent client registrations to a distinguished email\index{email} address. We knew that flashproxy-reg-http was easily blockable; flashproxy-reg-email had good blocking resistance @@ -4517,12 +4505,12 @@ At some point, Jacob Appelbaum\index{Appelbaum, Jacob} showed me an example of using domain fronting---though we didn't have a name for it then---to access a simple HTML-rewriting proxy\index{HTML-rewriting proxy} -based on Google App Engine\index{Google App Engine}. +based on Google App Engine\index{Google!App Engine}. I~eventually realized that the same trick would work for flash proxy rendezvous\index{rendezvous!of flash proxy}. 
I~proposed a design~\cite{tor-trac-8860} in May 2013 and within a month Arlo Breault\index{Breault, Arlo} had written -flashproxy-reg-appspot\index{flash proxy!flashproxy-reg-appspot}, -which worked just like flashproxy-reg-http\index{flash proxy!flashproxy-reg-http}, +flashproxy-reg-appspot, +which worked just like flashproxy-reg-http, except that it fronted through \nolinkurl{www.google.com}\index{www.google.com@\nolinkurl{www.google.com}} rather than contacting the registration server directly. The fronting-based registration became flash proxy's\index{flash proxy} preferred registration method, @@ -4532,7 +4520,7 @@ The development of domain fronting, from a simple rendezvous\index{rendezvous} t to a full-fledged bidirectional transport, seems slow in retrospect. All the pieces were there; it was a matter of putting them together. I~did not immediately appreciate the potential of domain fronting when I~first saw it. -Even after the introduction of flashproxy-reg-appspot\index{flash proxy!flashproxy-reg-appspot}, +Even after the introduction of flashproxy-reg-appspot, months passed before the beginning of meek\index{meek}. The whole idea behind flash proxy rendezvous\index{rendezvous!of flash proxy} is that the registration channel @@ -4541,7 +4529,7 @@ it is only used to bootstrap into a more capable channel (WebSocket\index{WebSocket}, in flash proxy's case). Email\index{email} fits this model well: not good for a general-purpose channel, -but just good enough for rendezvous\index{rendezvous}. +but just good enough for rendezvous. The fronting-based HTTP\index{HTTP} channel, however, was more capable than needed for rendezvous, being bidirectional and reasonably high-performance. @@ -4553,7 +4541,7 @@ the circumvention system GoAgent\index{GoAgent} through the ``Collateral Freedom''\index{collateral freedom@``collateral freedom''}~\indexauthors{\cite{Robinson2013a}} report of Robinson et~al. GoAgent used an early form of domain fronting, -issuing HTTP\index{HTTP} requests directly from a Google App Engine\index{Google App Engine} server. +issuing HTTP\index{HTTP} requests directly from a Google App Engine\index{Google!App Engine} server. According to the report, GoAgent was the most used circumvention tool among a group of users in China\index{China}. @@ -4581,7 +4569,7 @@ none of which were satisfactory. I~wrote a simplified experimental prototype called ``meeker\index{meeker},'' which simply prepended an HTTP\index{HTTP} header before the client and server streams, but meeker only worked for direct connections, -not through an HTTP-aware intermediary like App Engine\index{Google App Engine}. +not through an HTTP-aware intermediary like App Engine\index{Google!App Engine}. When I~explained these difficulties to George Kadianakis\index{Kadianakis, George} in December 2013, he advised me to forget the complexity and implement the simplest @@ -4602,7 +4590,7 @@ cleared the way.) In the post, I~linked to the source code, described the protocol, and explained how to try it, -using an App Engine\index{Google App Engine} instance I~set up shortly before. +using an App Engine\index{Google!App Engine} instance I~set up shortly before. At this time there was no web browser\index{web browser} TLS camouflage\index{TLS!fingerprinting}, and only App Engine was supported. 
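That simplest thing is, in essence, still meek's protocol: a polling
loop in which each HTTP POST body carries any queued upstream bytes,
and each response body carries downstream bytes. A~sketch of the
client-side primitive, with illustrative names (the real client adds
domain fronting, TLS camouflage, and retries; an
\texttt{X-Session-Id} header ties requests to a session):
\begin{verbatim}
# meek-style polling primitive (illustrative, not the real client).
import urllib.request

URL = "https://front.example/"       # placeholder; really fronted
SESSION_ID = "unguessable-random-id" # links requests into one session

def roundtrip(upstream: bytes) -> bytes:
    # An empty POST body is a poll for pending downstream data.
    req = urllib.request.Request(
        URL, data=upstream,
        headers={"X-Session-Id": SESSION_ID}, method="POST")
    with urllib.request.urlopen(req) as resp:
        return resp.read()           # downstream bytes, may be empty
\end{verbatim}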
I~was not yet using the term ``domain fronting.'' @@ -4610,26 +4598,26 @@ The big ideas of the title were as follows: we could run one big public bridge rather than relying on multiple smaller bridges as other transports did; a web server with a PHP\index{PHP} ``reflector'' script -could take the place of a CDN\index{content delivery network}, +could take the place of a CDN\index{CDN}, providing a diversity of access points even without domain fronting; we could combine meek with authentication\index{authentication} and serve a 404\index{404 (HTTP status code)} to unauthenticated users; -and Cloudflare\index{Cloudflare} and other CDNs\index{content delivery networks} -are alternatives to App Engine\index{Google App Engine}. +and Cloudflare\index{Cloudflare} and other CDNs\index{CDN} +are alternatives to App Engine\index{Google!App Engine}. We did end up running a public bridge for public benefit (and later worrying over how to pay for it), -and deploying on platforms other than App Engine\index{Google App Engine} +and deploying on platforms other than App Engine\index{Google!App Engine} (with Tor we use other CDNs, but not Cloudflare\index{Cloudflare} specifically). Arlo Breault\index{Breault, Arlo} would write a PHP\index{PHP} reflector, though there was never a repository of public meek reflectors -as there were for other types of Tor bridges\index{Tor bridges}. +as there were for other types of Tor bridges\index{Tor!bridge}. Combining meek with authentication\index{authentication} never happened; it was never needed for our public domain-fronted instances because active probing doesn't help the censor in those cases anyway. During the spring 2014 semester (January--May) I~was enrolled in Vern Paxson's\index{Paxson, Vern} -Internet/Network Security\index{Internet/Network Security} course +Internet/Network Security\index{CS261N (network security course)} course along with fellow student Chang Lan\index{Lan, Chang}. We made the development and security evaluation of meek our course project. @@ -4641,7 +4629,7 @@ Our final report, became the kernel of our later research paper. I~began the process of getting -meek integrated into Tor Browser\index{Tor Browser} in February 2014~\cite{tor-trac-10935}. +meek integrated into Tor Browser\index{Tor!Browser} in February 2014~\cite{tor-trac-10935}. The initial integration would be completed in August 2014. In the intervening time, along with much testing and debugging, Chang Lan\index{Lan, Chang} and I~wrote browser extensions @@ -4666,28 +4654,28 @@ could act as a glue layer between Tor\index{Tor} and the HTTP\index{HTTP} proxy in effect allowing you to use Lantern as a pluggable transport\index{pluggable transports} for Tor. We worked out a prototype and wrote a summary of the process~\cite{tor-dev-howto-use-lantern-pt}. In that specific application, we used meek not for its -domain-fronting properties but for its HTTP-tunneling properties; +domain-fronting properties but for its HTTP-tunneling\index{tunneling} properties; but the early contact with other circumvention developers was valuable. June 2014 brought a surprise: the Great Firewall of China\index{Great Firewall of China} blocked all Google\index{Google} services~\cite{google-transparency-cn-201405,greatfire-google-block-cn}. 
It would be vain to think that it was in response
-to the nascent deployment of meek on App Engine\index{Google App Engine};
+to the nascent deployment of meek on App Engine\index{Google!App Engine};
a much more likely cause was Google's decision
to begin using HTTPS\index{HTTPS} for web searches,
-which would foil keyword-based URL\index{URL} filtering\index{detection!by content}\index{keywords}.
+which would foil keyword-based URL\index{URL!filtering} filtering\index{keyword filtering}.
Nevertheless, the blocking cast doubt on the
feasibility of domain fronting:
I~had believed that blocking all of Google\index{Google} would be too costly
-in terms of collateral damage to be sustained for long
+in terms of collateral damage\index{collateral damage} to be sustained for long
by any censor, even the Great Firewall\index{Great Firewall of China},
and that belief was wrong.
In any case, we now needed fronts other than Google\index{Google}
in order to have any claim of effective circumvention in China\index{China}.
I~set up additional backends: Amazon CloudFront\index{Amazon CloudFront}
-and Microsoft Azure\index{Microsoft Azure}.
-When meek made its debut in Tor Browser\index{Tor Browser},
+and Microsoft Azure\index{Microsoft!Azure}.
+When meek made its debut in Tor Browser\index{Tor!Browser},
it would offer three modes: meek-google\index{meek!meek-google},
meek-amazon\index{meek!meek-amazon},
and meek-azure\index{meek!meek-azure}.
@@ -4715,15 +4703,15 @@ and a deeper investigation into
resistance against traffic analysis attacks
based on packet sizes and timing\index{packet size and timing}.

-The first public release of Tor Browser\index{Tor Browser} that had
+The first public release of Tor Browser\index{Tor!Browser} that had
a built-in easy-to-use meek client
was version 4.0-alpha-1 on August 12, 2014~\cite{tor-blog-tor-browser-364-and-40-alpha-1-are-released}.
-This was an alpha release\index{Tor Browser!alpha release},
-used by fewer users than the stable release\index{Tor Browser!stable release}.
+This was an alpha release\index{Tor!Browser!releases},
+used by fewer users than the stable release.
I~made a blog post explaining how to use it a few days later~\cite{tor-blog-how-use-meek-pluggable-transport}.
The release and blog post had a positive effect on the number of users;
however, the absolute numbers from around this time are uncertain,
-because of a mistake I~made in configuring the meek bridge\index{Tor bridge}.
+because of a mistake I~made in configuring the meek bridge\index{Tor!bridge}.
I~was running the meek bridge and the flash proxy\index{flash proxy} bridge
on the same instance of Tor\index{Tor};
and because of how Tor's statistics are aggregated,
@@ -4732,7 +4720,7 @@ I~switched the meek bridge to a separate instance of Tor
on September 15; numbers after that date are more trustworthy.
In any case, the usage before this first release was tiny:
-the App Engine~\index{Google App Engine} bill,
+the App Engine\index{Google!App Engine} bill,
at a rate of \$0.12/GB with one~GB free each day,
was less than \$1.00 per month for the first seven months of 2014~\cite[\S Costs]{meek-wiki}.
\index{meek!costs of}
@@ -4741,23 +4729,23 @@ and would continue to rise from there.
See \autoref{tab:meek-costs} on page~\pageref{tab:meek-costs}
for a history of monthly costs.

-Tor Browser\index{Tor Browser} 4.0~\cite{tor-blog-tor-browser-40-released}
+Tor Browser\index{Tor!Browser} 4.0~\cite{tor-blog-tor-browser-40-released}
was released on October 15, 2014.
-It was the first stable\index{Tor Browser!stable release}
-(not alpha\index{Tor Browser!alpha release}) release to have meek,
+It was the first stable\index{Tor!Browser!releases}
+(not alpha) release to have meek,
and it had an immediate effect on the number of users,
which jumped from 50 to~500 within a week.
(The increase was partially conflated with
-a failure of the meek-amazon\index{meek-amazon} bridge to publish statistics
-before that date, but the other bridge\index{Tor bridge},
-servicing both meek-google\index{meek-google} and meek-azure\index{meek-azure},
+a failure of the meek-amazon\index{meek!meek-amazon} bridge to publish statistics
+before that date, but the other bridge\index{Tor!bridge},
+servicing both meek-google\index{meek!meek-google} and meek-azure\index{meek!meek-azure},
individually showed the same increase.)
It was a lesson in user behavior:
although meek had been available
-in an alpha release\index{Tor Browser!alpha release} for two months already,
+in an alpha release\index{Tor!Browser!releases} for two months already,
evidently a large number of users did not know of it
or chose not to try it
-until the first stable release\index{Tor Browser!stable release}.
+until the first stable release.
At that time, the other transports available were
obfs3\index{obfs3},
FTE\index{FTE},
@@ -4769,7 +4757,7 @@ and flash proxy\index{flash proxy}.

Through the first part of 2015,
the estimated number of simultaneous users continued to grow,
reaching about 2,000,
-as we fixed bugs and Tor Browser\index{Tor Browser} had further releases.
+as we fixed bugs and Tor Browser\index{Tor!Browser} had further releases.
The first release of Orbot\index{Orbot} that included meek
appeared in February~\cite{guardian-dev-orbot-v15-alpha-3}.
@@ -4782,7 +4770,7 @@ and appeared on June~30 at the symposium.

The increasing use of domain fronting by various circumvention tools
began to attract more attention.
A March 2015 article by Eva Dou and Alistair Barr in
-\textsl{The Wall Street Journal}\index{Wall Street Journal, The@\textsl{Wall Street Journal, The}}~\indexauthors{\cite{DouBarrWallStreetJournal}}
+\textsl{The Wall Street Journal}\index{Wall Street Journal@\textsl{Wall Street Journal}}~\indexauthors{\cite{DouBarrWallStreetJournal}}
described domain fronting
and ``collateral freedom''\index{collateral freedom@``collateral freedom''} in general,
depicting cloud service providers as being caught in the crossfire
@@ -4797,42 +4785,42 @@ caused by a Chinese\index{China} network attack system
later known as the Great Cannon\index{Great Cannon}~\cite{Marczak2015a-local}.
They blamed the attack on the attention brought by the news article.
As further fallout, Cloudflare\index{Cloudflare},
-a CDN\index{content delivery network} which Lantern\index{Lantern}
+a CDN\index{CDN} which Lantern\index{Lantern}
used for fronting and whose CEO was quoted in the article,
stopped supporting domain fronting~\cite{PrinceCloudflareHackerNews},
by beginning to enforce a match between the SNI\index{SNI}
and the Host header\index{Host (HTTP header)}.

-Since its first deployment, the Azure\index{Microsoft Azure}\index{meek-azure} backend
+Since its first deployment, the Azure\index{Microsoft!Azure}\index{meek!meek-azure} backend
had been slower, with fewer users, than the other two options,
-App Engine\index{Google App Engine}\index{meek-google}
-and CloudFront\index{Amazon CloudFront}\index{meek-amazon}.
+App Engine\index{Google!App Engine}\index{meek!meek-google}
+and CloudFront\index{Amazon CloudFront}\index{meek!meek-amazon}.
For months I~had chalked it up to limitations of the platform.
In April 2015, though, I~found the real source of the problem:
the component I~wrote that runs on Azure,
which receives domain-fronted HTTP\index{HTTP} requests and forwards them
-to the meek bridge\index{Tor bridge},
+to the meek bridge\index{Tor!bridge},
was not reusing TCP\index{TCP} connections.
For every outgoing request,
the code was doing a fresh TCP\index{TCP} and TLS\index{TLS} handshake---causing
a bottleneck at the bridge
as its CPU tried to cope with all the incoming TLS.
When I~fixed the code to reuse connections~\cite{tor-dev-meek-azure-persistent},
-the number of users (overall, not only for Azure\index{Microsoft Azure})
+the number of users (overall, not only for Azure\index{Microsoft!Azure})
had a sudden jump, increasing from 2,000 to 6,000 in two weeks.
Evidently, we had been leaving users on the table
by having one of the backends not run as fast as possible.

The deployment of domain fronting was being partly supported
by a \$500/month grant from Google\index{Google}.
-Already in February 2015, the monthly cost for App Engine\index{Google App Engine} alone
+Already in February 2015, the monthly cost for App Engine\index{Google!App Engine} alone
began to exceed that amount~\cite[\S Costs]{meek-wiki}.
In an effort to control costs,
in May 2015 we began to rate-limit\index{rate limiting} the
-App Engine\index{Google App Engine}\index{meek-google}
-and CloudFront\index{Amazon CloudFront}\index{meek-amazon} bridges\index{Tor bridge},
+App Engine\index{Google!App Engine}\index{meek!meek-google}
+and CloudFront\index{Amazon CloudFront}\index{meek!meek-amazon} bridges\index{Tor!bridge},
deliberately slowing the service so that fewer people would use it.
-Until October 2015, the Azure\index{Microsoft Azure}\index{meek-azure}
+Until October 2015, the Azure\index{Microsoft!Azure}\index{meek!meek-azure}
bridge was on a research grant provided by Microsoft\index{Microsoft},
so we allowed it to run as fast as possible.
When the grant expired, we rate-limited the Azure bridge as well.
@@ -4840,11 +4828,11 @@ This rate-limiting\index{rate limiting} explains the relative flatness of the us
from May to the end of~2015.

Google\index{Google} changed the terms of service\index{terms of service} governing
-App Engine\index{Google App Engine} in 2015.
+App Engine\index{Google!App Engine} in 2015.
(I~received a message announcing the change in May,
but it seems the terms had been changed online since March.)
The updated terms included a paragraph
-that seemed to prohibit running a proxy\index{proxy} service~\cite{google-cloud-service-terms-20150326000133}:
+that seemed to prohibit running a proxy service~\cite{google-cloud-service-terms-20150326000133}:
% I and Yawning got notice of the change on 2015-05-20.
% • Add a restriction against using the Google Cloud Platform services to
% provide network transport or sell bandwidth
@@ -4858,36 +4846,36 @@ that seemed to prohibit running a proxy\index{proxy} service~\cite{google-cloud-
under its control to: (i)~use the Services to provide a service, Application,
or functionality of network transport or transmission (including, but not
limited to, IP transit, virtual private networks\index{VPN},
-or content delivery networks\index{content delivery network}); or (ii)~sell bandwidth from the
+or content delivery networks\index{CDN}); or (ii)~sell bandwidth from the
Services.
\end{quote}
This was a stressful time: we seemed to have Google's\index{Google} support,
but the terms of service said otherwise.
I~contacted Google\index{Google} to ask for clarification or guidance,
-in the meantime leaving meek-google\index{meek-google} running;
+in the meantime leaving meek-google\index{meek!meek-google} running;
however, I~never got an answer to my questions.
The point became moot a year later,
-when Google\index{Google} shut down our App Engine\index{Google App Engine} project,
+when Google\index{Google} shut down our App Engine\index{Google!App Engine} project,
for another reason altogether; see below.

By this time we had not received reports of
any attempts to block domain fronting.
We did, however, suffer a few accidental outages
(which are just as bad as blocking, from a client's point of view).
Between July~20 and August~14, an account transition error
-left the Azure\index{meek-azure}
+left the Azure\index{meek!meek-azure}
configuration broken~\cite{tor-dev-meek-azure-outage-201508}.
I~set up another configuration on Azure and published instructions on how to use it,
-but it would not be available to the majority of users until the next release of Tor Browser\index{Tor Browser},
+but it would not be available to the majority of users until the next release of Tor Browser\index{Tor!Browser},
which happened on August~11.
-Between September~30 and October~9, the CloudFront\index{meek-amazon} bridge
+Between September~30 and October~9, the CloudFront\index{meek!meek-amazon} bridge
was effectively down because of an expired TLS\index{TLS} certificate\index{certificate}.
When it rebooted on October~9,
an administrative oversight caused its Tor relay identity fingerprint to
change---meaning that clients expecting the former fingerprint
refused to connect to it~\cite{tor-trac-17473}.
The situation was not fully resolved until November~4
-with the next release of Tor Browser\index{Tor Browser}:
+with the next release of Tor Browser\index{Tor!Browser}:
cascading failures led to over a month of downtime.

In October 2015 there appeared a couple of research papers
@@ -4905,7 +4893,7 @@ published a more comprehensive report on detecting meek
(and other protocols),
emphasizing practicality and precision.
They showed that some previously proposed
-classifiers would have untenable false-positive rates\index{false positives},
+classifiers\index{classification} would have untenable false-positive rates\index{false positive},
and constructed a classifier for meek
based on entropy\index{entropy} and timing features\index{packet size and timing}.
It's worth noting that since the first reported
@@ -4916,20 +4904,20 @@ to use techniques other than those described in these papers.
A~side benefit of building a circumvention system atop Tor\index{Tor}
-is easy integration with Tor Metrics\index{Tor Metrics}---the source of the user
+is easy integration with Tor Metrics\index{Tor!Metrics}---the source of the user
number estimates in this section.
Since the beginning of meek's deployment, we had known about a problem
-with the way it integrates with Tor Metrics\index{Tor Metrics}.
+with the way it integrates with Tor Metrics\index{Tor!Metrics}.
Tor\index{Tor} pluggable transports\index{pluggable transports} geolocate\index{geolocation} the client's IP address
in order to aggregate statistics by country.
-But when a meek bridge\index{Tor bridge} receives a connection,
+But when a meek bridge\index{Tor!bridge} receives a connection,
the ``client IP address'' it sees is not that of the true client,
but rather that of some cloud server,
the intermediary through which the client's domain-fronted traffic passes.
So the total user counts were fine,
but the per-country counts were meaningless.
-For example, because App Engine's\index{Google App Engine} servers were located in the U.S.\index{United States of America},
+For example, because App Engine's\index{Google!App Engine} servers were located in the U.S.\index{United States of America},
every meek-google connection was being counted
as if it belonged to a client in the U.S.\index{United States of America}\
By the end of 2015, meek users were a large enough fraction (about~20\%)
@@ -4941,9 +4929,9 @@ which fixed the per-country counts from then on.

\subsection*{2016: Taking off the reins; misuse; blocking efforts}

-In mid-January 2016 the Tor Project\index{Tor Project, The} asked me to raise
+In mid-January 2016 the Tor Project\index{Tor!Project} asked me to raise
the rate limits\index{rate limiting} on the meek bridges, in anticipation
-of rumored attempts to block\index{blocking} Tor\index{Tor} in Egypt\index{Egypt}.
+of rumored attempts to block Tor\index{Tor} in Egypt\index{Egypt}.
I~asked the bridge operators to raise the limits
from approximately 1~MB/s to 3~MB/s.
The effect of the relaxed rate limits\index{rate limiting} was immediate:
@@ -4955,7 +4943,7 @@ The first action that may have been a deliberate attempt
to block domain fronting came on January~29, 2016,
when the Great Firewall of China\index{Great Firewall of China}
blocked one of the edge servers\index{edge server}
-of the Azure\index{Microsoft Azure} CDN\index{content delivery network}.
+of the Azure\index{Microsoft!Azure} CDN\index{CDN}.
The blocking was by IP address\index{blocking!by address},
a severe method:
not only the domain name we were using for fronting,
but thousands of other names
@@ -4969,10 +4957,10 @@ I~am aware of no other incidents of edge server blocking.

\phantomsection
\label{para:meek-suspension}
The next surprise was on May~13, 2016.
-meek's App Engine\index{Google App Engine} backend\index{meek-google}
+meek's App Engine\index{Google!App Engine} backend\index{meek!meek-google}
stopped working and I~got a notice:
\begin{quote}
-We've recently detected some activity on your Google Cloud Platform\index{Google Cloud Platform}/API Project ID meek-reflect that appears to violate our Terms of Service\index{terms of service}. Please take a moment to review the Google Cloud Platform Terms of Service or the applicable Terms of Service for the specific Google API you are using.
+We've recently detected some activity on your Google Cloud Platform/API Project ID meek-reflect that appears to violate our Terms of Service\index{terms of service}.
Please take a moment to review the Google Cloud Platform Terms of Service or the applicable Terms of Service for the specific Google API you are using. Your project is being suspended for committing a general terms of service violation. @@ -4988,22 +4976,22 @@ through an unofficial channel, about what happened. Some botnet\index{botnet} had apparently been misusing meek for command and control\index{command and control} purposes. -Its operators had not even bothered to set up their own App Engine\index{Google App Engine} project; +Its operators had not even bothered to set up their own App Engine\index{Google!App Engine} project; they were free-riding on the service we had been operating for the public. -Although we may have been able to reinstate the meek-google\index{meek-google} service, +Although we might have been able to reinstate the meek-google\index{meek!meek-google} service, seeing as the suspension was the result of someone else's actions, not ours, with the existing uncertainty around the terms of service\index{terms of service} I~didn't have the heart to pursue it. -meek-google\index{meek-google} remained off, and users migrated to -meek-amazon\index{meek-amazon} or meek-azure\index{meek-azure}. +meek-google\index{meek!meek-google} remained off, and users migrated to +meek-amazon\index{meek!meek-amazon} or meek-azure\index{meek!meek-azure}. It turned out, later, that it had been no common botnet\index{botnet} -misusing meek-google\index{meek-google}, but an organized political hacker group, +misusing meek-google\index{meek!meek-google}, but an organized political hacker group, known as Cozy Bear\index{Cozy Bear} or APT29. Matthew Dunwoody presented observations to that effect in a FireEye\index{FireEye} blog post~\indexauthors{\cite{fireeye-apt29_domain_frontin}} in March 2017. -The malware would install a backdoor that operated over a Tor +The malware would install a backdoor that operated over a Tor\index{Tor!onion service} onion service\index{onion service}, and used meek for camouflage. He and Nick Carr had earlier presented those findings at DerbyCon\index{DerbyCon} in September 2016~\indexauthors{\cite{DunwoodyCarrDerbyCon2016}}, @@ -5013,7 +5001,7 @@ The year 2016 brought the first reports of efforts to block meek. These efforts all had in common that they used TLS fingerprinting\index{TLS!fingerprinting}\index{blocking!by content} in conjunction with SNI\index{SNI} inspection\index{blocking!by address}. In May, a Tor user reported that Cyberoam\index{Cyberoam}, -a firewall company, had released an update that enabled detection\index{detection} and blocking\index{blocking} +a firewall company, had released an update that enabled detection and blocking of meek, among other Tor pluggable transports\index{pluggable transports}~\cite{tor-dev-cyberoam}. Through experiments we determined that the firewall was detecting meek whenever it saw a combination of two features: @@ -5029,12 +5017,12 @@ by the firewall to limit collateral damage\index{collateral damage}: it did not block those domains for all clients, but only for the subset having a particular TLS fingerprint\index{TLS!fingerprinting}. I~admit that I~had not considered the possibility -of using TLS\index{TLS} and SNI\index{SNI} together to make a more precise classifier. +of using TLS\index{TLS} and SNI\index{SNI} together to make a more precise classifier\index{classification}.
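In outline, such a classifier is just a conjunction of the two features. A~minimal sketch in Go (my reconstruction of the observed behavior, not Cyberoam's actual rule set; the fingerprint label and the two \texttt{.example} domains are placeholders) looks like this:
\begin{verbatim}
package classifier

// Block only when a known client TLS fingerprint coincides with
// an SNI from a short list of front domains. Matching on the
// conjunction, rather than on either feature alone, is what
// limits collateral damage.
var frontDomains = map[string]bool{
    "www.google.com": true,
    "front2.example": true, // placeholder
    "front3.example": true, // placeholder
}

// fp is a label for a ClientHello fingerprint (say, a hash over
// the offered version, ciphersuites, and extensions), extracted
// by an earlier parsing step.
func shouldBlock(fp, sni string) bool {
    return fp == "firefox-38" && frontDomains[sni]
}
\end{verbatim}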
We had known since the beginning of the possibility of TLS fingerprinting, which is why we took the trouble to implement browser\index{web browser}-based TLS camouflage. The camouflage was performing as intended: even an ordinary Firefox~38\index{Firefox web browser} -(the basis of Tor Browser\index{Tor Browser}, and what meek camouflaged itself as) +(the basis of Tor Browser\index{Tor!Browser}, and what meek camouflaged itself as) would be blocked by the firewall when accessing one of the three listed domains\index{front domain}. However, Firefox~38 was by that time a year old. I~found a source~\cite{traffic-obf-cyberoam} saying that @@ -5048,10 +5036,10 @@ In July I~received a report of similar behavior by a FortiGuard firewall\index{FortiGuard}~\cite{traffic-obf-fortiguard} from Tor user Kanwaljeet Singh Channey\index{Channey, Kanwaljeet Singh}. The situation was virtually the same as in the Cyberoam\index{Cyberoam} case: -the firewall would block connections having a specific TLS fingerprint\index{TLS!fingerprint}\index{blocking!by content} +the firewall would block connections having a specific TLS fingerprint\index{TLS!fingerprinting}\index{blocking!by content} and a specific SNI\index{SNI}\index{blocking!by address}. This time, the TLS fingerprint was that of Firefox~45\index{Firefox web browser} -(which by then Tor Browser\index{Tor Browser} had upgraded to); +(which by then Tor Browser\index{Tor!Browser} had upgraded to); and there were two specific SNIs rather than three, omitting \nolinkurl{www.google.com}\index{www.google.com@\nolinkurl{www.google.com}}. As in the previous case, @@ -5060,7 +5048,7 @@ was sufficient to get through the firewall. For reasons not directly related to domain fronting or meek, I~had been interested in the blocking situation in Kazakhstan\index{Kazakhstan}, ever since Tor Metrics\index{Tor!Metrics} reported a sudden drop in the number of users in that country in June 2016~\cite{kazakhstan-wiki}. (See \autoref{sec:proxy-probe-kazakhstan} for other results from Kazakhstan.) I~worked with an anonymous collaborator\index{Anonymous}, who reported @@ -5082,29 +5070,31 @@ users reported from Brazil\index{Brazil} in particular. The explanation may be some kind of anomaly; for instance, some third-party software that happened to use meek, or a malware infection like the one that caused the shutdown -of meek-google\index{meek-google}. +of meek-google\index{meek!meek-google}. The count of users from Brazil\index{Brazil} dropped suddenly, from 1,500 almost to zero, on March~3, 2017, which happened also to be the day -that I~shut down meek-azure\index{meek-azure} +that I~shut down meek-azure\index{meek!meek-azure} pending a migration to new infrastructure. The Brazil\index{Brazil} count would remain low until rising again in June 2017. +\index{GAEuploader|(} In September 2016, I~began mentoring Katherine Li\index{Li, Katherine} -in writing GAEuploader\index{GAEuploader}~\cite{LiGAEuploader}, +in writing GAEuploader~\cite{LiGAEuploader}, a program to simplify and automate the process of setting up domain fronting. The program automatically uploads the necessary code -to Google App Engine\index{Google App Engine}, +to Google App Engine\index{Google!App Engine}, then outputs a bridge specification -ready to be pasted into Tor Browser\index{Tor Browser} or Orbot\index{Orbot}. +ready to be pasted into Tor Browser\index{Tor!Browser} or Orbot\index{Orbot}.
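For illustration, a bridge specification for meek is a single line of the following general shape (this example is modeled on the former meek-google configuration, with the optional relay fingerprint omitted; the dummy address \texttt{0.0.2.0:1} is ignored by the transport):
\begin{verbatim}
meek 0.0.2.0:1 url=https://meek-reflect.appspot.com/ front=www.google.com
\end{verbatim}
The \texttt{url} parameter names the reflector that forwards requests to the bridge, and the \texttt{front} parameter gives the domain to place in the DNS request and TLS SNI\index{SNI}.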
We hoped also that the code would be useful to other projects, like XX-Net\index{XX-Net}~\cite{xx-net}, that require users to perform the complicated task of -uploading code to App Engine\index{Google App Engine}. +uploading code to App Engine\index{Google!App Engine}. GAEuploader\index{GAEuploader} had beta releases in January~\cite{tor-dev-gaeuploader} and November~\cite{tor-dev-gaeuploader-windows} 2017; however, the effect on the number of users has so far not been substantial. +\index{GAEuploader|)} Between October~19 and November~10, 2016, the number of meek users decreased globally by about a third~\cite{tor-trac-20495}. @@ -5120,34 +5110,34 @@ Once a fixed release was available, user numbers recovered. As a side effect of this event, we learned that a majority of meek users -were using Orbot\index{Orbot} rather than Tor Browser\index{Tor Browser}. +were using Orbot\index{Orbot} rather than Tor Browser\index{Tor!Browser}. \subsection*{2017: Long-term support} -In January 2017, a grant I~had been using to pay meek-azure's\index{meek-azure} +In January 2017, a grant I~had been using to pay meek-azure's\index{meek!meek-azure} bandwidth bills ran out. Lacking the means to keep it running, I~announced my intention to shut it down~\cite{tor-dev-meek-azure-run-out}. Shortly thereafter, Team Cymru\index{Team Cymru} offered to set up -their own instances and pay the CDN\index{content delivery network} fees, -and so we made plans to migrate meek-azure\index{meek-azure} +their own instances and pay the CDN\index{CDN} fees, +and so we made plans to migrate meek-azure\index{meek!meek-azure} to the new setup in the next releases. For cost reasons, though, I~still had to shut down\index{meek!costs of} the old configuration before the new releases -of Tor Browser\index{Tor Browser} and Orbot were fully ready. +of Tor Browser\index{Tor!Browser} and Orbot were fully ready. I~shut down my configuration on March~3. -The next release of Tor Browser\index{Tor Browser} was on March~7, +The next release of Tor Browser\index{Tor!Browser} was on March~7, and the next release of Orbot\index{Orbot} was on March~22: so there was a period of days or weeks during which -meek-azure\index{meek-azure} was non-functional. +meek-azure\index{meek!meek-azure} was non-functional. It would have been better to allow the two configurations to run concurrently for a time, so that users of the old would be able to transparently upgrade to the new---but for cost reasons it was not possible. Perhaps not coincidentally, the surge of users from Brazil\index{Brazil}, which had started in July 2016, -ceased on March~3, the same day I~shut down meek-azure\index{meek-azure} before its migration. +ceased on March~3, the same day I~shut down meek-azure\index{meek!meek-azure} before its migration. Handing over control of the infrastructure was a relief to me. I~had managed to make sure the monthly bills got paid\index{meek!costs of}, but it took more care and attention than I~liked. @@ -5192,6 +5182,8 @@ roughly doubled during that time. \index{meek!history of|)} +\index{meek|)} + \index{domain fronting|)textbf} @@ -5227,7 +5219,7 @@ for Tor~\cite{tor-blog-combined-flash-proxy-pyobfsproxy-browser-bundles}, but since its introduction in 2013 it never had many users~\cite{tor-metrics-userstats-bridge-transport-websocket}.
I~believe that its lack of adoption was a result mainly -of its incompatibility with NAT (network address translation)\index{network address translation}: +of its incompatibility with NAT (network address translation)\index{NAT}: its use of the TCP-based\index{TCP} WebSocket\index{WebSocket} protocol~\cite{rfc6455} required clients to follow complicated port forwarding instructions\index{usability}~\cite{flashproxyhowto-wiki}. For that reason, flash proxy was deprecated in 2016~\cite{tor-trac-17428}. @@ -5236,11 +5228,11 @@ Snowflake keeps the basic idea of in-browser proxies, but replaces WebSocket\index{WebSocket} with WebRTC~\cite{draft-ietf-rtcweb-overview}\index{WebRTC}, a suite of protocols for peer-to-peer communications. Importantly, WebRTC uses UDP\index{UDP} for communication, -and includes facilities for NAT\index{network address translation} traversal, +and includes facilities for NAT\index{NAT} traversal, allowing most clients to use it without manual configuration. WebRTC mandatorily encrypts\index{encryption} its channels, -which as a side effect obscures any keywords or byte patterns -in the tunneled\index{tunneling+} traffic. +which as a side effect obscures any keywords or byte patterns\index{detection!by content} +in the tunneled\index{tunneling} traffic. (Still leaving open the possibility of detecting\index{detection!by content} the use of WebRTC\index{WebRTC} itself---see \autoref{sec:webrtc-fingerprinting}.) @@ -5251,8 +5243,8 @@ uProxy required clients to know a confederate outside the censor's network who could run a proxy. The client would connect through the proxy using WebRTC\index{WebRTC}; the proxy would then directly fetch -the client's requested URLs\index{URL+}. -Snowflake centralizes the proxy discovery process\index{proxy discovery problem}, +the client's requested URLs\index{URL}. +Snowflake centralizes the proxy discovery process\index{proxy distribution}, removing the requirement to arrange one's own proxy outside the firewall. Snowflake proxies are merely dumb pipes to a more capable proxy, @@ -5267,7 +5259,7 @@ which resemble snowflakes in their impermanence and uniqueness. \pagebreak[4] Snowflake now exists in an experimental alpha release, -incorporated into Tor Browser\index{Tor Browser}. +incorporated into Tor Browser\index{Tor!Browser}. My main collaborators on the Snowflake project are Arlo Breault\index{Breault, Arlo}, Mia Gil~Epner\index{Gil Epner, Mia}, @@ -5283,15 +5275,15 @@ Hooman Mohajeri Moghaddam\index{Moghaddam, Hooman Mohajeri}. \includegraphics{figures/snowflake} \caption{ Schematic of Snowflake. -See \autoref{fig:snowflake-rendezvous} for an elaboration -on Steps~1, 2, and~3. -\todo[inline]{missing bridge} +See \autoref{fig:snowflake-rendezvous} +on page~\pageref{fig:snowflake-rendezvous} +for elaboration on Steps~1, 2, and~3. 
} \index{broker (Snowflake)} \index{offer (Snowflake)} \index{answer (Snowflake)} \index{WebRTC} -\index{domain fronting!as Snowflake rendezvous} +\index{domain fronting!in Snowflake rendezvous} \label{fig:snowflake} \end{figure} @@ -5307,7 +5299,7 @@ many \emph{clients}, responsible for initially requesting service and then establishing peer-to-peer connections with snowflake proxies \item -a \emph{broker}\index{broker (Snowflake)|textbf}, an online database that serves +a \emph{broker}\index{broker (Snowflake)}, an online database that serves to match clients with snowflake proxies \item a \emph{bridge} @@ -5318,7 +5310,7 @@ The architecture of the system is influenced by the requirement that proxies run in a browser\index{web browser}, and the nature of WebRTC\index{WebRTC} connection establishment, which uses a bidirectional handshake. -In our implementation, the bridge is really a Tor bridge\index{Tor!bridges}. +In our implementation, the bridge is really a Tor bridge\index{Tor!bridge}. Even though a Tor circuit\index{Tor!circuit} consists of multiple hops, that fact is abstracted away from the Tor client's perspective; Snowflake does not inherently depend on Tor\index{Tor}. @@ -5363,8 +5355,8 @@ does not matter for this step). The snowflake proxy then copies data back and forth between client and bridge until it is terminated. The client's communication with the bridge is -encrypted and authenticated end-to-end through -the WebRTC\index{WebRTC} tunnel, +encrypted\index{encryption} and authenticated\index{integrity} end-to-end through +the WebRTC\index{WebRTC} tunnel\index{tunneling}, so the proxy cannot interfere with it. When the snowflake proxy terminates, the client may request a new one. @@ -5379,7 +5371,7 @@ The rendezvous phase bears further explanation. Steps~1, 2, and~3 actually happen synchronously, using interleaved HTTP\index{HTTP} requests and responses: see \autoref{fig:snowflake-rendezvous}. -The client's single request uses domain fronting\index{domain fronting!as rendezvous for Snowflake}\index{rendezvous!of Snowflake}, +The client's single request uses domain fronting\index{domain fronting!in Snowflake rendezvous}\index{rendezvous!of Snowflake}, but the requests of the snowflake proxies are direct. In Step~1, the client sends a request containing its offer\index{offer (Snowflake)}. The broker\index{broker (Snowflake)} holds the connection open but does not immediately respond. @@ -5388,7 +5380,7 @@ In Step~2, a snowflake proxy makes a polling request and the broker responds with the client's offer\index{offer (Snowflake)}. The snowflake composes its answer\index{answer (Snowflake)} and sends it back to the broker in a second HTTP\index{HTTP} request -(linked to the first by a random\index{randomness} token). +(linked to the first by a random token). In Step~3, the broker\index{broker (Snowflake)} finally responds to the client's initial request by passing on the snowflake proxy's answer. From the client's point of view, it has sent @@ -5416,7 +5408,7 @@ to send back its answer. \index{POST (HTTP method)} \index{HTTP} \index{200 (HTTP status code)} -\index{domain fronting!as Snowflake rendezvous} +\index{domain fronting!in Snowflake rendezvous} \index{rendezvous!of Snowflake} \index{broker (Snowflake)} \index{offer (Snowflake)} @@ -5425,13 +5417,13 @@ to send back its answer. 
\end{figure} One may ask, if the domain-fronted\index{domain fronting} -rendezvous\index{rendezvous} channel +rendezvous channel is bidirectional and already assumed to be difficult to block, doesn't it suffice for circumvention on its own? The answer is that it does suffice---that's the idea behind meek (\autoref{sec:meek-history}). The disadvantage of building a system -exclusively on domain fronting, though, is high monetary cost\index{domain fronting!cost of} +exclusively on domain fronting, though, is high monetary cost\index{domain fronting!costs of} (see \autoref{tab:meek-costs} on page~\pageref{tab:meek-costs}). Snowflake offloads the bulk of data transfer onto WebRTC\index{WebRTC}, and uses expensive domain fronting only for rendezvous. @@ -5460,7 +5452,7 @@ proposed by Feamster et~al.\ in~2003. WebRTC\index{WebRTC} offers two features that are necessary for Snowflake: \begin{enumerate*} \item it is supported in web browsers\index{web browser}, and -\item it deals with NAT\index{network address translation}. +\item it deals with NAT\index{NAT}. \end{enumerate*} In other respects, though, WebRTC\index{WebRTC} is a nuisance. Its close coupling with browser code makes it difficult @@ -5538,7 +5530,7 @@ ICE\index{ICE} (Interactive Connectivity Establishment)~\cite{rfc5245} is a combination of two protocols. STUN\index{STUN} (Session Traversal Utilities for NAT)~\cite{rfc5389} helps hosts open and maintain a binding -in a NAT\index{network address translation} table. +in a NAT\index{NAT} table. TURN\index{TURN} (Traversal Using Relays around NAT)~\cite{rfc5766} is a way of proxying through a third party when the end hosts' NAT configurations are such @@ -5560,9 +5552,9 @@ Media channels use SRTP\index{SRTP} (Secure Real-time Transport Protocol)~\cite{rfc3711} and data channels use DTLS\index{DTLS} (Datagram TLS)~\cite{rfc6347}. -Even though the contents of both are encrypted, +Even though the contents of both are encrypted\index{encryption}, an observer can easily distinguish\index{distinguishability} -a media channel from a data channel\index{WebRTC!media channel versus data channel}. +a media channel from a data channel\index{WebRTC!media versus data channels}. Applications that use media channels have options for doing key exchange: some borrow the DTLS handshake in a process called @@ -5575,8 +5567,8 @@ As with TLS\index{TLS}, DTLS offers a wealth of fingerprintable features. Some of the most salient are the protocol version, extensions, -the client's offered ciphersuites, -and values in the server's certificate. +the client's offered ciphersuites\index{TLS!ciphersuite}, +and values in the server's certificate\index{certificate}. \end{description} Snowflake uses a WebRTC\index{WebRTC} library extracted @@ -5592,8 +5584,8 @@ in order to get an idea of the implementation choices being made in practice. We tested three applications that use media channels\index{WebRTC!media versus data channels}, all chat services: -Google Hangouts (\url{https://hangouts.google.com})\index{Google Hangouts}, -Facebook Messenger (\url{https://www.messenger.com})\index{Facebook Messenger}, +Google Hangouts (\url{https://hangouts.google.com})\index{Google!Hangouts}, +Facebook Messenger (\url{https://www.messenger.com})\index{Facebook!Messenger}, and OpenTokRTC (\url{https://opentokrtc.com/})\index{OpenTokRTC}. We also tested two applications that use data channels: Snowflake itself and @@ -5607,7 +5599,7 @@ so do Hangouts and Sharefest.
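To make these fingerprintable fields concrete before going over the observations, here is a Go sketch (an illustration, not the survey script itself) that extracts the protocol version and offered ciphersuites\index{TLS!ciphersuite} from a UDP\index{UDP} payload assumed to carry a single, unfragmented DTLS\index{DTLS} ClientHello:
\begin{verbatim}
package dtlsfp

import (
    "encoding/binary"
    "errors"
)

// Fingerprint holds two fingerprintable fields of a DTLS ClientHello.
type Fingerprint struct {
    Version      uint16   // client_version, e.g. 0xfefd = DTLS 1.2
    Ciphersuites []uint16 // in the order offered by the client
}

func ParseClientHello(p []byte) (*Fingerprint, error) {
    // Record header: type(1) version(2) epoch(2) seq(6) length(2).
    if len(p) < 13 || p[0] != 22 { // 22 = handshake
        return nil, errors.New("not a DTLS handshake record")
    }
    hs := p[13:]
    // Handshake header: type(1) length(3) msg_seq(2)
    // fragment_offset(3) fragment_length(3).
    if len(hs) < 12 || hs[0] != 1 { // 1 = ClientHello
        return nil, errors.New("not a ClientHello")
    }
    hello := hs[12:]
    if len(hello) < 35 { // version(2) + random(32) + session_id_len(1)
        return nil, errors.New("truncated ClientHello")
    }
    fp := &Fingerprint{Version: binary.BigEndian.Uint16(hello[0:2])}
    i := 34
    i += 1 + int(hello[i]) // skip session_id
    if i >= len(hello) {
        return nil, errors.New("truncated ClientHello")
    }
    i += 1 + int(hello[i]) // skip cookie, a field unique to DTLS
    if i+2 > len(hello) {
        return nil, errors.New("truncated ClientHello")
    }
    n := int(binary.BigEndian.Uint16(hello[i : i+2]))
    i += 2
    if n%2 != 0 || i+n > len(hello) {
        return nil, errors.New("bad ciphersuite list")
    }
    for j := i; j < i+n; j += 2 {
        fp.Ciphersuites = append(fp.Ciphersuites,
            binary.BigEndian.Uint16(hello[j:j+2]))
    }
    return fp, nil
}
\end{verbatim}
A~real monitor would additionally have to reassemble fragmented handshakes and parse the extension list, which carries much of the distinguishing information.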
All applications other than Hangouts used DTLS\index{DTLS} for key exchange. While the client portions differed, -the server certificate was more promising, +the server certificate\index{certificate} was a more promising fingerprinting target, in all cases having a Common Name\index{common name (X.509)} of ``WebRTC'' and a validity of 30~days. @@ -5622,7 +5614,7 @@ having three distinct fingerprints. While it is difficult to generalize from one measurement at one site, these results suggest that WebRTC\index{WebRTC} use---at least the forms that use DTLS---is not common. -We guessed that Google Hangouts\index{Google Hangouts} would be the +We guessed that Google Hangouts\index{Google!Hangouts} would be the main source of WebRTC connections; however, our script would not have found Hangouts connections because Hangouts does not use DTLS. @@ -5657,22 +5649,25 @@ \backmatter -\defbibnote{bibnote}{ -I~strive to provide a URL\index{URL} with references whenever possible. -I~archived a copy of each URL at the -Internet Archive\index{Internet Archive} -on or about December 14, 2017. +\defbibnote{bibnote}{% +I~strive to provide a URL\index{URL} for each reference whenever possible. +On December 15, 2017, +I~archived each URL +at the Internet Archive\index{Internet Archive}; +or, when that didn't work, +at \nolinkurl{archive.is}\index{archive.is@\nolinkurl{archive.is}}. If a link is broken, look for an archived version at -\url{https://web.archive.org/}. -\todo[inline]{Archive the URLs.}} +\url{https://web.archive.org/} or +\url{https://archive.is/}. +Many of the references are also cached in CensorBib\index{CensorBib}, +\url{https://censorbib.nymity.ch/}. +} \printbibliography[heading=bibintoc,prenote=bibnote] \clearpage \phantomsection \addcontentsline{toc}{chapter}{\indexname} -\setindexprenote{\dragons} - \printindex \end{CJK}