youtubebeat/vendor/github.com/saintfish/chardet/testdata/utf8.html

2603 lines
206 KiB
HTML
Raw Normal View History

2018-11-18 15:32:28 +01:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="en" dir="ltr" class="client-nojs" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>UTF-8 - Wikipedia, the free encyclopedia</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta name="generator" content="MediaWiki 1.20wmf9" />
<link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=UTF-8&amp;action=edit" />
<link rel="edit" title="Edit this page" href="/w/index.php?title=UTF-8&amp;action=edit" />
<link rel="apple-touch-icon" href="//en.wikipedia.org/apple-touch-icon.png" />
<link rel="shortcut icon" href="/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" title="Wikipedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd" />
<link rel="copyright" href="//creativecommons.org/licenses/by-sa/3.0/" />
<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=ext.gadget.ReferenceTooltips%2Cteahouse%7Cext.wikihiero%7Cmediawiki.legacy.commonPrint%2Cshared%7Cskins.vector&amp;only=styles&amp;skin=vector&amp;*" type="text/css" media="all" />
<meta name="ResourceLoaderDynamicStyles" content="" />
<link rel="stylesheet" href="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=site&amp;only=styles&amp;skin=vector&amp;*" type="text/css" media="all" />
<style type="text/css" media="all">a:lang(ar),a:lang(ckb),a:lang(fa),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}
/* cache key: enwiki:resourceloader:filter:minify-css:7:4edf467d58fee44c83dfe724ca07c179 */</style>
<script src="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;skin=vector&amp;*" type="text/javascript"></script>
<script type="text/javascript">if(window.mw){
mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"UTF-8","wgTitle":"UTF-8","wgCurRevisionId":507478399,"wgArticleId":32188,"wgIsArticle":true,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["All articles with unsourced statements","Articles with unsourced statements from December 2011","All articles with specifically marked weasel-worded phrases","Articles with specifically marked weasel-worded phrases from September 2011","Articles with unsourced statements from February 2012","Articles needing additional references from October 2009","All articles needing additional references","Articles with unsourced statements from December 2009","Character sets","Encodings","Character encoding","Unicode Transformation Formats"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRelevantPageName":"UTF-8","wgRestrictionEdit":[],"wgRestrictionMove":[],"wgSearchNamespaces":[0],"wgVectorEnabledModules":{"collapsiblenav":true,"collapsibletabs":true,"editwarning":true,"expandablesearch":false,"footercleanup":false,"sectioneditlinks":false,"simplesearch":true,"experiments":true},"wgWikiEditorEnabledModules":{"toolbar":true,"dialogs":true,"hidesig":true,"templateEditor":false,"templates":false,"preview":false,"previewDialog":false,"publish":false,"toc":false},"wgTrackingToken":"05adb93d1ad4862b032b79a4b4b060fd","wgArticleFeedbackv5Permissions":{"oversighter":false,"moderator":false,"editor":false},"wikilove-recipient":"","wikilove-anon":0,"mbEmailEnabled":true,"mbUserEmail":false,"mbIsEmailConfirmationPending":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1,"quality":2,"pristine":3}}},"wgStableRevisionId":null,"wgCategoryTreePageCategoryOptions":"{\"mode\":0,\"hideprefix\":20,\"showcount\":true,\"namespaces\":false}","Geo":{"city":"","country":""},"wgNoticeProject":"wikipedia","aftv5Whitelist":false});
}</script><script type="text/javascript">if(window.mw){
mw.loader.implement("user.options",function(){mw.user.options.set({"ccmeonemails":0,"cols":80,"date":"default","diffonly":0,"disablemail":0,"disablesuggest":0,"editfont":"default","editondblclick":0,"editsection":1,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":0,"extendwatchlist":0,"externaldiff":0,"externaleditor":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"imagesize":2,"justify":0,"math":0,"minordefault":0,"newpageshidepatrolled":0,"nocache":0,"noconvertlink":0,"norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"quickbar":5,"rcdays":7,"rclimit":50,"rememberpassword":0,"rows":25,"searchlimit":20,"showhiddencats":false,"showjumplinks":1,"shownumberswatching":1,"showtoc":1,"showtoolbar":1,"skin":"vector","stubthreshold":0,"thumbsize":4,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":0,"watchdeletion":0,"watchlistdays":3
,"watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,"flaggedrevssimpleui":1,"flaggedrevsstable":0,"flaggedrevseditdiffs":true,"flaggedrevsviewdiffs":false,"vector-simplesearch":1,"useeditwarning":1,"vector-collapsiblenav":1,"usebetatoolbar":1,"usebetatoolbar-cgd":1,"wikilove-enabled":1,"variant":"en","language":"en","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"searchNs100":false,"searchNs101":false,"searchNs108":false,"searchNs109":false,"gadget-teahouse":1,"gadget-ReferenceTooltips":1,"gadget-DRN-wizard":1,"gadget-mySandbox":1});;},{},{});mw.loader.implement("user.tokens",function(){mw.user.tokens.set({"editToken":"+\\",
"watchToken":false});;},{},{});
/* cache key: enwiki:resourceloader:filter:minify-js:7:81f7c0502e347822f14be81f96ff03ab */
}</script>
<script type="text/javascript">if(window.mw){
mw.loader.load(["mediawiki.page.startup","mediawiki.legacy.wikibits","mediawiki.legacy.ajax","ext.wikimediaShopLink.core"]);
}</script>
<!--[if lt IE 7]><style type="text/css">body{behavior:url("/w/skins-1.20wmf9/vector/csshover.min.htc")}</style><![endif]--></head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject page-UTF-8 skin-vector action-view vector-animateLayout">
<div id="mw-page-base" class="noprint"></div>
<div id="mw-head-base" class="noprint"></div>
<!-- content -->
<div id="content" class="mw-body">
<a id="top"></a>
<div id="mw-js-message" style="display:none;"></div>
<!-- sitenotice -->
<div id="siteNotice"><!-- centralNotice loads here --></div>
<!-- /sitenotice -->
<!-- firstHeading -->
<h1 id="firstHeading" class="firstHeading"><span dir="auto">UTF-8</span></h1>
<!-- /firstHeading -->
<!-- bodyContent -->
<div id="bodyContent">
<!-- tagline -->
<div id="siteSub">From Wikipedia, the free encyclopedia</div>
<!-- /tagline -->
<!-- subtitle -->
<div id="contentSub"></div>
<!-- /subtitle -->
<!-- jumpto -->
<div id="jump-to-nav" class="mw-jump">
Jump to: <a href="#mw-head">navigation</a>, <a href="#p-search">search</a>
</div>
<!-- /jumpto -->
<!-- bodycontent -->
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr"><div style="display:none;" class="pef-notification-container">
<div class="pef-notification">
<div class="pef-notification-checkmark">&nbsp;</div>
<span></span>
</div>
</div><p><b>UTF-8</b> (<b><a href="/wiki/Universal_Character_Set" title="Universal Character Set">UCS</a> Transformation Format—8-bit</b><sup id="cite_ref-0" class="reference"><a href="#cite_note-0"><span>[</span>1<span>]</span></a></sup>) is a <a href="/wiki/Variable-width_encoding" title="Variable-width encoding">variable-width encoding</a> that can represent every <a href="/wiki/Character_(computing)" title="Character (computing)">character</a> in the <a href="/wiki/Unicode" title="Unicode">Unicode</a> character set. It was designed for <a href="/wiki/Backward_compatibility" title="Backward compatibility">backward compatibility</a> with <a href="/wiki/ASCII" title="ASCII">ASCII</a> and to avoid the complications of <a href="/wiki/Endianness" title="Endianness">endianness</a> and <a href="/wiki/Byte_order_mark" title="Byte order mark">byte order marks</a> in <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a> and <a href="/wiki/UTF-32" title="UTF-32">UTF-32</a>.</p>
<p>UTF-8 has become the dominant character encoding for the <a href="/wiki/World-Wide_Web" title="World-Wide Web" class="mw-redirect">World-Wide Web</a>, accounting for more than half of all Web pages.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span>[</span>2<span>]</span></a></sup><sup id="cite_ref-BuiltWith_2-0" class="reference"><a href="#cite_note-BuiltWith-2"><span>[</span>3<span>]</span></a></sup><sup id="cite_ref-W3Techs_3-0" class="reference"><a href="#cite_note-W3Techs-3"><span>[</span>4<span>]</span></a></sup> The <a href="/wiki/Internet_Engineering_Task_Force" title="Internet Engineering Task Force">Internet Engineering Task Force</a> (IETF) requires all <a href="/wiki/Internet" title="Internet">Internet</a> <a href="/wiki/Protocol_(computing)" title="Protocol (computing)" class="mw-redirect">protocols</a> to identify the <a href="/wiki/Character_encoding" title="Character encoding">encoding</a> used for character data, and the supported character encodings must include UTF-8.<sup id="cite_ref-rfc2277_4-0" class="reference"><a href="#cite_note-rfc2277-4"><span>[</span>5<span>]</span></a></sup> The <a href="/wiki/Internet_Mail_Consortium" title="Internet Mail Consortium">Internet Mail Consortium</a> (IMC) recommends that all e-mail programs be able to display and create mail using UTF-8.<sup id="cite_ref-IMC_5-0" class="reference"><a href="#cite_note-IMC-5"><span>[</span>6<span>]</span></a></sup> UTF-8 is also increasingly being used as the default character encoding in <a href="/wiki/Operating_systems" title="Operating systems" class="mw-redirect">operating systems</a>, <a href="/wiki/Programming_languages" title="Programming languages" class="mw-redirect">programming languages</a>, <a href="/wiki/Application_programming_interface" title="Application programming interface">APIs</a>, and <a href="/wiki/Application_software" title="Application software">software applications</a>.<sup class="Template-Fact" 
style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources from December 2011">citation needed</span></a></i>]</sup></p>
<p>UTF-8 encodes each of the 1,112,064<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span>[</span>7<span>]</span></a></sup> <a href="/wiki/Code_point" title="Code point">code points</a> in the Unicode character set using one to four 8-bit <a href="/wiki/Byte" title="Byte">bytes</a> (termed "<a href="/wiki/Octet_(computing)" title="Octet (computing)">octets</a>" in the Unicode Standard). Code points with lower numerical values (i.e. earlier code positions in the Unicode character set, which tend to occur more frequently in practice) are encoded using fewer bytes.<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span>[</span>8<span>]</span></a></sup> The first 128 characters of Unicode, which correspond one-to-one with <a href="/wiki/ASCII" title="ASCII">ASCII</a>, are encoded using a single octet with the same binary value as ASCII, making valid ASCII text valid UTF-8-encoded Unicode as well.</p>
<p>The official <a href="/wiki/Internet_Assigned_Numbers_Authority" title="Internet Assigned Numbers Authority">IANA</a> code for the UTF-8 character encoding is <code>UTF-8</code>.<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span>[</span>9<span>]</span></a></sup></p>
<table id="toc" class="toc">
<tr>
<td>
<div id="toctitle">
<h2>Contents</h2>
</div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="#History"><span class="tocnumber">1</span> <span class="toctext">History</span></a></li>
<li class="toclevel-1 tocsection-2"><a href="#Description"><span class="tocnumber">2</span> <span class="toctext">Description</span></a>
<ul>
<li class="toclevel-2 tocsection-3"><a href="#Examples"><span class="tocnumber">2.1</span> <span class="toctext">Examples</span></a></li>
<li class="toclevel-2 tocsection-4"><a href="#Overlong_encodings"><span class="tocnumber">2.2</span> <span class="toctext">Overlong encodings</span></a></li>
<li class="toclevel-2 tocsection-5"><a href="#Codepage_layout"><span class="tocnumber">2.3</span> <span class="toctext">Codepage layout</span></a></li>
<li class="toclevel-2 tocsection-6"><a href="#Invalid_byte_sequences"><span class="tocnumber">2.4</span> <span class="toctext">Invalid byte sequences</span></a></li>
<li class="toclevel-2 tocsection-7"><a href="#Invalid_code_points"><span class="tocnumber">2.5</span> <span class="toctext">Invalid code points</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-8"><a href="#Official_name_and_variants"><span class="tocnumber">3</span> <span class="toctext">Official name and variants</span></a></li>
<li class="toclevel-1 tocsection-9"><a href="#Derivatives"><span class="tocnumber">4</span> <span class="toctext">Derivatives</span></a>
<ul>
<li class="toclevel-2 tocsection-10"><a href="#CESU-8"><span class="tocnumber">4.1</span> <span class="toctext">CESU-8</span></a></li>
<li class="toclevel-2 tocsection-11"><a href="#Modified_UTF-8"><span class="tocnumber">4.2</span> <span class="toctext">Modified UTF-8</span></a></li>
<li class="toclevel-2 tocsection-12"><a href="#Extending_from_31_bit_to_36_bit_range"><span class="tocnumber">4.3</span> <span class="toctext">Extending from 31 bit to 36 bit range</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-13"><a href="#Byte_order_mark"><span class="tocnumber">5</span> <span class="toctext">Byte order mark</span></a></li>
<li class="toclevel-1 tocsection-14"><a href="#Advantages_and_disadvantages"><span class="tocnumber">6</span> <span class="toctext">Advantages and disadvantages</span></a>
<ul>
<li class="toclevel-2 tocsection-15"><a href="#General"><span class="tocnumber">6.1</span> <span class="toctext">General</span></a>
<ul>
<li class="toclevel-3 tocsection-16"><a href="#Advantages"><span class="tocnumber">6.1.1</span> <span class="toctext">Advantages</span></a></li>
<li class="toclevel-3 tocsection-17"><a href="#Disadvantages"><span class="tocnumber">6.1.2</span> <span class="toctext">Disadvantages</span></a></li>
</ul>
</li>
<li class="toclevel-2 tocsection-18"><a href="#Compared_to_single-byte_encodings"><span class="tocnumber">6.2</span> <span class="toctext">Compared to single-byte encodings</span></a>
<ul>
<li class="toclevel-3 tocsection-19"><a href="#Advantages_2"><span class="tocnumber">6.2.1</span> <span class="toctext">Advantages</span></a></li>
<li class="toclevel-3 tocsection-20"><a href="#Disadvantages_2"><span class="tocnumber">6.2.2</span> <span class="toctext">Disadvantages</span></a></li>
</ul>
</li>
<li class="toclevel-2 tocsection-21"><a href="#Compared_to_other_multi-byte_encodings"><span class="tocnumber">6.3</span> <span class="toctext">Compared to other multi-byte encodings</span></a>
<ul>
<li class="toclevel-3 tocsection-22"><a href="#Advantages_3"><span class="tocnumber">6.3.1</span> <span class="toctext">Advantages</span></a></li>
<li class="toclevel-3 tocsection-23"><a href="#Disadvantages_3"><span class="tocnumber">6.3.2</span> <span class="toctext">Disadvantages</span></a></li>
</ul>
</li>
<li class="toclevel-2 tocsection-24"><a href="#Compared_to_UTF-16"><span class="tocnumber">6.4</span> <span class="toctext">Compared to UTF-16</span></a>
<ul>
<li class="toclevel-3 tocsection-25"><a href="#Advantages_4"><span class="tocnumber">6.4.1</span> <span class="toctext">Advantages</span></a></li>
<li class="toclevel-3 tocsection-26"><a href="#Disadvantages_4"><span class="tocnumber">6.4.2</span> <span class="toctext">Disadvantages</span></a></li>
</ul>
</li>
</ul>
</li>
<li class="toclevel-1 tocsection-27"><a href="#See_also"><span class="tocnumber">7</span> <span class="toctext">See also</span></a></li>
<li class="toclevel-1 tocsection-28"><a href="#References"><span class="tocnumber">8</span> <span class="toctext">References</span></a></li>
<li class="toclevel-1 tocsection-29"><a href="#External_links"><span class="tocnumber">9</span> <span class="toctext">External links</span></a></li>
</ul>
</td>
</tr>
</table>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=1" title="Edit section: History">edit</a>]</span> <span class="mw-headline" id="History">History</span></h2>
<p>By early 1992 the search was on for a good byte-stream encoding of multi-byte character sets. The draft <a href="/wiki/Universal_Character_Set" title="Universal Character Set">ISO 10646</a> standard contained a non-required <a href="/wiki/Addendum" title="Addendum">annex</a> called <a href="/wiki/UTF-1" title="UTF-1">UTF-1</a> that provided a byte-stream encoding of its 32-bit code points. This encoding was not satisfactory on performance grounds, but did introduce the notion that bytes in the range of 0–127 continue representing the ASCII characters in UTF, thereby providing backward compatibility with ASCII.</p>
<p>In July 1992, the <a href="/wiki/X/Open" title="X/Open">X/Open</a> committee XoJIG was looking for a better encoding. Dave Prosser of <a href="/wiki/Unix_System_Laboratories" title="Unix System Laboratories">Unix System Laboratories</a> submitted a proposal for one that had faster implementation characteristics and introduced the improvement that 7-bit ASCII characters would <i>only</i> represent themselves; all multibyte sequences would include only bytes where the high bit was set.</p>
<p>In August 1992, this proposal was circulated by an <a href="/wiki/IBM" title="IBM">IBM</a> X/Open representative to interested parties. <a href="/wiki/Ken_Thompson" title="Ken Thompson">Ken Thompson</a> of the <a href="/wiki/Plan_9_from_Bell_Labs" title="Plan 9 from Bell Labs">Plan 9</a> <a href="/wiki/Operating_system" title="Operating system">operating system</a> group at <a href="/wiki/Bell_Labs" title="Bell Labs">Bell Labs</a> then made a crucial modification to the encoding to allow it to be self-synchronizing, meaning that it was not necessary to read from the beginning of the string to find code point boundaries. Thompson's design was outlined on September 2, 1992, on a placemat in a New Jersey diner with <a href="/wiki/Rob_Pike" title="Rob Pike">Rob Pike</a>. The following days, Pike and Thompson implemented it and updated <a href="/wiki/Plan_9_from_Bell_Labs" title="Plan 9 from Bell Labs">Plan 9</a> to use it throughout, and then communicated their success back to X/Open.<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span>[</span>10<span>]</span></a></sup></p>
<p>UTF-8 was first officially presented at the <a href="/wiki/USENIX" title="USENIX">USENIX</a> conference in <a href="/wiki/San_Diego" title="San Diego">San Diego</a>, from January 25–29, 1993.</p>
<p>In November 2003 UTF-8 was restricted by <a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a> to four bytes in order to match the constraints of the <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a> character encoding.</p>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=2" title="Edit section: Description">edit</a>]</span> <span class="mw-headline" id="Description">Description</span></h2>
<p>The design of UTF-8 is most easily seen in the table of the scheme as originally proposed by Dave Prosser and subsequently modified by Ken Thompson (the <code>x</code>'s are replaced by the bits of the code point):</p>
<table class="wikitable">
<tr>
<th>Bits</th>
<th>Last code point</th>
<th>Byte 1</th>
<th>Byte 2</th>
<th>Byte 3</th>
<th>Byte 4</th>
<th>Byte 5</th>
<th>Byte 6</th>
</tr>
<tr>
<th>&#160;&#160;7</th>
<td>U+007F</td>
<td><code>0xxxxxxx</code></td>
</tr>
<tr>
<th>11</th>
<td>U+07FF</td>
<td><code>110xxxxx</code></td>
<td><code>10xxxxxx</code></td>
</tr>
<tr>
<th>16</th>
<td>U+FFFF</td>
<td><code>1110xxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
</tr>
<tr>
<th>21</th>
<td>U+1FFFFF</td>
<td><code>11110xxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
</tr>
<tr>
<th>26</th>
<td>U+3FFFFFF</td>
<td><code>111110xx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
</tr>
<tr>
<th>31</th>
<td>U+7FFFFFFF</td>
<td><code>1111110x</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
<td><code>10xxxxxx</code></td>
</tr>
</table>
<p>The salient features of the above scheme are as follows:</p>
<ol>
<li>One-byte codes are used only for the ASCII values 0 through 127. In this case the UTF-8 code has the same value as the ASCII code. The high-order bit of these codes is always 0.</li>
<li>Codepoints larger than 127 are represented by multi-byte sequences, composed of a <i>leading byte</i> and one or more <i>continuation bytes</i>. The leading byte has two or more high-order 1s followed by a 0, while continuation bytes all have '10' in the high-order position.</li>
<li>The remaining bits of the encoding are used for the bits of the codepoint being encoded, padded with high-order 0s if necessary. The number of bytes in the encoding is the minimum required to hold all the significant bits of the codepoint.</li>
<li>The number of high-order 1s in the leading byte of a multi-byte sequence indicates the number of bytes in the sequence, so that the length of the sequence can be determined without examining the continuation bytes.</li>
<li>Single bytes, leading bytes, and continuation bytes do not share values. This makes the scheme "self synchronizing", allowing the start of a character to be found by backing up at most five bytes (three bytes in actual UTF-8 as explained below).</li>
</ol>
<p>The first 128 characters (US-ASCII) need one byte. The next 1,920 characters need two bytes to encode. This covers the remainder of almost all <a href="/wiki/Latin-derived_alphabet" title="Latin-derived alphabet">Latin-derived alphabets</a>, and also <a href="/wiki/Greek_alphabet" title="Greek alphabet">Greek</a>, <a href="/wiki/Cyrillic_script" title="Cyrillic script">Cyrillic</a>, <a href="/wiki/Coptic_alphabet" title="Coptic alphabet">Coptic</a>, <a href="/wiki/Armenian_alphabet" title="Armenian alphabet">Armenian</a>, <a href="/wiki/Hebrew_alphabet" title="Hebrew alphabet">Hebrew</a>, <a href="/wiki/Arabic_alphabet" title="Arabic alphabet">Arabic</a>, <a href="/wiki/Syriac_alphabet" title="Syriac alphabet">Syriac</a> and <a href="/wiki/T%C4%81na" title="Tāna" class="mw-redirect">Tāna</a> alphabets, as well as <a href="/wiki/Combining_Diacritical_Marks" title="Combining Diacritical Marks" class="mw-redirect">Combining Diacritical Marks</a>. Three bytes are needed for characters in the rest of the <a href="/wiki/Mapping_of_Unicode_character_planes" title="Mapping of Unicode character planes" class="mw-redirect">Basic Multilingual Plane</a> (which contains virtually all characters in common use). Four bytes are needed for characters in the <a href="/wiki/Mapping_of_Unicode_characters" title="Mapping of Unicode characters">other planes of Unicode</a>, which include less common <a href="/wiki/CJK_characters" title="CJK characters">CJK characters</a> and various historic scripts and mathematical symbols.</p>
<p>The original specification covered numbers up to 31 bits (the original limit of the <a href="/wiki/Universal_Character_Set" title="Universal Character Set">Universal Character Set</a>). In November 2003 UTF-8 was restricted by <a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a> to end at U+<code>10FFFF</code>, in order to match the constraints of the <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a> character encoding. This removed all 5- and 6-byte sequences, and about half of the 4-byte sequences.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=3" title="Edit section: Examples">edit</a>]</span> <span class="mw-headline" id="Examples">Examples</span></h3>
<p>Let us consider how to encode the <a href="/wiki/Euro_sign" title="Euro sign">Euro sign</a>, €.</p>
<ol>
<li>The Unicode code point for "€" is U+20AC.</li>
<li>According to the scheme table above, this will take three bytes to encode, as it is between U+07FF and U+FFFF.</li>
<li>Hexadecimal <b><code>20AC</code></b> is binary <b><code>0010000010101100</code></b>. The two leading zeros are added because, as the scheme table shows, a three-byte encoding needs <i>exactly</i> sixteen bits from the code point.</li>
<li>Because it is a three-byte encoding, the leading byte starts with three 1s, then a 0 (<b><code>1110</code></b>...)</li>
<li>The remaining bits of this byte are taken from the code point (<b><code>1110<span style="color:blue;">0010</span></code></b>), leaving ...<b><code>000010101100</code></b>.</li>
<li>Each of the continuation bytes starts with <b><code>10</code></b> and takes six bits of the code point (so <b><code>10<span style="color:green;">000010</span></code></b>, then <b><code>10<span style="color:red;">101100</span></code></b>).</li>
</ol>
<p>The three bytes <b><code>1110<span style="color:blue;">0010</span></code></b> <b><code>10<span style="color:green;">000010</span></code></b> <b><code>10<span style="color:red;">101100</span></code></b> can be more concisely written in hexadecimal, as <b><code><span style="color:blue;">E2</span> <span style="color:green;">82</span> <span style="color:red;">AC</span></code></b>.</p>
<p>The following table summarises this conversion, as well as others with different lengths in UTF-8. The colors indicate how bits from the code point are distributed among the UTF-8 bytes. Additional bits added by the UTF-8 encoding process are shown in black.</p>
<table class="wikitable" style="font-weight: bold">
<tr>
<th colspan="2">Character</th>
<th>Binary code point</th>
<th>Binary UTF-8</th>
<th>Hexadecimal UTF-8</th>
</tr>
<tr>
<td><a href="/wiki/$" title="$" class="mw-redirect">$</a></td>
<td><code>U+0024</code></td>
<td align="right"><code><span style="color:red;">0100100</span></code></td>
<td align="right"><code>0<span style="color:red;">0100100</span></code></td>
<td align="right"><code><span style="color:red;">24</span></code></td>
</tr>
<tr>
<td><a href="/wiki/%C2%A2" title="¢" class="mw-redirect">¢</a></td>
<td><code>U+00A2</code></td>
<td align="right"><code><span style="color:green;">00010</span><span style="color:red;">100010</span></code></td>
<td align="right"><code>110<span style="color:green;">00010</span> 10<span style="color:red;">100010</span></code></td>
<td align="right"><code><span style="color:green;">C2</span> <span style="color:red;">A2</span></code></td>
</tr>
<tr>
<td><a href="/wiki/%E2%82%AC" title="€" class="mw-redirect">€</a></td>
<td><code>U+20AC</code></td>
<td align="right"><code><span style="color:blue;">0010</span><span style="color:green;">000010</span><span style="color:red;">101100</span></code></td>
<td align="right"><code>1110<span style="color:blue;">0010</span> 10<span style="color:green;">000010</span> 10<span style="color:red;">101100</span></code></td>
<td align="right"><code><span style="color:blue;">E2</span> <span style="color:green;">82</span> <span style="color:red;">AC</span></code></td>
</tr>
<tr>
<td><a href="/wiki/Chinese_character" title="Chinese character" class="mw-redirect">𤭢</a></td>
<td><code>U+24B62</code></td>
<td align="right"><code><span style="color:#C000C0;">000</span><span style="color:blue;">100100</span><span style="color:green;">101101</span><span style="color:red;">100010</span></code></td>
<td align="right"><code>11110<span style="color:#C000C0;">000</span> 10<span style="color:blue;">100100</span> 10<span style="color:green;">101101</span> 10<span style="color:red;">100010</span></code></td>
<td align="right"><code><span style="color:#C000C0;">F0</span> <span style="color:blue;">A4</span> <span style="color:green;">AD</span> <span style="color:red;">A2</span></code></td>
</tr>
</table>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=4" title="Edit section: Overlong encodings">edit</a>]</span> <span class="mw-headline" id="Overlong_encodings">Overlong encodings</span></h3>
<p>The standard specifies that the correct encoding of a codepoint use only the minimum number of bytes required to hold the significant bits of the codepoint. Longer encodings are called <i>overlong</i> and are not valid UTF-8 representations of the codepoint. This rule maintains a one-to-one correspondence between codepoints and their valid encodings, so that there is a unique valid encoding for each codepoint. Allowing multiple encodings would make testing for string equality difficult to define.</p>
<p>In principle, it would be possible to inflate the number of bytes in an encoding by padding the codepoint with leading 0s. To encode the Euro sign € from the above example in four bytes instead of three, it could be padded with leading 0s until it was 21 bits long—<b><code>000000010000010101100</code></b>. The leading byte prefix for a four-byte encoding is <b><code>11110</code></b>, and so the complete, overlong encoding is <b><code>11110<span style="color:#C000C0;">000</span></code></b> <b><code>10<span style="color:blue;">000010</span></code></b> <b><code>10<span style="color:green;">000010</span></code></b> <b><code>10<span style="color:red;">101100</span></code></b> (or <b><code><span style="color:#C000C0;">F0</span></code></b> <b><code><span style="color:blue;">82</span></code></b> <b><code><span style="color:green;">82</span></code></b> <b><code><span style="color:red;">AC</span></code></b> in hexadecimal).</p>
<p>Although overlong encodings are forbidden in UTF-8, at least one derivative makes use of the form. <a href="#Modified_UTF-8">Modified UTF-8</a> requires the Unicode codepoint U+0000 (the NUL character) to be encoded in the overlong form <b><code>110<span style="color:green;">00000</span></code></b> <b><code>10<span style="color:red;">000000</span></code></b> (hex <b><code><span style="color:green;">C0</span></code></b> <b><code><span style="color:red;">80</span></code></b>), rather than <b><code>00000000</code></b> (hex <b><code>00</code></b>). This allows the byte <b><code>00</code></b> to be used as a <a href="/wiki/Null-terminated_string" title="Null-terminated string">string terminator</a>.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=5" title="Edit section: Codepage layout">edit</a>]</span> <span class="mw-headline" id="Codepage_layout">Codepage layout</span></h3>
<table class="wikitable chset nounderlines" frame="box" width="100%" style="text-align: center; font-family: monospace; border-collapse: collapse">
<tr>
<th colspan="17"><b>UTF-8</b></th>
</tr>
<tr>
<td width="4%"></td>
<th width="6%">_0</th>
<th width="6%">_1</th>
<th width="6%">_2</th>
<th width="6%">_3</th>
<th width="6%">_4</th>
<th width="6%">_5</th>
<th width="6%">_6</th>
<th width="6%">_7</th>
<th width="6%">_8</th>
<th width="6%">_9</th>
<th width="6%">_A</th>
<th width="6%">_B</th>
<th width="6%">_C</th>
<th width="6%">_D</th>
<th width="6%">_E</th>
<th width="6%">_F</th>
</tr>
<tr>
<th>&#160;<br />
0_<br />
&#160;</th>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Null_character" title="Null character">NUL</a></small><br />
<small>0000</small><br />
<i><b>0</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Start_of_heading" title="Start of heading" class="mw-redirect">SOH</a></small><br />
<small>0001</small><br />
<i><b>1</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Start_of_text" title="Start of text" class="mw-redirect">STX</a></small><br />
<small>0002</small><br />
<i><b>2</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/End_of_text" title="End of text" class="mw-redirect">ETX</a></small><br />
<small>0003</small><br />
<i><b>3</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/End_of_transmission_character" title="End of transmission character" class="mw-redirect">EOT</a></small><br />
<small>0004</small><br />
<i><b>4</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Enquiry_character" title="Enquiry character">ENQ</a></small><br />
<small>0005</small><br />
<i><b>5</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Acknowledge_character" title="Acknowledge character">ACK</a></small><br />
<small>0006</small><br />
<i><b>6</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Bell_character" title="Bell character">BEL</a></small><br />
<small>0007</small><br />
<i><b>7</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Backspace" title="Backspace">BS</a></small><br />
<small>0008</small><br />
<i><b>8</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Tab_key" title="Tab key">HT</a></small><br />
<small>0009</small><br />
<i><b>9</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Line_feed" title="Line feed" class="mw-redirect">LF</a></small><br />
<small>000A</small><br />
<i><b>10</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Vertical_tab" title="Vertical tab" class="mw-redirect">VT</a></small><br />
<small>000B</small><br />
<i><b>11</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Form_feed" title="Form feed" class="mw-redirect">FF</a></small><br />
<small>000C</small><br />
<i><b>12</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Carriage_return" title="Carriage return">CR</a></small><br />
<small>000D</small><br />
<i><b>13</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Shift_out" title="Shift out" class="mw-redirect">SO</a></small><br />
<small>000E</small><br />
<i><b>14</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Shift_in" title="Shift in" class="mw-redirect">SI</a></small><br />
<small>000F</small><br />
<i><b>15</b></i></td>
</tr>
<tr>
<th>&#160;<br />
1_<br />
&#160;</th>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Data_link_escape" title="Data link escape" class="mw-redirect">DLE</a></small><br />
<small>0010</small><br />
<i><b>16</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Device_Control_1" title="Device Control 1" class="mw-redirect">DC1</a></small><br />
<small>0011</small><br />
<i><b>17</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Device_Control_2" title="Device Control 2" class="mw-redirect">DC2</a></small><br />
<small>0012</small><br />
<i><b>18</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Device_Control_3" title="Device Control 3" class="mw-redirect">DC3</a></small><br />
<small>0013</small><br />
<i><b>19</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Device_Control_4" title="Device Control 4" class="mw-redirect">DC4</a></small><br />
<small>0014</small><br />
<i><b>20</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Negative_acknowledge_character" title="Negative acknowledge character" class="mw-redirect">NAK</a></small><br />
<small>0015</small><br />
<i><b>21</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Synchronous_idle" title="Synchronous idle">SYN</a></small><br />
<small>0016</small><br />
<i><b>22</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/End_of_transmission_block" title="End of transmission block" class="mw-redirect">ETB</a></small><br />
<small>0017</small><br />
<i><b>23</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Cancel_character" title="Cancel character">CAN</a></small><br />
<small>0018</small><br />
<i><b>24</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/End_of_medium" title="End of medium" class="mw-redirect">EM</a></small><br />
<small>0019</small><br />
<i><b>25</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Substitute_character" title="Substitute character">SUB</a></small><br />
<small>001A</small><br />
<i><b>26</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Escape_character" title="Escape character">ESC</a></small><br />
<small>001B</small><br />
<i><b>27</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/File_separator" title="File separator" class="mw-redirect">FS</a></small><br />
<small>001C</small><br />
<i><b>28</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Group_separator" title="Group separator" class="mw-redirect">GS</a></small><br />
<small>001D</small><br />
<i><b>29</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Record_separator" title="Record separator" class="mw-redirect">RS</a></small><br />
<small>001E</small><br />
<i><b>30</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Unit_separator" title="Unit separator" class="mw-redirect">US</a></small><br />
<small>001F</small><br />
<i><b>31</b></i></td>
</tr>
<tr>
<th>&#160;<br />
2_<br />
&#160;</th>
<td style="background:#DFF7FF;"><small><a href="/wiki/Space_character" title="Space character" class="mw-redirect">SP</a></small><br />
<small>0020</small><br />
<i><b>32</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Exclamation_mark" title="Exclamation mark">!</a></span><br />
<small>0021</small><br />
<i><b>33</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Quotation_mark" title="Quotation mark">"</a></span><br />
<small>0022</small><br />
<i><b>34</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Number_sign" title="Number sign">#</a></span><br />
<small>0023</small><br />
<i><b>35</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Dollar_sign" title="Dollar sign">$</a></span><br />
<small>0024</small><br />
<i><b>36</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Percent_sign" title="Percent sign">%</a></span><br />
<small>0025</small><br />
<i><b>37</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Ampersand" title="Ampersand">&amp;</a></span><br />
<small>0026</small><br />
<i><b>38</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Apostrophe" title="Apostrophe">'</a></span><br />
<small>0027</small><br />
<i><b>39</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Bracket" title="Bracket">(</a></span><br />
<small>0028</small><br />
<i><b>40</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Bracket" title="Bracket">)</a></span><br />
<small>0029</small><br />
<i><b>41</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Asterisk" title="Asterisk">*</a></span><br />
<small>002A</small><br />
<i><b>42</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Plus_and_minus_signs" title="Plus and minus signs">+</a></span><br />
<small>002B</small><br />
<i><b>43</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Comma_(punctuation)" title="Comma (punctuation)" class="mw-redirect">,</a></span><br />
<small>002C</small><br />
<i><b>44</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Plus_and_minus_signs" title="Plus and minus signs">-</a></span><br />
<small>002D</small><br />
<i><b>45</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Full_stop" title="Full stop">.</a></span><br />
<small>002E</small><br />
<i><b>46</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Slash_(punctuation)" title="Slash (punctuation)">/</a></span><br />
<small>002F</small><br />
<i><b>47</b></i></td>
</tr>
<tr>
<th>&#160;<br />
3_<br />
&#160;</th>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/0_(number)" title="0 (number)">0</a></span><br />
<small>0030</small><br />
<i><b>48</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/1_(number)" title="1 (number)">1</a></span><br />
<small>0031</small><br />
<i><b>49</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/2_(number)" title="2 (number)">2</a></span><br />
<small>0032</small><br />
<i><b>50</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/3_(number)" title="3 (number)">3</a></span><br />
<small>0033</small><br />
<i><b>51</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/4_(number)" title="4 (number)">4</a></span><br />
<small>0034</small><br />
<i><b>52</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/5_(number)" title="5 (number)">5</a></span><br />
<small>0035</small><br />
<i><b>53</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/6_(number)" title="6 (number)">6</a></span><br />
<small>0036</small><br />
<i><b>54</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/7_(number)" title="7 (number)">7</a></span><br />
<small>0037</small><br />
<i><b>55</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/8_(number)" title="8 (number)">8</a></span><br />
<small>0038</small><br />
<i><b>56</b></i></td>
<td style="background:#F7E7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/9_(number)" title="9 (number)">9</a></span><br />
<small>0039</small><br />
<i><b>57</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Colon_(punctuation)" title="Colon (punctuation)">:</a></span><br />
<small>003A</small><br />
<i><b>58</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Semicolon" title="Semicolon">;</a></span><br />
<small>003B</small><br />
<i><b>59</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Angle_bracket" title="Angle bracket" class="mw-redirect">&lt;</a></span><br />
<small>003C</small><br />
<i><b>60</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Equal_sign" title="Equal sign" class="mw-redirect">=</a></span><br />
<small>003D</small><br />
<i><b>61</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Angle_bracket" title="Angle bracket" class="mw-redirect">&gt;</a></span><br />
<small>003E</small><br />
<i><b>62</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Question_mark" title="Question mark">?</a></span><br />
<small>003F</small><br />
<i><b>63</b></i></td>
</tr>
<tr>
<th>&#160;<br />
4_<br />
&#160;</th>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/@" title="@" class="mw-redirect">@</a></span><br />
<small>0040</small><br />
<i><b>64</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/A" title="A">A</a></span><br />
<small>0041</small><br />
<i><b>65</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/B" title="B">B</a></span><br />
<small>0042</small><br />
<i><b>66</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/C" title="C">C</a></span><br />
<small>0043</small><br />
<i><b>67</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/D" title="D">D</a></span><br />
<small>0044</small><br />
<i><b>68</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/E" title="E">E</a></span><br />
<small>0045</small><br />
<i><b>69</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/F" title="F">F</a></span><br />
<small>0046</small><br />
<i><b>70</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/G" title="G">G</a></span><br />
<small>0047</small><br />
<i><b>71</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/H" title="H">H</a></span><br />
<small>0048</small><br />
<i><b>72</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/I" title="I">I</a></span><br />
<small>0049</small><br />
<i><b>73</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/J" title="J">J</a></span><br />
<small>004A</small><br />
<i><b>74</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/K" title="K">K</a></span><br />
<small>004B</small><br />
<i><b>75</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/L" title="L">L</a></span><br />
<small>004C</small><br />
<i><b>76</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/M" title="M">M</a></span><br />
<small>004D</small><br />
<i><b>77</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/N" title="N">N</a></span><br />
<small>004E</small><br />
<i><b>78</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/O" title="O">O</a></span><br />
<small>004F</small><br />
<i><b>79</b></i></td>
</tr>
<tr>
<th>&#160;<br />
5_<br />
&#160;</th>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/P" title="P">P</a></span><br />
<small>0050</small><br />
<i><b>80</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Q" title="Q">Q</a></span><br />
<small>0051</small><br />
<i><b>81</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/R" title="R">R</a></span><br />
<small>0052</small><br />
<i><b>82</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/S" title="S">S</a></span><br />
<small>0053</small><br />
<i><b>83</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/T" title="T">T</a></span><br />
<small>0054</small><br />
<i><b>84</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/U" title="U">U</a></span><br />
<small>0055</small><br />
<i><b>85</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/V" title="V">V</a></span><br />
<small>0056</small><br />
<i><b>86</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/W" title="W">W</a></span><br />
<small>0057</small><br />
<i><b>87</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/X" title="X">X</a></span><br />
<small>0058</small><br />
<i><b>88</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Y" title="Y">Y</a></span><br />
<small>0059</small><br />
<i><b>89</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Z" title="Z">Z</a></span><br />
<small>005A</small><br />
<i><b>90</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Square_brackets" title="Square brackets" class="mw-redirect">[</a></span><br />
<small>005B</small><br />
<i><b>91</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Backslash" title="Backslash">\</a></span><br />
<small>005C</small><br />
<i><b>92</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Square_brackets" title="Square brackets" class="mw-redirect">]</a></span><br />
<small>005D</small><br />
<i><b>93</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Circumflex" title="Circumflex">^</a></span><br />
<small>005E</small><br />
<i><b>94</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Underscore" title="Underscore">_</a></span><br />
<small>005F</small><br />
<i><b>95</b></i></td>
</tr>
<tr>
<th>&#160;<br />
6_<br />
&#160;</th>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Grave_accent" title="Grave accent">`</a></span><br />
<small>0060</small><br />
<i><b>96</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/A" title="A">a</a></span><br />
<small>0061</small><br />
<i><b>97</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/B" title="B">b</a></span><br />
<small>0062</small><br />
<i><b>98</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/C" title="C">c</a></span><br />
<small>0063</small><br />
<i><b>99</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/D" title="D">d</a></span><br />
<small>0064</small><br />
<i><b>100</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/E" title="E">e</a></span><br />
<small>0065</small><br />
<i><b>101</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/F" title="F">f</a></span><br />
<small>0066</small><br />
<i><b>102</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/G" title="G">g</a></span><br />
<small>0067</small><br />
<i><b>103</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/H" title="H">h</a></span><br />
<small>0068</small><br />
<i><b>104</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/I" title="I">i</a></span><br />
<small>0069</small><br />
<i><b>105</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/J" title="J">j</a></span><br />
<small>006A</small><br />
<i><b>106</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/K" title="K">k</a></span><br />
<small>006B</small><br />
<i><b>107</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/L" title="L">l</a></span><br />
<small>006C</small><br />
<i><b>108</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/M" title="M">m</a></span><br />
<small>006D</small><br />
<i><b>109</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/N" title="N">n</a></span><br />
<small>006E</small><br />
<i><b>110</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/O" title="O">o</a></span><br />
<small>006F</small><br />
<i><b>111</b></i></td>
</tr>
<tr>
<th>&#160;<br />
7_<br />
&#160;</th>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/P" title="P">p</a></span><br />
<small>0070</small><br />
<i><b>112</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Q" title="Q">q</a></span><br />
<small>0071</small><br />
<i><b>113</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/R" title="R">r</a></span><br />
<small>0072</small><br />
<i><b>114</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/S" title="S">s</a></span><br />
<small>0073</small><br />
<i><b>115</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/T" title="T">t</a></span><br />
<small>0074</small><br />
<i><b>116</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/U" title="U">u</a></span><br />
<small>0075</small><br />
<i><b>117</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/V" title="V">v</a></span><br />
<small>0076</small><br />
<i><b>118</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/W" title="W">w</a></span><br />
<small>0077</small><br />
<i><b>119</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/X" title="X">x</a></span><br />
<small>0078</small><br />
<i><b>120</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Y" title="Y">y</a></span><br />
<small>0079</small><br />
<i><b>121</b></i></td>
<td style="background:#E7FFE7;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Z" title="Z">z</a></span><br />
<small>007A</small><br />
<i><b>122</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Braces_(punctuation)" title="Braces (punctuation)" class="mw-redirect">{</a></span><br />
<small>007B</small><br />
<i><b>123</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Vertical_bar" title="Vertical bar">|</a></span><br />
<small>007C</small><br />
<i><b>124</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Braces_(punctuation)" title="Braces (punctuation)" class="mw-redirect">}</a></span><br />
<small>007D</small><br />
<i><b>125</b></i></td>
<td style="background:#DFF7FF;"><span style="font-size: large; font-family: monospace"><a href="/wiki/Tilde" title="Tilde">~</a></span><br />
<small>007E</small><br />
<i><b>126</b></i></td>
<td bgcolor="#FFFFEF"><small><a href="/wiki/Delete_character" title="Delete character">DEL</a></small><br />
<small>007F</small><br />
<i><b>127</b></i></td>
</tr>
<tr>
<th>&#160;<br />
8_<br />
&#160;</th>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+00</small><br />
<i><b>128</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+01</small><br />
<i><b>129</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+02</small><br />
<i><b>130</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+03</small><br />
<i><b>131</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+04</small><br />
<i><b>132</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+05</small><br />
<i><b>133</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+06</small><br />
<i><b>134</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+07</small><br />
<i><b>135</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+08</small><br />
<i><b>136</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+09</small><br />
<i><b>137</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0A</small><br />
<i><b>138</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0B</small><br />
<i><b>139</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0C</small><br />
<i><b>140</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0D</small><br />
<i><b>141</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0E</small><br />
<i><b>142</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+0F</small><br />
<i><b>143</b></i></td>
</tr>
<tr>
<th>&#160;<br />
9_<br />
&#160;</th>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+10</small><br />
<i><b>144</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+11</small><br />
<i><b>145</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+12</small><br />
<i><b>146</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+13</small><br />
<i><b>147</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+14</small><br />
<i><b>148</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+15</small><br />
<i><b>149</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+16</small><br />
<i><b>150</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+17</small><br />
<i><b>151</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+18</small><br />
<i><b>152</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+19</small><br />
<i><b>153</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1A</small><br />
<i><b>154</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1B</small><br />
<i><b>155</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1C</small><br />
<i><b>156</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1D</small><br />
<i><b>157</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1E</small><br />
<i><b>158</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+1F</small><br />
<i><b>159</b></i></td>
</tr>
<tr>
<th>&#160;<br />
A_<br />
&#160;</th>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+20</small><br />
<i><b>160</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+21</small><br />
<i><b>161</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+22</small><br />
<i><b>162</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+23</small><br />
<i><b>163</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+24</small><br />
<i><b>164</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+25</small><br />
<i><b>165</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+26</small><br />
<i><b>166</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+27</small><br />
<i><b>167</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+28</small><br />
<i><b>168</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+29</small><br />
<i><b>169</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2A</small><br />
<i><b>170</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2B</small><br />
<i><b>171</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2C</small><br />
<i><b>172</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2D</small><br />
<i><b>173</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2E</small><br />
<i><b>174</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace"></span><br />
<small>+2F</small><br />
<i><b>175</b></i></td>
</tr>
<tr>
<th>&#160;<br />
B_<br />
&#160;</th>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+30</small><br />
<i><b>176</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+31</small><br />
<i><b>177</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+32</small><br />
<i><b>178</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+33</small><br />
<i><b>179</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+34</small><br />
<i><b>180</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+35</small><br />
<i><b>181</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+36</small><br />
<i><b>182</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+37</small><br />
<i><b>183</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+38</small><br />
<i><b>184</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+39</small><br />
<i><b>185</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3A</small><br />
<i><b>186</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3B</small><br />
<i><b>187</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3C</small><br />
<i><b>188</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3D</small><br />
<i><b>189</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3E</small><br />
<i><b>190</b></i></td>
<td bgcolor="#FFCC88"><span style="font-size: large; font-family: monospace">•</span><br />
<small>+3F</small><br />
<i><b>191</b></i></td>
</tr>
<tr>
<th>&#160;<br />
2-byte<br />
C_<br />
&#160;</th>
<td bgcolor="#FF0000"><small>2-byte<br />
inval</small><br />
<small><small>(0000)</small></small><br />
<i><b>192</b></i></td>
<td bgcolor="#FF0000"><small>2-byte<br />
inval</small><br />
<small><small>(0040)</small></small><br />
<i><b>193</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/C1_Controls_and_Latin-1_Supplement" title="C1 Controls and Latin-1 Supplement" class="mw-redirect">Latin-1</a></small><br />
<small>0080</small><br />
<i><b>194</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/C1_Controls_and_Latin-1_Supplement" title="C1 Controls and Latin-1 Supplement" class="mw-redirect">Latin-1</a></small><br />
<small>00C0</small><br />
<i><b>195</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Latin_Extended-A" title="Latin Extended-A">Latin<br />
Ext-A</a></small><br />
<small>0100</small><br />
<i><b>196</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Latin_Extended-A" title="Latin Extended-A">Latin<br />
Ext-A</a></small><br />
<small>0140</small><br />
<i><b>197</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Latin_Extended-B" title="Latin Extended-B">Latin<br />
Ext-B</a></small><br />
<small>0180</small><br />
<i><b>198</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Latin_Extended-B" title="Latin Extended-B">Latin<br />
Ext-B</a></small><br />
<small>01C0</small><br />
<i><b>199</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Latin_Extended-B" title="Latin Extended-B">Latin<br />
Ext-B</a></small><br />
<small>0200</small><br />
<i><b>200</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/IPA_Extensions_(Unicode_block)" title="IPA Extensions (Unicode block)">IPA</a></small><br />
<small>0240</small><br />
<i><b>201</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/IPA_Extensions_(Unicode_block)" title="IPA Extensions (Unicode block)">IPA</a></small><br />
<small>0280</small><br />
<i><b>202</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Unicode_Phonetic_Symbols#Spacing_Modifier_Letters_.28U.2B02B0.E2.80.9302FF.29" title="Unicode Phonetic Symbols" class="mw-redirect">Spaci<br />
Modif</a></small><br />
<small>02C0</small><br />
<i><b>203</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Combining_character" title="Combining character">Combi<br />
Diacr</a></small><br />
<small>0300</small><br />
<i><b>204</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Combining_character" title="Combining character">Combi<br />
Diacr</a></small><br />
<small>0340</small><br />
<i><b>205</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Greek_alphabet#Greek_in_Unicode" title="Greek alphabet">Greek</a></small><br />
<small>0380</small><br />
<i><b>206</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Greek_alphabet#Greek_in_Unicode" title="Greek alphabet">Greek</a></small><br />
<small>03C0</small><br />
<i><b>207</b></i></td>
</tr>
<tr>
<th>&#160;<br />
2-byte<br />
D_<br />
&#160;</th>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Cyrillic_(Unicode_block)" title="Cyrillic (Unicode block)" class="mw-redirect">Cyril</a></small><br />
<small>0400</small><br />
<i><b>208</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Cyrillic_(Unicode_block)" title="Cyrillic (Unicode block)" class="mw-redirect">Cyril</a></small><br />
<small>0440</small><br />
<i><b>209</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Cyrillic_(Unicode_block)" title="Cyrillic (Unicode block)" class="mw-redirect">Cyril</a></small><br />
<small>0480</small><br />
<i><b>210</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Cyrillic_(Unicode_block)" title="Cyrillic (Unicode block)" class="mw-redirect">Cyril</a></small><br />
<small>04C0</small><br />
<i><b>211</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Cyrillic_(Unicode_block)" title="Cyrillic (Unicode block)" class="mw-redirect">Cyril</a></small><br />
<small>0500</small><br />
<i><b>212</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Armenian_alphabet" title="Armenian alphabet">Armen</a></small><br />
<small>0540</small><br />
<i><b>213</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Unicode_and_HTML_for_the_Hebrew_alphabet" title="Unicode and HTML for the Hebrew alphabet">Hebrew</a></small><br />
<small>0580</small><br />
<i><b>214</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Unicode_and_HTML_for_the_Hebrew_alphabet" title="Unicode and HTML for the Hebrew alphabet">Hebrew</a></small><br />
<small>05C0</small><br />
<i><b>215</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Arabic_(Unicode_block)" title="Arabic (Unicode block)" class="mw-redirect">Arabic</a></small><br />
<small>0600</small><br />
<i><b>216</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Arabic_(Unicode_block)" title="Arabic (Unicode block)" class="mw-redirect">Arabic</a></small><br />
<small>0640</small><br />
<i><b>217</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Arabic_(Unicode_block)" title="Arabic (Unicode block)" class="mw-redirect">Arabic</a></small><br />
<small>0680</small><br />
<i><b>218</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Arabic_(Unicode_block)" title="Arabic (Unicode block)" class="mw-redirect">Arabic</a></small><br />
<small>06C0</small><br />
<i><b>219</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Syriac_alphabet" title="Syriac alphabet">Syriac</a></small><br />
<small>0700</small><br />
<i><b>220</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Arabic_(Unicode_block)" title="Arabic (Unicode block)" class="mw-redirect">Arabic</a></small><br />
<small>0740</small><br />
<i><b>221</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/T%C4%81na" title="Tāna" class="mw-redirect">Thaana</a></small><br />
<small>0780</small><br />
<i><b>222</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/N%27Ko" title="N'Ko" class="mw-redirect">N'Ko</a></small><br />
<small>07C0</small><br />
<i><b>223</b></i></td>
</tr>
<tr>
<th>&#160;<br />
3-byte<br />
E_<br />
&#160;</th>
<td bgcolor="#EEEEEE"><small>Indic</small><br />
<small>0800*</small><br />
<i><b>224</b></i></td>
<td bgcolor="#EEEEEE"><small>Misc.</small><br />
<small>1000</small><br />
<i><b>225</b></i></td>
<td bgcolor="#EEEEEE"><small>Symbol</small><br />
<small>2000</small><br />
<i><b>226</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/Kana" title="Kana">Kana</a><br />
<a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>3000</small><br />
<i><b>227</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>4000</small><br />
<i><b>228</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>5000</small><br />
<i><b>229</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>6000</small><br />
<i><b>230</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>7000</small><br />
<i><b>231</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>8000</small><br />
<i><b>232</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK</a></small><br />
<small>9000</small><br />
<i><b>233</b></i></td>
<td bgcolor="#EEEEEE"><small>Asian</small><br />
<small>A000</small><br />
<i><b>234</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/Hangul" title="Hangul">Hangul</a></small><br />
<small>B000</small><br />
<i><b>235</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/Hangul" title="Hangul">Hangul</a></small><br />
<small>C000</small><br />
<i><b>236</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/Hangul" title="Hangul">Hangul</a><br />
Surr</small><br />
<small>D000</small><br />
<i><b>237</b></i></td>
<td bgcolor="#EEEEEE"><small><a href="/wiki/Private_Use_Area" title="Private Use Area" class="mw-redirect">Priv Use</a></small><br />
<small>E000</small><br />
<i><b>238</b></i></td>
<td bgcolor="#EEEEEE"><small>Forms</small><br />
<small>F000</small><br />
<i><b>239</b></i></td>
</tr>
<tr>
<th>&#160;<br />
4-byte<br />
F_<br />
&#160;</th>
<td bgcolor="#FFFFFF"><small>Ancient<br />
Sym,CJK</small><br />
<small>10000*</small><br />
<i><b>240</b></i></td>
<td bgcolor="#FFFFFF"><small>unall</small><br />
<small>40000</small><br />
<i><b>241</b></i></td>
<td bgcolor="#FFFFFF"><small>unall</small><br />
<small>80000</small><br />
<i><b>242</b></i></td>
<td bgcolor="#FFFFFF"><small>Tags<br />
Priv</small><br />
<small>C0000</small><br />
<i><b>243</b></i></td>
<td bgcolor="#FFFFFF"><small><a href="/wiki/Private_Use_Area" title="Private Use Area" class="mw-redirect">Priv<br />
Use</a></small><br />
<small>100000</small><br />
<i><b>244</b></i></td>
<td bgcolor="#FF0000"><small>4-byte<br />
inval</small><br />
<small><small>140000</small></small><br />
<i><b>245</b></i></td>
<td bgcolor="#FF0000"><small>4-byte<br />
inval</small><br />
<small><small>180000</small></small><br />
<i><b>246</b></i></td>
<td bgcolor="#FF0000"><small>4-byte<br />
inval</small><br />
<small><small>1C0000</small></small><br />
<i><b>247</b></i></td>
<td bgcolor="#DD0000"><small>5-byte<br />
inval</small><br />
<small><small>200000*</small></small><br />
<i><b>248</b></i></td>
<td bgcolor="#DD0000"><small>5-byte<br />
inval</small><br />
<small><small>1000000</small></small><br />
<i><b>249</b></i></td>
<td bgcolor="#DD0000"><small>5-byte<br />
inval</small><br />
<small><small>2000000</small></small><br />
<i><b>250</b></i></td>
<td bgcolor="#DD0000"><small>5-byte<br />
inval</small><br />
<small><small>3000000</small></small><br />
<i><b>251</b></i></td>
<td bgcolor="#FF0000"><small>6-byte<br />
inval</small><br />
<small><small>4000000*</small></small><br />
<i><b>252</b></i></td>
<td bgcolor="#FF0000"><small>6-byte<br />
inval</small><br />
<small><small>40000000</small></small><br />
<i><b>253</b></i></td>
<td bgcolor="#FF0000"><br />
<br />
<i><b>254</b></i></td>
<td bgcolor="#FF0000"><br />
<br />
<i><b>255</b></i></td>
</tr>
</table>
<p>Legend: <span style="background: #ffffef; color: black">Yellow</span> cells are control characters, <span style="background: rgb(223, 247, 255); color: black">blue</span> cells are punctuation, <span style="background: rgb(247, 231, 255); color: black">purple</span> cells are <a href="/wiki/Numerical_digit" title="Numerical digit">digits</a> and <span style="background: rgb(231, 255, 231); color: black">green</span> cells are ASCII letters.</p>
<p><span style="background: #ffcc88; color: black">Orange</span> cells with a large dot are continuation bytes. The hexadecimal number shown after a "+" plus sign is the value of the 6 bits they add.</p>
<p><span style="background: #ffffff; color: black">White</span> cells are the start bytes for a sequence of multiple bytes, the length shown at the left edge of the row. The text shows the Unicode blocks encoded by sequences starting with this byte, and the hexadecimal code point shown in the cell is the lowest character value encoded using that start byte. When a start byte could form both overlong and valid encodings, the lowest non-overlong-encoded codepoint is shown, marked by an asterisk "*".</p>
<p><span style="background: Red; color: black">Red</span> cells must never appear in a valid UTF-8 sequence. The first two (C0 and C1) could only be used for overlong encoding of basic ASCII characters. The remaining red cells indicate start bytes of sequences that could only encode numbers larger than the 0x10FFFF limit of Unicode. The byte 244 (hex 0xF4) could also encode some values greater than 0x10FFFF; such a sequence is also invalid.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=6" title="Edit section: Invalid byte sequences">edit</a>]</span> <span class="mw-headline" id="Invalid_byte_sequences">Invalid byte sequences</span></h3>
<p>Not all sequences of bytes are valid UTF-8. A UTF-8 decoder should be prepared for:</p>
<ul>
<li>the red invalid bytes in the above table</li>
<li>an unexpected continuation byte</li>
<li>a start byte not followed by enough continuation bytes</li>
<li>a sequence that decodes to a value that should use a shorter sequence (an "overlong form")</li>
<li>a 4-byte sequence (starting with 0xF4) that decodes to a value greater than U+10FFFF</li>
</ul>
<p>Many earlier decoders would happily try to decode these. Carefully crafted invalid UTF-8 could make them either skip or create ASCII characters such as NUL, slash, or quotes. Invalid UTF-8 has been used to bypass security validations in high profile products including Microsoft's <a href="/wiki/Internet_Information_Services" title="Internet Information Services">IIS</a> web server<sup id="cite_ref-MS00-078_10-0" class="reference"><a href="#cite_note-MS00-078-10"><span>[</span>11<span>]</span></a></sup> and Apache's Tomcat servlet container.<sup id="cite_ref-CVE-2008-2938_11-0" class="reference"><a href="#cite_note-CVE-2008-2938-11"><span>[</span>12<span>]</span></a></sup></p>
<p>RFC 3629 states "Implementations of the decoding algorithm MUST protect against decoding invalid sequences."<sup id="cite_ref-rfc3629_12-0" class="reference"><a href="#cite_note-rfc3629-12"><span>[</span>13<span>]</span></a></sup> <i>The Unicode Standard</i> requires decoders to "...treat any ill-formed code unit sequence as an error condition. This guarantees that it will neither interpret nor emit an ill-formed code unit sequence."</p>
<p>Many UTF-8 decoders throw exceptions on encountering errors,<sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span>[</span>14<span>]</span></a></sup> since such errors suggest the input is not a UTF-8 string at all. This can turn what would otherwise be harmless errors (producing a message such as "no such file") into a <a href="/wiki/Denial_of_service" title="Denial of service" class="mw-redirect">denial of service</a> bug. For instance, Python 3.0 would exit immediately if the command line or <a href="/wiki/Environment_variable" title="Environment variable">environment variables</a> contained invalid UTF-8,<sup id="cite_ref-PEP383_14-0" class="reference"><a href="#cite_note-PEP383-14"><span>[</span>15<span>]</span></a></sup> so it was impossible for any Python program to detect and recover from such an error.</p>
<p>An increasingly popular option is to detect errors with a separate API, and for converters to translate the first byte to a replacement and continue parsing with the next byte. Popular replacements are:</p>
<ul>
<li>The <a href="/wiki/Replacement_character" title="Replacement character" class="mw-redirect">replacement character</a> "<22>" (U+FFFD)</li>
<li>The invalid Unicode code points U+DC80..U+DCFF where the low 8 bits are the byte's value.</li>
<li>Interpret the bytes according to <a href="/wiki/ISO/IEC_8859-1" title="ISO/IEC 8859-1">ISO-8859-1</a> or <a href="/wiki/Windows-1252" title="Windows-1252">CP1252</a>.</li>
</ul>
<p>Replacing errors is "lossy": more than one UTF-8 string converts to the same Unicode result. Therefore the original UTF-8 should be stored, and translation should only be used when displaying the text to the user.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=7" title="Edit section: Invalid code points">edit</a>]</span> <span class="mw-headline" id="Invalid_code_points">Invalid code points</span></h3>
<p>According to the UTF-8 definition (<a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a>) the high and low surrogate halves used by <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a> (U+D800 through U+DFFF) are not legal Unicode values, and the UTF-8 encoding of them is an invalid byte sequence and thus should be treated as described above.</p>
<p>Whether an actual application should do this with surrogate halves is debatable.<sup class="noprint Inline-Template" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Avoid_weasel_words" title="Wikipedia:Avoid weasel words" class="mw-redirect"><span title="The material in the vicinity of this tag may use weasel words or too-vague attribution. from September 2011">who?</span></a></i>]</sup> Allowing them allows lossless storage of invalid <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a>, and allows CESU encoding (described below) to be decoded. There are other code points that are far more important to detect and reject, such as the reversed-BOM U+FFFE, or the <a href="/wiki/C1_control_code" title="C1 control code" class="mw-redirect">C1 controls</a>, caused by improper conversion of <a href="/wiki/CP1252" title="CP1252" class="mw-redirect">CP1252</a> text or <a href="/wiki/Mojibake" title="Mojibake">double-encoding</a> of UTF-8. These <i>are</i> invalid in <a href="/wiki/HTML" title="HTML">HTML</a>.</p>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=8" title="Edit section: Official name and variants">edit</a>]</span> <span class="mw-headline" id="Official_name_and_variants">Official name and variants</span></h2>
<p>The official name is "UTF-8". All letters are upper-case, and the name is hyphenated. This spelling is used in all the documents relating to the encoding.</p>
<p>Alternatively, the name "utf-8" may be used by all standards conforming to the <a href="/wiki/Internet_Assigned_Numbers_Authority" title="Internet Assigned Numbers Authority">Internet Assigned Numbers Authority</a> (IANA) list (which include <a href="/wiki/Cascading_Style_Sheets" title="Cascading Style Sheets">CSS</a>, <a href="/wiki/HTML" title="HTML">HTML</a>, <a href="/wiki/XML" title="XML">XML</a>, and <a href="/wiki/List_of_HTTP_headers" title="List of HTTP headers" class="mw-redirect">HTTP headers</a>),<sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span>[</span>16<span>]</span></a></sup> as the declaration is case insensitive.<sup id="cite_ref-16" class="reference"><a href="#cite_note-16"><span>[</span>17<span>]</span></a></sup></p>
<p>Other descriptions that omit the hyphen or replace it with a space, such as "utf8" or "UTF 8", are not accepted as correct by the governing standards.<sup id="cite_ref-17" class="reference"><a href="#cite_note-17"><span>[</span>18<span>]</span></a></sup> Despite this, most agents such as browsers can understand them, and so standards intended to describe existing practice (such as HTML5) may effectively require their recognition.</p>
<p>MySQL omits the hyphen in the following query:</p>
<pre>
SET NAMES 'utf8'
</pre>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=9" title="Edit section: Derivatives">edit</a>]</span> <span class="mw-headline" id="Derivatives">Derivatives</span></h2>
<p>The following implementations show slight differences from the UTF-8 specification. They are incompatible with the UTF-8 specification.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=10" title="Edit section: CESU-8">edit</a>]</span> <span class="mw-headline" id="CESU-8">CESU-8</span></h3>
<div class="rellink relarticle mainarticle">Main article: <a href="/wiki/CESU-8" title="CESU-8">CESU-8</a></div>
<p>Many programs added UTF-8 conversions for <a href="/wiki/UCS-2" title="UCS-2" class="mw-redirect">UCS-2</a> data and did not alter this UTF-8 conversion when UCS-2 was replaced with the surrogate-pair using <a href="/wiki/UTF-16" title="UTF-16">UTF-16</a>. In such programs each half of a UTF-16 surrogate pair is encoded as its own 3-byte UTF-8 encoding, resulting in 6-byte sequences rather than 4 bytes for characters outside the <a href="/wiki/Mapping_of_Unicode_character_planes" title="Mapping of Unicode character planes" class="mw-redirect">Basic Multilingual Plane</a>. <a href="/wiki/Oracle_Database" title="Oracle Database">Oracle</a> and <a href="/wiki/MySQL" title="MySQL">MySQL</a> databases use this, as well as Java and Tcl as described below, and probably many Windows programs where the programmers were unaware of the complexities of UTF-16. Although this non-optimal encoding is generally not deliberate, a supposed benefit is that it preserves UTF-16 binary sorting order when CESU-8 is binary sorted.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=11" title="Edit section: Modified UTF-8">edit</a>]</span> <span class="mw-headline" id="Modified_UTF-8">Modified UTF-8</span></h3>
<p>In Modified UTF-8,<sup id="cite_ref-18" class="reference"><a href="#cite_note-18"><span>[</span>19<span>]</span></a></sup> the <a href="/wiki/Null_character" title="Null character">null character</a> (U+0000) is encoded as 0xC0,0x80; this is not valid UTF-8<sup id="cite_ref-19" class="reference"><a href="#cite_note-19"><span>[</span>20<span>]</span></a></sup> because it is not the shortest possible representation. Modified UTF-8 strings never contain any actual null bytes but can contain all Unicode code points including U+0000,<sup id="cite_ref-20" class="reference"><a href="#cite_note-20"><span>[</span>21<span>]</span></a></sup> which allows such strings (with a null byte appended) to be processed by traditional <a href="/wiki/Null-terminated_string" title="Null-terminated string">null-terminated string</a> functions.</p>
<p>All known Modified UTF-8 implementations also treat the surrogate pairs as in <a href="/wiki/CESU-8" title="CESU-8">CESU-8</a>.</p>
<p>In normal usage, the <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java programming language</a> supports standard UTF-8 when reading and writing strings through <code><a rel="nofollow" class="external text" href="http://download.oracle.com/javase/7/docs/api/java/io/InputStreamReader.html">InputStreamReader</a></code> and <code><a rel="nofollow" class="external text" href="http://download.oracle.com/javase/7/docs/api/java/io/OutputStreamWriter.html">OutputStreamWriter</a></code>. However it uses Modified UTF-8 for object <a href="/wiki/Serialization#Java" title="Serialization">serialization</a>,<sup id="cite_ref-21" class="reference"><a href="#cite_note-21"><span>[</span>22<span>]</span></a></sup> for the <a href="/wiki/Java_Native_Interface" title="Java Native Interface">Java Native Interface</a>,<sup id="cite_ref-22" class="reference"><a href="#cite_note-22"><span>[</span>23<span>]</span></a></sup> and for embedding constant strings in <a href="/wiki/Class_(file_format)" title="Class (file format)" class="mw-redirect">class files</a>.<sup id="cite_ref-23" class="reference"><a href="#cite_note-23"><span>[</span>24<span>]</span></a></sup> <a href="/wiki/Tcl" title="Tcl">Tcl</a> also uses the same modified UTF-8<sup id="cite_ref-24" class="reference"><a href="#cite_note-24"><span>[</span>25<span>]</span></a></sup> as Java for internal representation of Unicode data, but uses strict CESU-8 for external data.</p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=12" title="Edit section: Extending from 31 bit to 36 bit range">edit</a>]</span> <span class="mw-headline" id="Extending_from_31_bit_to_36_bit_range">Extending from 31 bit to 36 bit range</span></h3>
<p>Extending the accepted input pattern from 6 bytes to 7 bytes would allow over 70 billion code points to be encoded;<sup id="cite_ref-25" class="reference"><a href="#cite_note-25"><span>[</span>26<span>]</span></a></sup> however, this would require an initial byte value of 0xFE to be accepted as a 7-byte sequence indicator (see under Advantages in section "<a href="#Compared_to_single-byte_encodings">Compared to single-byte encodings</a>").</p>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=13" title="Edit section: Byte order mark">edit</a>]</span> <span class="mw-headline" id="Byte_order_mark">Byte order mark</span></h2>
<p>Many <a href="/wiki/Microsoft_Windows" title="Microsoft Windows">Windows</a> programs (including Windows <a href="/wiki/Notepad_(Windows)" title="Notepad (Windows)" class="mw-redirect">Notepad</a>) add the bytes 0xEF, 0xBB, 0xBF at the start of any document saved as UTF-8. This is the UTF-8 encoding of the Unicode <a href="/wiki/Byte_order_mark" title="Byte order mark">byte order mark</a> (BOM), and is commonly referred to as a UTF-8 BOM, even though it is not relevant to byte order. The BOM can also appear if another encoding with a BOM is translated to UTF-8 without stripping it. Older text editors may display the BOM as "ï»¿" at the start of the document.</p>
<p>The Unicode Standard neither requires nor recommends the use of the BOM for UTF-8.<sup id="cite_ref-26" class="reference"><a href="#cite_note-26"><span>[</span>27<span>]</span></a></sup> The presence of the UTF-8 BOM may cause interoperability problems with existing software that could otherwise handle UTF-8; for example:</p>
<ul>
<li>Programming language parsers not explicitly designed for UTF-8 can often handle UTF-8 in string constants and comments, but cannot parse the BOM at the start of the file.</li>
<li>Programs that identify file types by leading characters may fail to identify the file if a BOM is present even if the user of the file could skip the BOM. An example is the Unix <a href="/wiki/Shebang_(Unix)" title="Shebang (Unix)">shebang</a> syntax. Another example is Internet Explorer, which will render a page in standards mode only when the page starts with a <a href="/wiki/Document_type_declaration" title="Document type declaration" class="mw-redirect">document type declaration</a>.</li>
</ul>
<p>If compatibility with existing programs is not important, the BOM <i>could</i> be used to identify UTF-8 encoding. Because checking if text is valid UTF-8 is very reliable (the majority of random byte sequences are <i>not</i> valid UTF-8) such use should not be necessary. Programs that insert information at the start of a file will break this identification (one example is offline browsers that add the originating URL to the start of the file).</p>
<p>Unofficially, UTF-8-BOM or UTF-8-NOBOM are sometimes used to refer to text files which contain or lack a BOM. In Japan especially, "UTF-8 encoding without BOM" is sometimes called "UTF-8N".<sup class="Template-Fact" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources from February 2012">citation needed</span></a></i>]</sup></p>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=14" title="Edit section: Advantages and disadvantages">edit</a>]</span> <span class="mw-headline" id="Advantages_and_disadvantages">Advantages and disadvantages</span></h2>
<table class="metadata plainlinks ambox mbox-small-left ambox-content ambox-Refimprove" style="">
<tr>
<td class="mbox-image"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/99/Question_book-new.svg/50px-Question_book-new.svg.png" width="50" height="39" /></td>
<td class="mbox-text" style=""><span class="mbox-text-span">This section <b>needs additional <a href="/wiki/Wikipedia:Citing_sources#Inline_citations" title="Wikipedia:Citing sources">citations</a> for <a href="/wiki/Wikipedia:Verifiability" title="Wikipedia:Verifiability">verification</a></b>. <small><i>(October 2009)</i></small></span></td>
</tr>
</table>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=15" title="Edit section: General">edit</a>]</span> <span class="mw-headline" id="General">General</span></h3>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=16" title="Edit section: Advantages">edit</a>]</span> <span class="mw-headline" id="Advantages">Advantages</span></h4>
<ul>
<li>The <a href="/wiki/ASCII" title="ASCII">ASCII</a> characters are represented by themselves as single bytes that do not appear anywhere else, which makes UTF-8 work with the majority of existing APIs that take byte strings but only treat a small number of ASCII codes specially. This removes the need to write a new Unicode version of every API, and makes it much easier to convert existing systems to UTF-8 than to any other Unicode encoding.</li>
<li>UTF-8 is the only encoding for XML entities that does not require a BOM or an indication of the encoding.<sup id="cite_ref-27" class="reference"><a href="#cite_note-27"><span>[</span>28<span>]</span></a></sup></li>
<li>UTF-8 and UTF-16 are the standard encodings for Unicode text in HTML documents, with UTF-8 as the preferred and most used encoding.</li>
<li>UTF-8 strings can be fairly reliably recognized as such by a simple <a href="/wiki/Heuristic_algorithm" title="Heuristic algorithm" class="mw-redirect">heuristic algorithm</a>.<sup id="cite_ref-28" class="reference"><a href="#cite_note-28"><span>[</span>29<span>]</span></a></sup> The probability of a random string of bytes which is not pure ASCII being valid UTF-8 is 3.9% for a two-byte sequence,<sup id="cite_ref-29" class="reference"><a href="#cite_note-29"><span>[</span>30<span>]</span></a></sup> and decreases exponentially for longer sequences. <a href="/wiki/ISO/IEC_8859-1" title="ISO/IEC 8859-1">ISO/IEC 8859-1</a> is even less likely to be mis-recognized as UTF-8: the <i>only</i> non-ASCII characters in it would have to be in sequences starting with either an accented letter or the multiplication symbol and ending with a symbol. This is an advantage that most other encodings do not have, causing errors (<a href="/wiki/Mojibake" title="Mojibake">mojibake</a>) if the receiving application isn't told and can't guess the correct encoding. Even word-based UTF-16 can be mistaken for byte encodings (like in the "<a href="/wiki/Bush_hid_the_facts" title="Bush hid the facts">bush hid the facts</a>" bug).</li>
<li><a href="/wiki/Lexicographical_order" title="Lexicographical order">Sorting</a> of UTF-8 strings as arrays of unsigned bytes will produce the same results as sorting them based on Unicode code points.</li>
<li>Other byte-based encodings can pass through the same API. This means, however, that the encoding must be identified. Because the other encodings are unlikely to be valid UTF-8, a reliable way to implement this is to assume UTF-8 and switch to a legacy encoding only if several invalid UTF-8 byte sequences are encountered.</li>
</ul>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=17" title="Edit section: Disadvantages">edit</a>]</span> <span class="mw-headline" id="Disadvantages">Disadvantages</span></h4>
<ul>
<li>A UTF-8 <a href="/wiki/Parsing" title="Parsing">parser</a> that is not compliant with current versions of the standard might accept a number of different pseudo-UTF-8 representations and convert them to the same Unicode output. This provides a way for information to leak past validation routines designed to process data in its eight-bit representation.<sup id="cite_ref-30" class="reference"><a href="#cite_note-30"><span>[</span>31<span>]</span></a></sup></li>
</ul>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=18" title="Edit section: Compared to single-byte encodings">edit</a>]</span> <span class="mw-headline" id="Compared_to_single-byte_encodings">Compared to single-byte encodings</span></h3>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=19" title="Edit section: Advantages">edit</a>]</span> <span class="mw-headline" id="Advantages_2">Advantages</span></h4>
<ul>
<li>UTF-8 can encode any <a href="/wiki/Unicode" title="Unicode">Unicode</a> character, avoiding the need to figure out and set a "<a href="/wiki/Code_page" title="Code page">code page</a>" or otherwise indicate what character set is in use, and allowing output in multiple scripts at the same time. For many scripts there has been more than one single-byte encoding in use, so even knowing the script was insufficient information to display it correctly.</li>
<li>The bytes 0xFE and 0xFF do not appear, so a valid UTF-8 stream never matches the UTF-16 <a href="/wiki/Byte_order_mark" title="Byte order mark">byte order mark</a> and thus cannot be confused with it. The absence of 0xFF (0377) also eliminates the need to escape this byte in <a href="/wiki/Telnet" title="Telnet">Telnet</a> (and FTP control connection).</li>
</ul>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=20" title="Edit section: Disadvantages">edit</a>]</span> <span class="mw-headline" id="Disadvantages_2">Disadvantages</span></h4>
<ul>
<li>UTF-8 encoded text is larger than the appropriate single-byte encoding except for plain ASCII characters. In the case of scripts which used 8-bit character sets with non-Latin scripts encoded in the upper half (such as most <a href="/wiki/Cyrillic_script" title="Cyrillic script">Cyrillic</a> and <a href="/wiki/Greek_alphabet" title="Greek alphabet">Greek alphabet</a> code pages), characters in UTF-8 will be double the size. For some scripts such as <a href="/wiki/Thai_alphabet" title="Thai alphabet">Thai</a> and <a href="/wiki/Hindi" title="Hindi">Hindi</a>'s <a href="/wiki/Devanagari" title="Devanagari">Devanagari</a>, characters will be triple the size (this has caused objections in India and other countries).</li>
<li>It is possible in UTF-8 (or any other multi-byte encoding) to split or <a href="/wiki/Data_truncation" title="Data truncation">truncate</a> a string in the middle of a character, which may result in an invalid string. This will not happen in correct handling of UTF-8.</li>
<li>If the code points are all the same size, measuring a fixed number of them is easy. Due to ASCII-era documentation where "character" is used as a synonym for "byte", this is often considered important. However, by measuring string positions using bytes instead of "characters", most algorithms can be easily and efficiently adapted for UTF-8<sup class="Template-Fact" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources from December 2009">citation needed</span></a></i>]</sup>.</li>
<li>Some software such as text editors will refuse to correctly display or interpret UTF-8 unless it starts with a <a href="/wiki/Byte_Order_Mark" title="Byte Order Mark" class="mw-redirect">Byte Order Mark</a>. This has the effect of making it impossible to use UTF-8 with any older software that can handle ASCII-like encodings but cannot handle the byte order mark. This is considered an incorrect implementation of the text editor, not the older software.</li>
</ul>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=21" title="Edit section: Compared to other multi-byte encodings">edit</a>]</span> <span class="mw-headline" id="Compared_to_other_multi-byte_encodings">Compared to other multi-byte encodings</span></h3>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=22" title="Edit section: Advantages">edit</a>]</span> <span class="mw-headline" id="Advantages_3">Advantages</span></h4>
<ul>
<li>UTF-8 uses the codes 0–127 <i>only</i> for the ASCII characters. This means that UTF-8 is an <a href="/wiki/Extended_ASCII" title="Extended ASCII">ASCII extension</a> and can with limited change be supported by software that supports an ASCII extension and handles non-ASCII characters as free text.</li>
<li>UTF-8 can encode any <a href="/wiki/Unicode" title="Unicode">Unicode</a> character. Files in different scripts can be displayed correctly without having to choose the correct code page or font. For instance Chinese and Arabic can be supported (in the same text) without special codes inserted or manual settings to switch the encoding.</li>
<li>UTF-8 is "self-synchronizing": character boundaries are easily found when searching either forwards or backwards. If bytes are lost due to error or <a href="/wiki/Data_corruption" title="Data corruption">corruption</a>, one can always locate the beginning of the next character and thus limit the damage. Many multi-byte encodings are much harder to resynchronize.</li>
<li>Any <a href="/wiki/Byte_orientation" title="Byte orientation">byte oriented</a> <a href="/wiki/String_searching_algorithm" title="String searching algorithm">string searching algorithm</a> can be used with UTF-8 data, since the sequence of bytes for a character cannot occur anywhere else. Some older variable-length encodings (such as <a href="/wiki/Shift_JIS" title="Shift JIS">Shift JIS</a>) did not have this property and thus made string-matching algorithms rather complicated. In Shift JIS the end byte of a character and the first byte of the next character could look like another legal character, something that can't happen in UTF-8.</li>
<li>Efficient to encode using simple <a href="/wiki/Bit_operation" title="Bit operation" class="mw-redirect">bit operations</a>. UTF-8 does not require slower mathematical operations such as multiplication or division (unlike the obsolete <a href="/wiki/UTF-1" title="UTF-1">UTF-1</a> encoding).</li>
</ul>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=23" title="Edit section: Disadvantages">edit</a>]</span> <span class="mw-headline" id="Disadvantages_3">Disadvantages</span></h4>
<ul>
<li>For certain scripts UTF-8 will take more space than an older multi-byte encoding. East Asian scripts generally have two bytes per character in their multi-byte encodings yet take three bytes per character in UTF-8.</li>
</ul>
<p><br /></p>
<h3><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=24" title="Edit section: Compared to UTF-16">edit</a>]</span> <span class="mw-headline" id="Compared_to_UTF-16">Compared to UTF-16</span></h3>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=25" title="Edit section: Advantages">edit</a>]</span> <span class="mw-headline" id="Advantages_4">Advantages</span></h4>
<ul>
<li>A text byte stream cannot be losslessly converted to UTF-16, due to the possible presence of errors in the byte stream encoding. This causes unexpected and often severe problems attempting to use existing data in a system that uses UTF-16 as an internal encoding. Results are security bugs, <a href="/wiki/Denial_of_service" title="Denial of service" class="mw-redirect">DoS</a> if bad encoding throws an exception, and data loss when different byte streams convert to the same UTF-16. Due to the ASCII compatibility and high degree of pattern recognition in UTF-8, random byte streams can be passed losslessly through a system using it, as interpretation can be deferred until display.</li>
<li>Converting to UTF-16 while maintaining compatibility with existing programs (such as was done with Windows) requires <i>every</i> API and data structure that takes a string to be duplicated. Invalid encodings make the duplicated APIs not exactly map to each other, often making it impossible to do some action with one of them.</li>
<li>Characters outside the basic multilingual plane are not a special case. UTF-16 is often mistaken to be the obsolete constant-length <a href="/wiki/UCS-2" title="UCS-2" class="mw-redirect">UCS-2</a> encoding, leading to code that works for most text but suddenly fails for non-<a href="/wiki/Mapping_of_Unicode_character_planes" title="Mapping of Unicode character planes" class="mw-redirect">BMP</a> characters.<sup id="cite_ref-31" class="reference"><a href="#cite_note-31"><span>[</span>32<span>]</span></a></sup></li>
<li>Text encoded in UTF-8 is often smaller than (or the same size as) the same text encoded in UTF-16.
<ul>
<li>This is always true for text using only code points below U+0800 (which includes all modern European scripts), as each code point's UTF-8 encoding is one or two bytes then.</li>
<li>Even if text contains code points between U+0800 and U+FFFF, it might contain so many code points below U+0080 (which UTF-8 encodes in one byte) that the UTF-8 encoding is still smaller. As HTML markup and line terminators are code points below U+0080, most HTML source is smaller if encoded in UTF-8 even for Asian scripts.</li>
<li><a href="/wiki/Unicode_plane" title="Unicode plane" class="mw-redirect">Non-BMP</a> characters (U+10000 and above) are encoded in UTF-8 in four bytes, the same size as in UTF-16.</li>
</ul>
</li>
<li>Most communication and storage was designed for a stream of bytes. A UTF-16 string must use a pair of bytes for each code unit:
<ul>
<li>The order of those two bytes becomes an issue and must be specified in the UTF-16 protocol, such as with a <a href="/wiki/Byte_order_mark" title="Byte order mark">byte order mark</a>.</li>
<li>If an odd number of bytes is missing from UTF-16, the whole rest of the string will be meaningless text. Any bytes missing from UTF-8 will still allow the text to be recovered accurately starting with the next character after the missing bytes. If any partial character is removed the corruption is always recognizable.</li>
</ul>
</li>
</ul>
<h4><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=26" title="Edit section: Disadvantages">edit</a>]</span> <span class="mw-headline" id="Disadvantages_4">Disadvantages</span></h4>
<ul>
<li>Characters U+0800 through U+FFFF use three bytes in UTF-8, but only two in UTF-16. As a result, text in (for example) Chinese, Japanese or Hindi could take more space in UTF-8 if there are more of these characters than there are ASCII characters. This happens for pure text,<sup id="cite_ref-32" class="reference"><a href="#cite_note-32"><span>[</span>33<span>]</span></a></sup> but rarely for HTML documents. For example, both the Japanese UTF-8 and the Hindi Unicode articles on Wikipedia take more space in UTF-16 than in UTF-8.<sup id="cite_ref-33" class="reference"><a href="#cite_note-33"><span>[</span>34<span>]</span></a></sup></li>
</ul>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=27" title="Edit section: See also">edit</a>]</span> <span class="mw-headline" id="See_also">See also</span></h2>
<ul>
<li><a href="/wiki/Alt_code" title="Alt code">Alt code</a></li>
<li><a href="/wiki/Character_encodings_in_HTML" title="Character encodings in HTML">Character encodings in HTML</a></li>
<li><a href="/wiki/Comparison_of_e-mail_clients#Features" title="Comparison of e-mail clients" class="mw-redirect">Comparison of e-mail clients#Features</a></li>
<li><a href="/wiki/Comparison_of_Unicode_encodings" title="Comparison of Unicode encodings">Comparison of Unicode encodings</a></li>
<li><a href="/wiki/GB_18030" title="GB 18030">GB 18030</a></li>
<li><a href="/wiki/Iconv" title="Iconv">Iconv</a>—a standardized <a href="/wiki/Application_programming_interface" title="Application programming interface">API</a> used to convert between different <a href="/wiki/Character_encoding" title="Character encoding">character encodings</a></li>
<li><a href="/wiki/ISO/IEC_8859" title="ISO/IEC 8859">ISO/IEC 8859</a></li>
<li><a href="/wiki/Specials_(Unicode_block)" title="Specials (Unicode block)">Specials (Unicode block)</a></li>
<li><a href="/wiki/Unicode_and_e-mail" title="Unicode and e-mail" class="mw-redirect">Unicode and e-mail</a></li>
<li><a href="/wiki/Unicode_and_HTML" title="Unicode and HTML">Unicode and HTML</a></li>
<li><a href="/wiki/Universal_Character_Set" title="Universal Character Set">Universal Character Set</a></li>
<li><a href="/wiki/Percent-encoding#Current_standard" title="Percent-encoding">UTF-8 in URIs</a></li>
<li><a href="/wiki/UTF-9_and_UTF-18" title="UTF-9 and UTF-18">UTF-9 and UTF-18</a></li>
<li><a href="/wiki/UTF-16/UCS-2" title="UTF-16/UCS-2" class="mw-redirect">UTF-16/UCS-2</a></li>
</ul>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=28" title="Edit section: References">edit</a>]</span> <span class="mw-headline" id="References">References</span></h2>
<div class="reflist references-column-count references-column-count-2" style="-moz-column-count: 2; -webkit-column-count: 2; column-count: 2; list-style-type: decimal;">
<ol class="references">
<li id="cite_note-0"><span class="mw-cite-backlink"><b><a href="#cite_ref-0">^</a></b></span> <span class="reference-text"><span class="citation book"><a rel="nofollow" class="external text" href="http://www.unicode.org/">The Unicode Consortium</a>. <a rel="nofollow" class="external text" href="http://www.unicode.org/versions/Unicode6.0.0/">"Chapter 2. General Structure"</a>. <i>The Unicode Standard</i> (6.0 ed.). Mountain View, California, USA: The Unicode Consortium. <a href="/wiki/International_Standard_Book_Number" title="International Standard Book Number">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-936213-01-6" title="Special:BookSources/978-1-936213-01-6">978-1-936213-01-6</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://www.unicode.org/versions/Unicode6.0.0/">http://www.unicode.org/versions/Unicode6.0.0/</a></span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Chapter+2.+General+Structure&amp;rft.atitle=The+Unicode+Standard&amp;rft.aulast=The+Unicode+Consortium&amp;rft.au=The+Unicode+Consortium&amp;rft.edition=6.0&amp;rft.place=Mountain+View%2C+California%2C+USA&amp;rft.pub=The+Unicode+Consortium&amp;rft.isbn=978-1-936213-01-6&amp;rft_id=http%3A%2F%2Fwww.unicode.org%2Fversions%2FUnicode6.0.0%2F&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span>. <a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a> also refers to UTF-8 as "UCS transformation format". Also commonly known as "Unicode Transformation Format".</span></li>
<li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><span class="citation web">Mark Davis (28 January 2010). <a rel="nofollow" class="external text" href="http://googleblog.blogspot.com/2010/01/unicode-nearing-50-of-web.html">"Unicode nearing 50% of the web"</a>. <i>Official Google Blog</i>. <a href="/wiki/Google" title="Google">Google</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://googleblog.blogspot.com/2010/01/unicode-nearing-50-of-web.html">http://googleblog.blogspot.com/2010/01/unicode-nearing-50-of-web.html</a></span><span class="reference-accessdate">. Retrieved 5 December 2010</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Unicode+nearing+50%25+of+the+web&amp;rft.atitle=Official+Google+Blog&amp;rft.aulast=Mark+Davis&amp;rft.au=Mark+Davis&amp;rft.date=28+January+2010&amp;rft.pub=%5B%5BGoogle%5D%5D&amp;rft_id=http%3A%2F%2Fgoogleblog.blogspot.com%2F2010%2F01%2Funicode-nearing-50-of-web.html&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-BuiltWith-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-BuiltWith_2-0">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://trends.builtwith.com/encoding/UTF-8">"UTF-8 Usage Statistics"</a>. BuiltWith<span class="printonly">. <a rel="nofollow" class="external free" href="http://trends.builtwith.com/encoding/UTF-8">http://trends.builtwith.com/encoding/UTF-8</a></span><span class="reference-accessdate">. Retrieved 2011-03-28</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=UTF-8+Usage+Statistics&amp;rft.atitle=&amp;rft.pub=BuiltWith&amp;rft_id=http%3A%2F%2Ftrends.builtwith.com%2Fencoding%2FUTF-8&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-W3Techs-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-W3Techs_3-0">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://w3techs.com/technologies/overview/character_encoding/all">"Usage of character encodings for websites"</a>. W3Techs<span class="printonly">. <a rel="nofollow" class="external free" href="http://w3techs.com/technologies/overview/character_encoding/all">http://w3techs.com/technologies/overview/character_encoding/all</a></span><span class="reference-accessdate">. Retrieved 2010-03-30</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Usage+of+character+encodings+for+websites&amp;rft.atitle=&amp;rft.pub=W3Techs&amp;rft_id=http%3A%2F%2Fw3techs.com%2Ftechnologies%2Foverview%2Fcharacter_encoding%2Fall&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-rfc2277-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-rfc2277_4-0">^</a></b></span> <span class="reference-text"><span class="citation Journal">Alvestrand, H. (1998). "IETF Policy on Character Sets and Languages". <i><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc2277">RFC 2277</a></i>. <a href="/wiki/Internet_Engineering_Task_Force" title="Internet Engineering Task Force">Internet Engineering Task Force</a>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=IETF+Policy+on+Character+Sets+and+Languages&amp;rft.atitle=RFC+2277&amp;rft.aulast=Alvestrand&amp;rft.aufirst=H.&amp;rft.au=Alvestrand%2C%26%2332%3BH.&amp;rft.date=1998&amp;rft.pub=%5B%5BInternet+Engineering+Task+Force%5D%5D&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-IMC-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-IMC_5-0">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://www.imc.org/mail-i18n.html">"Using International Characters in Internet Mail"</a>. Internet Mail Consortium. August 1, 1998<span class="printonly">. <a rel="nofollow" class="external free" href="http://www.imc.org/mail-i18n.html">http://www.imc.org/mail-i18n.html</a></span><span class="reference-accessdate">. Retrieved 2007-11-08</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Using+International+Characters+in+Internet+Mail&amp;rft.atitle=&amp;rft.date=August+1%2C+1998&amp;rft.pub=Internet+Mail+Consortium&amp;rft_id=http%3A%2F%2Fwww.imc.org%2Fmail-i18n.html&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text">Not all of the 1,112,064 possible code points have been assigned characters; many are reserved for future use, and some are reserved for private use, while still others are specified as permanently undefined.</span></li>
<li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text">More precisely, the number of bytes used to encode a character at a given code point is a <a href="/wiki/Monotonic_function" title="Monotonic function">monotonically increasing function</a> of the numerical value of the code point.</span></li>
<li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><span class="citation web">Internet Assigned Numbers Authority (4 November 2010). <a rel="nofollow" class="external text" href="http://www.iana.org/assignments/character-sets">"CHARACTER SETS"</a>. IANA<span class="printonly">. <a rel="nofollow" class="external free" href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a></span><span class="reference-accessdate">. Retrieved 5 December 2010</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=CHARACTER+SETS&amp;rft.atitle=&amp;rft.aulast=Internet+Assigned+Numbers+Authority&amp;rft.au=Internet+Assigned+Numbers+Authority&amp;rft.date=4+November+2010&amp;rft.pub=IANA&amp;rft_id=http%3A%2F%2Fwww.iana.org%2Fassignments%2Fcharacter-sets&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><span class="citation web">Pike, Rob (2003-04-03). <a rel="nofollow" class="external text" href="http://www.cl.cam.ac.uk/~mgk25/ucs/utf-8-history.txt">"UTF-8 history"</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://www.cl.cam.ac.uk/~mgk25/ucs/utf-8-history.txt">http://www.cl.cam.ac.uk/~mgk25/ucs/utf-8-history.txt</a></span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=UTF-8+history&amp;rft.atitle=&amp;rft.aulast=Pike&amp;rft.aufirst=Rob&amp;rft.au=Pike%2C%26%2332%3BRob&amp;rft.date=2003-04-03&amp;rft_id=http%3A%2F%2Fwww.cl.cam.ac.uk%2F%7Emgk25%2Fucs%2Futf-8-history.txt&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-MS00-078-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-MS00-078_10-0">^</a></b></span> <span class="reference-text"><span class="citation web">Marin, Marvin (2000-10-17). <a rel="nofollow" class="external text" href="http://www.sans.org/resources/malwarefaq/wnt-unicode.php">"Web Server Folder Traversal MS00-078"</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://www.sans.org/resources/malwarefaq/wnt-unicode.php">http://www.sans.org/resources/malwarefaq/wnt-unicode.php</a></span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Web+Server+Folder+Traversal+MS00-078&amp;rft.atitle=&amp;rft.aulast=Marin&amp;rft.aufirst=Marvin&amp;rft.au=Marin%2C%26%2332%3BMarvin&amp;rft.date=2000-10-17&amp;rft_id=http%3A%2F%2Fwww.sans.org%2Fresources%2Fmalwarefaq%2Fwnt-unicode.php&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-CVE-2008-2938-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-CVE-2008-2938_11-0">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2008-2938">"National Vulnerability Database - Summary for CVE-2008-2938"</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2008-2938">http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2008-2938</a></span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=National+Vulnerability+Database+-+Summary+for+CVE-2008-2938&amp;rft.atitle=&amp;rft_id=http%3A%2F%2Fweb.nvd.nist.gov%2Fview%2Fvuln%2Fdetail%3FvulnId%3DCVE-2008-2938&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-rfc3629-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-rfc3629_12-0">^</a></b></span> <span class="reference-text"><span class="citation Journal">Yergeau, F. (2003). "UTF-8, a transformation format of ISO 10646". <i><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a></i>. <a href="/wiki/Internet_Engineering_Task_Force" title="Internet Engineering Task Force">Internet Engineering Task Force</a></span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=UTF-8%2C+a+transformation+format+of+ISO+10646&amp;rft.atitle=RFC+3629&amp;rft.aulast=Yergeau&amp;rft.aufirst=F.&amp;rft.au=Yergeau%2C%26%2332%3BF.&amp;rft.date=2003&amp;rft.pub=%5B%5BInternet+Engineering+Task+Force%5D%5D&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text">Examples: <a rel="nofollow" class="external text" href="http://gwt-lzma.googlecode.com/svn-history/r8/trunk/publish/javadoc/org/dellroad/lzma/client/UTF8.html">UTF8 (Java Class Library API)</a> or <a rel="nofollow" class="external text" href="http://download.oracle.com/javase/1.4.2/docs/api/java/nio/charset/CharsetDecoder.html#decode(java.nio.ByteBuffer)">java.nio.charset.CharsetDecoder.decode</a></span></li>
<li id="cite_note-PEP383-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-PEP383_14-0">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://www.python.org/dev/peps/pep-0383/">"Non-decodable Bytes in System Character Interfaces"</a><span class="printonly">. <a rel="nofollow" class="external free" href="http://www.python.org/dev/peps/pep-0383/">http://www.python.org/dev/peps/pep-0383/</a></span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Non-decodable+Bytes+in+System+Character+Interfaces&amp;rft.atitle=&amp;rft_id=http%3A%2F%2Fwww.python.org%2Fdev%2Fpeps%2Fpep-0383%2F&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.w3.org/International/O-HTTP-charset">W3C: Setting the HTTP charset parameter</a> notes that the <a href="/wiki/Internet_Assigned_Numbers_Authority" title="Internet Assigned Numbers Authority">IANA</a> list is used for HTTP</span></li>
<li id="cite_note-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-16">^</a></b></span> <span class="reference-text"><a href="/wiki/Internet_Assigned_Numbers_Authority" title="Internet Assigned Numbers Authority">Internet Assigned Numbers Authority</a> <a rel="nofollow" class="external text" href="http://www.iana.org/assignments/character-sets">Character Sets</a></span></li>
<li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.ietf.org/rfc/rfc3629.txt">RFC 3629 UTF-8</a> see chapter 8. MIME registration, first paragraph</span></li>
<li id="cite_note-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-18">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8">"Java SE 6 documentation for Interface java.io.DataInput, subsection on Modified UTF-8"</a>. <a href="/wiki/Sun_Microsystems" title="Sun Microsystems">Sun Microsystems</a>. 2008<span class="printonly">. <a rel="nofollow" class="external free" href="http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8">http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8</a></span><span class="reference-accessdate">. Retrieved 2009-05-22</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Java+SE+6+documentation+for+Interface+java.io.DataInput%2C+subsection+on+Modified+UTF-8&amp;rft.atitle=&amp;rft.date=2008&amp;rft.pub=%5B%5BSun+Microsystems%5D%5D&amp;rft_id=http%3A%2F%2Fjava.sun.com%2Fjavase%2F6%2Fdocs%2Fapi%2Fjava%2Fio%2FDataInput.html%23modified-utf-8&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-19">^</a></b></span> <span class="reference-text">"[...] the overlong UTF-8 sequence C0 80 [...]", "[...] the illegal two-octet sequence C0 80 [...]"<span class="citation web"><a rel="nofollow" class="external text" href="http://www.apps.ietf.org/rfc/rfc3629.html#page-5">"Request for Comments 3629: "UTF-8, a transformation format of ISO 10646""</a>. 2003<span class="printonly">. <a rel="nofollow" class="external free" href="http://www.apps.ietf.org/rfc/rfc3629.html#page-5">http://www.apps.ietf.org/rfc/rfc3629.html#page-5</a></span><span class="reference-accessdate">. Retrieved 2009-05-22</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Request+for+Comments+3629%3A+%22UTF-8%2C+a+transformation+format+of+ISO+10646%22&amp;rft.atitle=&amp;rft.date=2003&amp;rft_id=http%3A%2F%2Fwww.apps.ietf.org%2Frfc%2Frfc3629.html%23page-5&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-20">^</a></b></span> <span class="reference-text">"[...] Java virtual machine UTF-8 strings never have embedded nulls."<span class="citation web"><a rel="nofollow" class="external text" href="http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963">"The Java Virtual Machine Specification, 2nd Edition, section 4.4.7: "The CONSTANT_Utf8_info Structure""</a>. <a href="/wiki/Sun_Microsystems" title="Sun Microsystems">Sun Microsystems</a>. 1999<span class="printonly">. <a rel="nofollow" class="external free" href="http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963">http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963</a></span><span class="reference-accessdate">. Retrieved 2009-05-24</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=The+Java+Virtual+Machine+Specification%2C+2nd+Edition%2C+section+4.4.7%3A+%22The+CONSTANT_Utf8_info+Structure%22&amp;rft.atitle=&amp;rft.date=1999&amp;rft.pub=%5B%5BSun+Microsystems%5D%5D&amp;rft_id=http%3A%2F%2Fjava.sun.com%2Fdocs%2Fbooks%2Fjvms%2Fsecond_edition%2Fhtml%2FClassFile.doc.html%237963&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-21"><span class="mw-cite-backlink"><b><a href="#cite_ref-21">^</a></b></span> <span class="reference-text">"[...] encoded in modified UTF-8."<span class="citation web"><a rel="nofollow" class="external text" href="http://java.sun.com/javase/6/docs/platform/serialization/spec/protocol.html#8299">"Java Object Serialization Specification, chapter 6: Object Serialization Stream Protocol, section 2: Stream Elements"</a>. <a href="/wiki/Sun_Microsystems" title="Sun Microsystems">Sun Microsystems</a>. 2005<span class="printonly">. <a rel="nofollow" class="external free" href="http://java.sun.com/javase/6/docs/platform/serialization/spec/protocol.html#8299">http://java.sun.com/javase/6/docs/platform/serialization/spec/protocol.html#8299</a></span><span class="reference-accessdate">. Retrieved 2009-05-22</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Java+Object+Serialization+Specification%2C+chapter+6%3A+Object+Serialization+Stream+Protocol%2C+section+2%3A+Stream+Elements&amp;rft.atitle=&amp;rft.date=2005&amp;rft.pub=%5B%5BSun+Microsystems%5D%5D&amp;rft_id=http%3A%2F%2Fjava.sun.com%2Fjavase%2F6%2Fdocs%2Fplatform%2Fserialization%2Fspec%2Fprotocol.html%238299&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-22"><span class="mw-cite-backlink"><b><a href="#cite_ref-22">^</a></b></span> <span class="reference-text">"The JNI uses modified UTF-8 strings to represent various string types."<span class="citation web"><a rel="nofollow" class="external text" href="http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/types.html#wp16542">"Java Native Interface Specification, chapter 3: JNI Types and Data Structures, section: Modified UTF-8 Strings"</a>. <a href="/wiki/Sun_Microsystems" title="Sun Microsystems">Sun Microsystems</a>. 2003<span class="printonly">. <a rel="nofollow" class="external free" href="http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/types.html#wp16542">http://java.sun.com/j2se/1.5.0/docs/guide/jni/spec/types.html#wp16542</a></span><span class="reference-accessdate">. Retrieved 2009-05-22</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Java+Native+Interface+Specification%2C+chapter+3%3A+JNI+Types+and+Data+Structures%2C+section%3A+Modified+UTF-8+Strings&amp;rft.atitle=&amp;rft.date=2003&amp;rft.pub=%5B%5BSun+Microsystems%5D%5D&amp;rft_id=http%3A%2F%2Fjava.sun.com%2Fj2se%2F1.5.0%2Fdocs%2Fguide%2Fjni%2Fspec%2Ftypes.html%23wp16542&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-23">^</a></b></span> <span class="reference-text">"[...] differences between this format and the "standard" UTF-8 format."<span class="citation web"><a rel="nofollow" class="external text" href="http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963">"The Java Virtual Machine Specification, 2nd Edition, section 4.4.7: "The CONSTANT_Utf8_info Structure""</a>. <a href="/wiki/Sun_Microsystems" title="Sun Microsystems">Sun Microsystems</a>. 1999<span class="printonly">. <a rel="nofollow" class="external free" href="http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963">http://java.sun.com/docs/books/jvms/second_edition/html/ClassFile.doc.html#7963</a></span><span class="reference-accessdate">. Retrieved 2009-05-23</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=The+Java+Virtual+Machine+Specification%2C+2nd+Edition%2C+section+4.4.7%3A+%22The+CONSTANT_Utf8_info+Structure%22&amp;rft.atitle=&amp;rft.date=1999&amp;rft.pub=%5B%5BSun+Microsystems%5D%5D&amp;rft_id=http%3A%2F%2Fjava.sun.com%2Fdocs%2Fbooks%2Fjvms%2Fsecond_edition%2Fhtml%2FClassFile.doc.html%237963&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-24">^</a></b></span> <span class="reference-text">"In orthodox UTF-8, a NUL byte(\x00) is represented by a NUL byte. [...] But [...] we [...] want NUL bytes inside [...] strings [...]"<span class="citation web"><a rel="nofollow" class="external text" href="http://wiki.tcl.tk/_/revision?N=1211&amp;V=6">"Tcler's Wiki: UTF-8 bit by bit (Revision 6)"</a>. 2009-04-25<span class="printonly">. <a rel="nofollow" class="external free" href="http://wiki.tcl.tk/_/revision?N=1211&amp;V=6">http://wiki.tcl.tk/_/revision?N=1211&amp;V=6</a></span><span class="reference-accessdate">. Retrieved 2009-05-22</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Tcler%27s+Wiki%3A+UTF-8+bit+by+bit+%28Revision+6%29&amp;rft.atitle=&amp;rft.date=2009-04-25&amp;rft_id=http%3A%2F%2Fwiki.tcl.tk%2F_%2Frevision%3FN%3D1211%26V%3D6&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-25"><span class="mw-cite-backlink"><b><a href="#cite_ref-25">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.ling.upenn.edu/courses/Spring_2003/ling538/Lecnotes/Unicode.html">Computational Methods in Linguistic Research, 2004</a></span></li>
<li id="cite_note-26"><span class="mw-cite-backlink"><b><a href="#cite_ref-26">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.unicode.org/versions/Unicode6.0.0/ch02.pdf">The Unicode Standard - Chapter 2</a>, see chapter 2.6 page 30 bottom.</span></li>
<li id="cite_note-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-27">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.w3.org/TR/REC-xml/#charencoding">W3.org</a></span></li>
<li id="cite_note-28"><span class="mw-cite-backlink"><b><a href="#cite_ref-28">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.w3.org/International/questions/qa-forms-utf-8">W3 FAQ: Multilingual Forms</a> (a Perl regular expression to validate a UTF-8 string)</span></li>
<li id="cite_note-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-29">^</a></b></span> <span class="reference-text">There are <span class="texhtml">256×256 − 128×128</span> not-pure-ASCII two-byte sequences, and of those, only 1920 encode valid UTF-8 characters (the range U+0080 to U+07FF), so the proportion of valid not-pure-ASCII two-byte sequences is 3.9%. Note that this assumes that control characters pass as ASCII; without the control characters, the percentage proportions drop somewhat.</span></li>
<li id="cite_note-30"><span class="mw-cite-backlink"><b><a href="#cite_ref-30">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://tools.ietf.org/html/rfc3629#section-10">Tools.ietf.org</a></span></li>
<li id="cite_note-31"><span class="mw-cite-backlink"><b><a href="#cite_ref-31">^</a></b></span> <span class="reference-text"><span class="citation web"><a rel="nofollow" class="external text" href="http://stackoverflow.com/questions/1049947/should-utf-16-be-considered-harmful">"Should UTF-16 be considered harmful?"</a>. Stackoverflow.com<span class="printonly">. <a rel="nofollow" class="external free" href="http://stackoverflow.com/questions/1049947/should-utf-16-be-considered-harmful">http://stackoverflow.com/questions/1049947/should-utf-16-be-considered-harmful</a></span><span class="reference-accessdate">. Retrieved 2010-09-13</span>.</span><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.btitle=Should+UTF-16+be+considered+harmful%3F&amp;rft.atitle=&amp;rft.pub=Stackoverflow.com&amp;rft_id=http%3A%2F%2Fstackoverflow.com%2Fquestions%2F1049947%2Fshould-utf-16-be-considered-harmful&amp;rfr_id=info:sid/en.wikipedia.org:UTF-8"><span style="display: none;">&#160;</span></span></span></li>
<li id="cite_note-32"><span class="mw-cite-backlink"><b><a href="#cite_ref-32">^</a></b></span> <span class="reference-text">Although the difference may not be great: the 2010-11-22 version of <a href="//hi.wikipedia.org/wiki/%E0%A4%AF%E0%A5%82%E0%A4%A8%E0%A4%BF%E0%A4%95%E0%A5%8B%E0%A4%A1" class="extiw" title="hi:यूनिकोड">hi:यूनिकोड</a> (Unicode in Hindi), when the pure text was pasted to Notepad, generated 19 KB when saved as UTF-16 and 22 KB when saved as UTF-8.</span></li>
<li id="cite_note-33"><span class="mw-cite-backlink"><b><a href="#cite_ref-33">^</a></b></span> <span class="reference-text">The 2010-10-27 version of <a href="//ja.wikipedia.org/wiki/UTF-8" class="extiw" title="ja:UTF-8">ja:UTF-8</a> generated 169 KB when converted with Notepad to UTF-16, and only 101 KB when converted back to UTF-8. The 2010-11-22 version of <a href="//hi.wikipedia.org/wiki/%E0%A4%AF%E0%A5%82%E0%A4%A8%E0%A4%BF%E0%A4%95%E0%A5%8B%E0%A4%A1" class="extiw" title="hi:यूनिकोड">hi:यूनिकोड</a> (Unicode in Hindi) required 119 KB in UTF-16 and 76 KB in UTF-8.</span></li>
</ol>
</div>
<h2><span class="editsection">[<a href="/w/index.php?title=UTF-8&amp;action=edit&amp;section=29" title="Edit section: External links">edit</a>]</span> <span class="mw-headline" id="External_links">External links</span></h2>
<p>There are several current definitions of UTF-8 in various standards documents:</p>
<ul>
<li><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc3629">RFC 3629</a> / STD 63 (2003), which establishes UTF-8 as a standard Internet protocol element</li>
<li><i>The Unicode Standard, Version 6.0</i>, <a rel="nofollow" class="external text" href="http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf">§3.9 D92, §3.10 D95</a> (2011)</li>
<li>ISO/IEC 10646:2003 Annex D (2003)</li>
</ul>
<p>They supersede the definitions given in the following obsolete works:</p>
<ul>
<li>ISO/IEC 10646-1:1993 Amendment 2 / Annex R (1996)</li>
<li><i>The Unicode Standard, Version 5.0</i>, §3.9 D92, §3.10 D95 (2007)</li>
<li><i>The Unicode Standard, Version 4.0</i>, §3.9–§3.10 (2003)</li>
<li><i>The Unicode Standard, Version 2.0</i>, Appendix A (1996)</li>
<li><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc2044">RFC 2044</a> (1996)</li>
<li><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc2279">RFC 2279</a> (1998)</li>
<li><i>The Unicode Standard, Version 3.0</i>, §2.3 (2000) plus Corrigendum #1&#160;: UTF-8 Shortest Form (2000)</li>
<li><i>Unicode Standard Annex #27: Unicode 3.1</i> (2001)</li>
</ul>
<p>They are all the same in their general mechanics, with the main differences being on issues such as allowed range of code point values and safe handling of invalid input.</p>
<ul>
<li><a rel="nofollow" class="external text" href="http://doc.cat-v.org/plan_9/4th_edition/papers/utf">Original UTF-8 paper</a> (<a rel="nofollow" class="external text" href="http://plan9.bell-labs.com/sys/doc/utf.pdf">or pdf</a>) for <a href="/wiki/Plan_9_from_Bell_Labs" title="Plan 9 from Bell Labs">Plan 9 from Bell Labs</a></li>
<li><a class="external mw-magiclink-rfc" href="//tools.ietf.org/html/rfc5198">RFC 5198</a> defines UTF-8 <a href="/wiki/Unicode_equivalence" title="Unicode equivalence">NFC</a> for Network Interchange</li>
<li>UTF-8 test pages by <a rel="nofollow" class="external text" href="http://www.user.uni-hannover.de/nhtcapri/multilingual1.html">Andreas Prilop</a>, <a rel="nofollow" class="external text" href="http://titus.uni-frankfurt.de/indexe.htm?/unicode/unitest.htm">Jost Gippert</a> and the <a rel="nofollow" class="external text" href="http://www.w3.org/2001/06/utf-8-test/UTF-8-demo.html">World Wide Web Consortium</a></li>
<li><a rel="nofollow" class="external text" href="http://dotancohen.com/howto/email-utf8.html">How to configure e-mail clients to send UTF-8 text</a></li>
<li>Unix/Linux: <a rel="nofollow" class="external text" href="http://www.cl.cam.ac.uk/~mgk25/unicode.html">UTF-8/Unicode FAQ</a>, <a rel="nofollow" class="external text" href="http://www.linux.org/docs/ldp/howto/Unicode-HOWTO.html">Linux Unicode HOWTO</a>, <a rel="nofollow" class="external text" href="http://www.gentoo.org/doc/en/utf-8.xml">UTF-8 and Gentoo</a></li>
<li><a rel="nofollow" class="external text" href="http://www.utf8-chartable.de/">The Unicode/UTF-8-character table</a> displays UTF-8 in a variety of formats (with Unicode and HTML encoding information)</li>
<li><a rel="nofollow" class="external text" href="http://www.alanwood.net/unicode/browsers.html">Unicode and Multilingual Web Browsers</a> from Alan Wood's Unicode Resources describes support and additional configuration of Unicode/UTF-8 in modern browsers</li>
<li><a rel="nofollow" class="external text" href="http://jspwiki.org/wiki/JSPWikiBrowserCompatibility">JSP Wiki Browser Compatibility page</a> details specific problems with UTF-8 in older browsers</li>
<li><a rel="nofollow" class="external text" href="http://tlt.psu.edu/suggestions/international/bylanguage/math.html#browsers">Mathematical Symbols in Unicode</a></li>
<li><a rel="nofollow" class="external text" href="http://demo.icu-project.org/icu-bin/convexp?conv=UTF-8">Graphical View of UTF-8 in ICU's Converter Explorer</a></li>
</ul>
<table cellspacing="0" class="navbox" style="border-spacing:0;;">
<tr>
<td style="padding:2px;">
<table cellspacing="0" class="nowraplinks collapsible collapsed navbox-inner" style="border-spacing:0;background:transparent;color:inherit;;">
<tr>
<th scope="col" style=";" class="navbox-title" colspan="2">
<div class="noprint plainlinks hlist navbar mini" style="">
<ul>
<li class="nv-view"><a href="/wiki/Template:Unicode_navigation" title="Template:Unicode navigation"><span title="View this template" style=";;background:none transparent;border:none;">v</span></a></li>
<li class="nv-talk"><a href="/wiki/Template_talk:Unicode_navigation" title="Template talk:Unicode navigation"><span title="Discuss this template" style=";;background:none transparent;border:none;">t</span></a></li>
<li class="nv-edit"><a class="external text" href="//en.wikipedia.org/w/index.php?title=Template:Unicode_navigation&amp;action=edit"><span title="Edit this template" style=";;background:none transparent;border:none;">e</span></a></li>
</ul>
</div>
<div class="" style="font-size:110%;"><a href="/wiki/Unicode" title="Unicode">Unicode</a></div>
</th>
</tr>
<tr style="height:2px;">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Unicode</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Unicode_Consortium" title="Unicode Consortium">Unicode Consortium</a></li>
<li><a href="/wiki/Universal_Character_Set" title="Universal Character Set">ISO/IEC 10646 (Universal Character Set)</a></li>
<li><a href="/wiki/Unicode#Versions" title="Unicode">Versions</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Code_point" title="Code point">Code points</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Unicode_block" title="Unicode block">Block</a></li>
<li><a href="/wiki/Category:Unicode_charts" title="Category:Unicode charts">Character charts</a></li>
<li><a href="/wiki/Unicode_character_property" title="Unicode character property">Character property</a></li>
<li><a href="/wiki/Mapping_of_Unicode_characters" title="Mapping of Unicode characters">Mapping characters</a></li>
<li><a href="/wiki/Plane_(Unicode)" title="Plane (Unicode)">Plane</a></li>
<li><a href="/wiki/Private_Use_(Unicode)" title="Private Use (Unicode)">Private Use Area</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Characters</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em"></div>
<table cellspacing="0" class="nowraplinks navbox-subgroup" style="border-spacing:0;;;;">
<tr>
<th scope="row" class="navbox-group" style=";width:12em;padding-left:0em;padding-right:0em;;">
<div style="padding:0em 0.75em;"><a href="/wiki/Mapping_of_Unicode_characters#Special-purpose_characters" title="Mapping of Unicode characters">Special purpose</a></div>
</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Byte_order_mark" title="Byte order mark">BOM</a></li>
<li><a href="/wiki/Combining_grapheme_joiner" title="Combining grapheme joiner">Combining grapheme joiner</a></li>
<li><a href="/wiki/Left-to-right_mark" title="Left-to-right mark">Left-to-right mark</a> / <a href="/wiki/Right-to-left_mark" title="Right-to-left mark">Right-to-left mark</a></li>
<li><a href="/wiki/Soft_hyphen" title="Soft hyphen">Soft hyphen</a></li>
<li><a href="/wiki/Zero-width_joiner" title="Zero-width joiner">Zero-width joiner</a></li>
<li><a href="/wiki/Zero-width_non-breaking_space" title="Zero-width non-breaking space">Zero-width non-breaking space</a></li>
<li><a href="/wiki/Zero-width_non-joiner" title="Zero-width non-joiner">Zero-width non-joiner</a></li>
<li><a href="/wiki/Zero-width_space" title="Zero-width space">Zero-width space</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";width:12em;padding-left:0em;padding-right:0em;;">
<div style="padding:0em 0.75em;">Lists</div>
</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK Unified Ideographs</a></li>
<li><a href="/wiki/Combining_character" title="Combining character">Combining character</a></li>
<li><a href="/wiki/Duplicate_characters_in_Unicode" title="Duplicate characters in Unicode">Duplicate characters</a></li>
<li><a href="/wiki/Numerals_in_Unicode" title="Numerals in Unicode">Numerals</a></li>
<li><a href="/wiki/Script_(Unicode)" title="Script (Unicode)">Scripts</a></li>
<li><a href="/wiki/Space_(punctuation)#Spaces_in_Unicode" title="Space (punctuation)">Spaces</a></li>
<li><a href="/wiki/Unicode_symbols" title="Unicode symbols">Symbols</a></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Processing</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em"></div>
<table cellspacing="0" class="nowraplinks navbox-subgroup" style="border-spacing:0;;;;">
<tr>
<th scope="row" class="navbox-group" style=";width:12em;padding-left:0em;padding-right:0em;;">
<div style="padding:0em 0.75em;">Algorithms</div>
</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Bi-directional_text" title="Bi-directional text">Bi-directional text</a></li>
<li><a href="/wiki/Unicode_collation_algorithm" title="Unicode collation algorithm">Collation</a>
<ul>
<li><a href="/wiki/ISO_14651" title="ISO 14651">ISO 14651</a></li>
</ul>
</li>
<li><a href="/wiki/Unicode_equivalence" title="Unicode equivalence">Equivalence</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";width:12em;padding-left:0em;padding-right:0em;;">
<div style="padding:0em 0.75em;"><a href="/wiki/Comparison_of_Unicode_encodings" title="Comparison of Unicode encodings">Comparison</a></div>
</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;padding:0px;;;" class="navbox-list navbox-even">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Binary_Ordered_Compression_for_Unicode" title="Binary Ordered Compression for Unicode">BOCU-1</a></li>
<li><a href="/wiki/CESU-8" title="CESU-8">CESU-8</a></li>
<li><a href="/wiki/Punycode" title="Punycode">Punycode</a></li>
<li><a href="/wiki/Standard_Compression_Scheme_for_Unicode" title="Standard Compression Scheme for Unicode">SCSU</a></li>
<li><a href="/wiki/UTF-1" title="UTF-1">UTF-1</a></li>
<li><a href="/wiki/UTF-7" title="UTF-7">UTF-7</a></li>
<li><strong class="selflink">UTF-8</strong></li>
<li><a href="/wiki/UTF-9_and_UTF-18" title="UTF-9 and UTF-18">UTF-9/UTF-18</a></li>
<li><a href="/wiki/UTF-16" title="UTF-16">UTF-16/UCS-2</a></li>
<li><a href="/wiki/UTF-32" title="UTF-32">UTF-32/UCS-4</a></li>
<li><a href="/wiki/UTF-EBCDIC" title="UTF-EBCDIC">UTF-EBCDIC</a></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">On pairs of<br />
code points</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Combining_character" title="Combining character">Combining character</a></li>
<li><a href="/wiki/Unicode_compatibility_characters" title="Unicode compatibility characters">Compatibility characters</a></li>
<li><a href="/wiki/Duplicate_characters_in_Unicode" title="Duplicate characters in Unicode">Duplicate characters</a></li>
<li><a href="/wiki/Unicode_equivalence" title="Unicode equivalence">Equivalence</a></li>
<li><a href="/wiki/Homoglyph" title="Homoglyph">Homoglyph</a></li>
<li><a href="/wiki/Precomposed_character" title="Precomposed character">Precomposed character</a>
<ul>
<li><a href="/wiki/List_of_precomposed_Latin_characters_in_Unicode" title="List of precomposed Latin characters in Unicode">list</a></li>
</ul>
</li>
<li><a href="/wiki/Z-variant" title="Z-variant">Z-variant</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Usage</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Internationalized_domain_name" title="Internationalized domain name">Domain names (IDN)</a></li>
<li><a href="/wiki/Unicode_and_email" title="Unicode and email">Email</a></li>
<li><a href="/wiki/Unicode_font" title="Unicode font">Fonts</a></li>
<li><a href="/wiki/Unicode_and_HTML" title="Unicode and HTML">HTML</a>
<ul>
<li><a href="/wiki/List_of_XML_and_HTML_character_entity_references" title="List of XML and HTML character entity references">entity references</a></li>
<li><a href="/wiki/Numeric_character_reference" title="Numeric character reference">numeric reference</a></li>
</ul>
</li>
<li><a href="/wiki/Unicode_input" title="Unicode input">Input</a></li>
<li><a href="/wiki/Private_Character_Editor" title="Private Character Editor">Private Character Editor (MS)</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Related standards</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Common_Locale_Data_Repository" title="Common Locale Data Repository">Common Locale Data Repository (CLDR)</a></li>
<li><a href="/wiki/GB_18030" title="GB 18030">GB 18030</a></li>
<li><a href="/wiki/Han_unification" title="Han unification">Han unification</a></li>
<li><a href="/wiki/ISO/IEC_8859" title="ISO/IEC 8859">ISO/IEC 8859</a></li>
<li><a href="/wiki/ISO_15924" title="ISO 15924">ISO 15924</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Related topics</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Unicode_anomaly" title="Unicode anomaly">Anomalies</a></li>
<li><a href="/wiki/ConScript_Unicode_Registry" title="ConScript Unicode Registry">ConScript Unicode Registry</a></li>
<li><a href="/wiki/Ideographic_Rapporteur_Group" title="Ideographic Rapporteur Group">Ideographic Rapporteur Group</a></li>
<li><a href="/wiki/International_Components_for_Unicode" title="International Components for Unicode">International Components for Unicode</a></li>
<li><a href="/wiki/Category:People_involved_with_Unicode" title="Category:People involved with Unicode">People involved with Unicode</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<td colspan="2" style="width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em"></div>
<table cellspacing="0" class="nowraplinks collapsible collapsed navbox-subgroup" style="border-spacing:0;;;;">
<tr>
<th scope="col" style=";" class="navbox-title" colspan="2"><span style="float:left;width:6em;">&#160;</span>
<div class="" style="font-size:110%;"><a href="/wiki/Script_(Unicode)" title="Script (Unicode)">Scripts</a> and symbols in Unicode</div>
</th>
</tr>
<tr style="height:2px;">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Script_(Unicode)#Common_and_inherited_scripts" title="Script (Unicode)">Common and<br />
inherited scripts</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Combining_character" title="Combining character">Combining marks</a></li>
<li><a href="/wiki/Diacritic" title="Diacritic">Diacritics</a></li>
<li><a href="/wiki/Punctuation" title="Punctuation">Punctuation</a></li>
<li><a href="/wiki/Space_(punctuation)#Spaces_in_Unicode" title="Space (punctuation)">Space</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Modern scripts</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Arabic_script" title="Arabic script">Arabic</a>
<ul>
<li><a href="/wiki/Arabic_diacritics" title="Arabic diacritics">diacritics</a></li>
</ul>
</li>
<li><a href="/wiki/Armenian_alphabet" title="Armenian alphabet">Armenian</a></li>
<li><a href="/wiki/Balinese_alphabet" title="Balinese alphabet">Balinese</a></li>
<li><a href="/wiki/Bamum_script" title="Bamum script">Bamum</a></li>
<li><a href="/wiki/Batak_alphabet" title="Batak alphabet">Batak</a></li>
<li><a href="/wiki/Bengali_alphabet" title="Bengali alphabet">Bengali</a></li>
<li><a href="/wiki/Bopomofo" title="Bopomofo">Bopomofo</a></li>
<li><a href="/wiki/Braille" title="Braille">Braille</a></li>
<li><a href="/wiki/Buhid_alphabet" title="Buhid alphabet">Buhid</a></li>
<li><a href="/wiki/Burmese_alphabet" title="Burmese alphabet">Burmese</a></li>
<li><a href="/wiki/Canadian_Aboriginal_syllabics" title="Canadian Aboriginal syllabics">Canadian Aboriginal</a></li>
<li><a href="/wiki/Chakma_alphabet" title="Chakma alphabet">Chakma</a></li>
<li><a href="/wiki/Cham_alphabet" title="Cham alphabet">Cham</a></li>
<li><a href="/wiki/Cherokee_syllabary" title="Cherokee syllabary">Cherokee</a></li>
<li><a href="/wiki/CJK_Unified_Ideographs" title="CJK Unified Ideographs">CJK Unified Ideographs (Han)</a></li>
<li><a href="/wiki/Cyrillic_script" title="Cyrillic script">Cyrillic</a></li>
<li><a href="/wiki/Deseret_alphabet" title="Deseret alphabet">Deseret</a></li>
<li><a href="/wiki/Devanagari" title="Devanagari">Devanagari</a></li>
<li><a href="/wiki/Ge%27ez_script" title="Ge'ez script">Ge'ez</a></li>
<li><a href="/wiki/Georgian_alphabet" title="Georgian alphabet">Georgian</a></li>
<li><a href="/wiki/Greek_alphabet" title="Greek alphabet">Greek</a></li>
<li><a href="/wiki/Gujarati_alphabet" title="Gujarati alphabet">Gujarati</a></li>
<li><a href="/wiki/Gurmukh%C4%AB_alphabet" title="Gurmukhī alphabet">Gurmukhī</a></li>
<li><a href="/wiki/Han_tu" title="Han tu">Han tu</a></li>
<li><a href="/wiki/Hangul" title="Hangul">Hangul</a></li>
<li><a href="/wiki/Hanja" title="Hanja">Hanja</a></li>
<li><a href="/wiki/Hanun%C3%B3%27o_alphabet" title="Hanunó'o alphabet">Hanunó'o</a></li>
<li><a href="/wiki/Unicode_and_HTML_for_the_Hebrew_alphabet" title="Unicode and HTML for the Hebrew alphabet">Hebrew</a>
<ul>
<li><a href="/wiki/Hebrew_diacritics" title="Hebrew diacritics">diacritics</a></li>
</ul>
</li>
<li><a href="/wiki/Hiragana" title="Hiragana">Hiragana</a></li>
<li><a href="/wiki/Javanese_alphabet" title="Javanese alphabet">Javanese</a></li>
<li><a href="/wiki/Kanji" title="Kanji">Kanji</a></li>
<li><a href="/wiki/Kannada_alphabet" title="Kannada alphabet">Kannada</a></li>
<li><a href="/wiki/Katakana" title="Katakana">Katakana</a></li>
<li><a href="/wiki/Kayah_Li_alphabet" title="Kayah Li alphabet">Kayah Li</a></li>
<li><a href="/wiki/Khmer_alphabet" title="Khmer alphabet">Khmer</a></li>
<li><a href="/wiki/Lao_alphabet" title="Lao alphabet">Lao</a></li>
<li><a href="/wiki/Latin_script_in_Unicode" title="Latin script in Unicode">Latin</a></li>
<li><a href="/wiki/Lepcha_alphabet" title="Lepcha alphabet">Lepcha</a></li>
<li><a href="/wiki/Limbu_alphabet" title="Limbu alphabet">Limbu</a></li>
<li><a href="/wiki/Fraser_alphabet" title="Fraser alphabet">Lisu (Fraser)</a></li>
<li><a href="/wiki/Lontara_alphabet" title="Lontara alphabet">Lontara</a></li>
<li><a href="/wiki/Malayalam_alphabet" title="Malayalam alphabet">Malayalam</a></li>
<li><a href="/wiki/Manchu_alphabet" title="Manchu alphabet">Manchu</a></li>
<li><a href="/wiki/Mandaic_alphabet" title="Mandaic alphabet">Mandaic</a></li>
<li><a href="/wiki/Meitei_Mayek_alphabet" title="Meitei Mayek alphabet">Meetei Mayek</a></li>
<li><a href="/wiki/Pollard_script" title="Pollard script">Miao (Pollard)</a></li>
<li><a href="/wiki/Mongolian_script" title="Mongolian script">Mongolian</a></li>
<li><a href="/wiki/N%27Ko_alphabet" title="N'Ko alphabet">N'Ko</a></li>
<li><a href="/wiki/New_Tai_Lue_alphabet" title="New Tai Lue alphabet">New Tai Lue</a></li>
<li><a href="/wiki/Ol_Chiki_alphabet" title="Ol Chiki alphabet">Ol Chiki</a></li>
<li><a href="/wiki/Oriya_alphabet" title="Oriya alphabet">Oriya</a></li>
<li><a href="/wiki/Osmanya_alphabet" title="Osmanya alphabet">Osmanya</a></li>
<li><a href="/wiki/Rejang_alphabet" title="Rejang alphabet">Rejang</a></li>
<li><a href="/wiki/Samaritan_alphabet" title="Samaritan alphabet">Samaritan</a></li>
<li><a href="/wiki/%C5%9A%C4%81rad%C4%81_script" title="Śāradā script">Śāradā</a></li>
<li><a href="/wiki/Saurashtra_alphabet" title="Saurashtra alphabet">Saurashtra</a></li>
<li><a href="/wiki/Shavian_alphabet" title="Shavian alphabet">Shavian</a></li>
<li><a href="/wiki/Sinhala_alphabet" title="Sinhala alphabet">Sinhala</a></li>
<li><a href="/wiki/Sorang_Sompeng_alphabet" title="Sorang Sompeng alphabet">Sorang Sompeng</a></li>
<li><a href="/wiki/Sundanese_alphabet" title="Sundanese alphabet">Sundanese</a></li>
<li><a href="/wiki/Sylheti_Nagari" title="Sylheti Nagari">Sylheti Nagari</a></li>
<li><a href="/wiki/Syriac_alphabet" title="Syriac alphabet">Syriac</a></li>
<li><a href="/wiki/Baybayin" title="Baybayin">Tagalog (Baybayin)</a></li>
<li><a href="/wiki/Tagbanwa_alphabet" title="Tagbanwa alphabet">Tagbanwa</a></li>
<li><a href="/wiki/Tai_Le_alphabet" title="Tai Le alphabet">Tai Le</a></li>
<li><a href="/wiki/Tai_Tham_alphabet" title="Tai Tham alphabet">Tai Tham</a></li>
<li><a href="/wiki/Tai_Dam_language#Writing_system" title="Tai Dam language">Tai Viet</a></li>
<li><a href="/wiki/Takri_alphabet" title="Takri alphabet">Takri</a></li>
<li><a href="/wiki/Tamil_script" title="Tamil script">Tamil</a></li>
<li><a href="/wiki/Telugu_alphabet" title="Telugu alphabet">Telugu</a></li>
<li><a href="/wiki/Thaana" title="Thaana">Thaana</a></li>
<li><a href="/wiki/Thai_alphabet" title="Thai alphabet">Thai</a></li>
<li><a href="/wiki/Tibetan_alphabet" title="Tibetan alphabet">Tibetan</a></li>
<li><a href="/wiki/Tifinagh" title="Tifinagh">Tifinagh</a></li>
<li><a href="/wiki/Vai_syllabary" title="Vai syllabary">Vai</a></li>
<li><a href="/wiki/Yi_script" title="Yi script">Yi</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Ancient and<br />
historic scripts</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Avestan_alphabet" title="Avestan alphabet">Avestan</a></li>
<li><a href="/wiki/Br%C4%81hm%C4%AB_script" title="Brāhmī script">Brāhmī</a></li>
<li><a href="/wiki/Carian_alphabets" title="Carian alphabets">Carian</a></li>
<li><a href="/wiki/Coptic_alphabet" title="Coptic alphabet">Coptic</a></li>
<li><a href="/wiki/Cuneiform" title="Cuneiform">Cuneiform</a></li>
<li><a href="/wiki/Cypriot_syllabary" title="Cypriot syllabary">Cypriot</a></li>
<li><a href="/wiki/Egyptian_hieroglyphs" title="Egyptian hieroglyphs">Egyptian hieroglyphs</a></li>
<li><a href="/wiki/Glagolitic_alphabet" title="Glagolitic alphabet">Glagolitic</a></li>
<li><a href="/wiki/Gothic_alphabet" title="Gothic alphabet">Gothic</a></li>
<li><a href="/wiki/Aramaic_language#Imperial_Aramaic" title="Aramaic language">Imperial Aramaic</a></li>
<li><a href="/wiki/Pahlavi_scripts#Inscriptional_Pahlavi" title="Pahlavi scripts">Inscriptional Pahlavi</a></li>
<li><a href="/wiki/Parthian_language#Written_Parthian" title="Parthian language">Inscriptional Parthian</a></li>
<li><a href="/wiki/Kaithi" title="Kaithi">Kaithi</a></li>
<li><a href="/wiki/Kharosthi" title="Kharosthi">Kharosthi</a></li>
<li><a href="/wiki/Linear_B" title="Linear B">Linear B</a></li>
<li><a href="/wiki/Lycian_alphabet" title="Lycian alphabet">Lycian</a></li>
<li><a href="/wiki/Lydian_alphabet" title="Lydian alphabet">Lydian</a></li>
<li><a href="/wiki/Meroitic_alphabet" title="Meroitic alphabet">Meroitic</a></li>
<li><a href="/wiki/Ogham" title="Ogham">Ogham</a></li>
<li><a href="/wiki/Old_Italic_script" title="Old Italic script">Old Italic</a></li>
<li><a href="/wiki/Old_Persian_cuneiform" title="Old Persian cuneiform">Old Persian cuneiform</a></li>
<li><a href="/wiki/Old_Turkic_alphabet" title="Old Turkic alphabet">Old Turkic</a></li>
<li><a href="/wiki/%27Phags-pa_script" title="'Phags-pa script">'Phags-pa</a></li>
<li><a href="/wiki/Phoenician_alphabet" title="Phoenician alphabet">Phoenician</a></li>
<li><a href="/wiki/Runes" title="Runes">Runic</a></li>
<li><a href="/wiki/South_Arabian_alphabet" title="South Arabian alphabet">South Arabian</a></li>
<li><a href="/wiki/Ugaritic_alphabet" title="Ugaritic alphabet">Ugaritic</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Symbols</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Cultural,_political,_and_religious_symbols_in_Unicode" title="Cultural, political, and religious symbols in Unicode">Cultural, political, and religious symbols</a></li>
<li><a href="/wiki/Currency_sign" title="Currency sign">Currency</a></li>
<li><a href="/wiki/Mathematical_operators_and_symbols_in_Unicode" title="Mathematical operators and symbols in Unicode">Mathematical operators and symbols</a></li>
<li><a href="/wiki/Phonetic_symbols_in_Unicode" title="Phonetic symbols in Unicode">Phonetic symbols (including IPA)</a></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table cellspacing="0" class="navbox" style="border-spacing:0;;">
<tr>
<td style="padding:2px;">
<table cellspacing="0" class="nowraplinks collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit;;">
<tr>
<th scope="col" style=";" class="navbox-title" colspan="2">
<div class="noprint plainlinks hlist navbar mini" style="">
<ul>
<li class="nv-view"><a href="/wiki/Template:Character_encoding" title="Template:Character encoding"><span title="View this template" style=";;background:none transparent;border:none;">v</span></a></li>
<li class="nv-talk"><a href="/wiki/Template_talk:Character_encoding" title="Template talk:Character encoding"><span title="Discuss this template" style=";;background:none transparent;border:none;">t</span></a></li>
<li class="nv-edit"><a class="external text" href="//en.wikipedia.org/w/index.php?title=Template:Character_encoding&amp;action=edit"><span title="Edit this template" style=";;background:none transparent;border:none;">e</span></a></li>
</ul>
</div>
<div class="" style="font-size:110%;"><a href="/wiki/Character_encoding" title="Character encoding">Character encodings</a></div>
</th>
</tr>
<tr style="height:2px;">
<td></td>
</tr>
<tr>
<td class="navbox-abovebelow" style=";" colspan="2">
<div><img alt="Category" src="//upload.wikimedia.org/wikipedia/en/thumb/4/48/Folder_Hexagonal_Icon.svg/16px-Folder_Hexagonal_Icon.svg.png" width="16" height="14" /> <a href="/wiki/Category:Character_sets" title="Category:Character sets">Character sets</a></div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Early telecommunications</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/ASCII" title="ASCII">ASCII</a></li>
<li><a href="/wiki/ISO/IEC_646" title="ISO/IEC 646">ISO/IEC 646</a></li>
<li><a href="/wiki/ISO/IEC_6937" title="ISO/IEC 6937">ISO/IEC 6937</a></li>
<li><a href="/wiki/T.61_(ITU-T_recommendation)" title="T.61 (ITU-T recommendation)" class="mw-redirect">T.61</a></li>
<li><a href="/wiki/BCD_(6-bit)" title="BCD (6-bit)">BCD (6-bit)</a></li>
<li><a href="/wiki/Baudot_code" title="Baudot code">Baudot code</a></li>
<li><a href="/wiki/Morse_code" title="Morse code">Morse code</a></li>
<li><a href="/wiki/Chinese_telegraph_code" title="Chinese telegraph code">Chinese telegraph code</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/ISO/IEC_8859" title="ISO/IEC 8859">ISO/IEC 8859</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/ISO/IEC_8859-1" title="ISO/IEC 8859-1">-1</a></li>
<li><a href="/wiki/ISO/IEC_8859-2" title="ISO/IEC 8859-2">-2</a></li>
<li><a href="/wiki/ISO/IEC_8859-3" title="ISO/IEC 8859-3">-3</a></li>
<li><a href="/wiki/ISO/IEC_8859-4" title="ISO/IEC 8859-4">-4</a></li>
<li><a href="/wiki/ISO/IEC_8859-5" title="ISO/IEC 8859-5">-5</a></li>
<li><a href="/wiki/ISO/IEC_8859-6" title="ISO/IEC 8859-6">-6</a></li>
<li><a href="/wiki/ISO/IEC_8859-7" title="ISO/IEC 8859-7">-7</a></li>
<li><a href="/wiki/ISO/IEC_8859-8" title="ISO/IEC 8859-8">-8</a></li>
<li><a href="/wiki/ISO/IEC_8859-9" title="ISO/IEC 8859-9">-9</a></li>
<li><a href="/wiki/ISO/IEC_8859-10" title="ISO/IEC 8859-10">-10</a></li>
<li><a href="/wiki/ISO/IEC_8859-11" title="ISO/IEC 8859-11">-11</a></li>
<li><a href="/wiki/ISO/IEC_8859-12" title="ISO/IEC 8859-12">-12</a></li>
<li><a href="/wiki/ISO/IEC_8859-13" title="ISO/IEC 8859-13">-13</a></li>
<li><a href="/wiki/ISO/IEC_8859-14" title="ISO/IEC 8859-14">-14</a></li>
<li><a href="/wiki/ISO/IEC_8859-15" title="ISO/IEC 8859-15">-15</a></li>
<li><a href="/wiki/ISO/IEC_8859-16" title="ISO/IEC 8859-16">-16</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Bibliographic use</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/ANSEL" title="ANSEL">ANSEL</a></li>
<li>ISO 5426 / 5426-2 / 5427 / 5428 / <a href="/wiki/ISO_6438" title="ISO 6438">6438</a> / 6861 / 6862 / 10585 / 10586 / 10754 / 11822</li>
<li><a href="/wiki/MARC-8" title="MARC-8">MARC-8</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">National standards</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/ArmSCII" title="ArmSCII">ArmSCII</a></li>
<li><a href="/wiki/CNS_11643" title="CNS 11643">CNS 11643</a></li>
<li><a href="/wiki/GOST_10859" title="GOST 10859">GOST 10859</a></li>
<li><a href="/wiki/GB_2312" title="GB 2312">GB 2312</a></li>
<li><a href="/wiki/HKSCS" title="HKSCS">HKSCS</a></li>
<li><a href="/wiki/ISCII" title="ISCII" class="mw-redirect">ISCII</a></li>
<li><a href="/wiki/JIS_X_0201" title="JIS X 0201">JIS X 0201</a></li>
<li><a href="/wiki/JIS_X_0208" title="JIS X 0208">JIS X 0208</a></li>
<li><a href="/wiki/JIS_X_0212" title="JIS X 0212">JIS X 0212</a></li>
<li><a href="/wiki/JIS_X_0213" title="JIS X 0213">JIS X 0213</a></li>
<li><a href="/wiki/KPS_9566" title="KPS 9566">KPS 9566</a></li>
<li><a href="/wiki/KS_X_1001" title="KS X 1001">KS X 1001</a></li>
<li><a href="/wiki/Perso-Arabic_Script_Code_for_Information_Interchange" title="Perso-Arabic Script Code for Information Interchange">PASCII</a></li>
<li><a href="/wiki/TIS-620" title="TIS-620" class="mw-redirect">TIS-620</a></li>
<li><a href="/wiki/TSCII" title="TSCII" class="mw-redirect">TSCII</a></li>
<li><a href="/wiki/VISCII" title="VISCII" class="mw-redirect">VISCII</a></li>
<li><a href="/wiki/YUSCII" title="YUSCII">YUSCII</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Extended_Unix_Code" title="Extended Unix Code">EUC</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/EUC-CN" title="EUC-CN" class="mw-redirect">CN</a></li>
<li><a href="/wiki/EUC-JP" title="EUC-JP" class="mw-redirect">JP</a></li>
<li><a href="/wiki/EUC-KR" title="EUC-KR" class="mw-redirect">KR</a></li>
<li><a href="/wiki/EUC-TW" title="EUC-TW" class="mw-redirect">TW</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/ISO/IEC_2022" title="ISO/IEC 2022">ISO/IEC 2022</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li>CN</li>
<li><a href="/wiki/ISO-2022-JP" title="ISO-2022-JP" class="mw-redirect">JP</a></li>
<li><a href="/wiki/ISO-2022-KR" title="ISO-2022-KR" class="mw-redirect">KR</a></li>
<li><a href="/wiki/CCCII" title="CCCII" class="mw-redirect">CCCII</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Category:Mac_OS_character_encodings" title="Category:Mac OS character encodings">MacOS codepages ("scripts")</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/MacArabic_encoding" title="MacArabic encoding">Arabic</a></li>
<li><a href="/wiki/Macintosh_Central_European_encoding" title="Macintosh Central European encoding">CentralEurRoman</a></li>
<li>ChineseSimp / <a href="/wiki/EUC-CN" title="EUC-CN" class="mw-redirect">EUC-CN</a></li>
<li>ChineseTrad / <a href="/wiki/Big5" title="Big5">Big5</a></li>
<li>Croatian</li>
<li><a href="/wiki/Macintosh_Cyrillic_encoding" title="Macintosh Cyrillic encoding">Cyrillic</a></li>
<li>Devanagari</li>
<li>Dingbats</li>
<li>Farsi</li>
<li>Greek</li>
<li>Gujarati</li>
<li>Gurmukhi</li>
<li>Hebrew</li>
<li><a href="/wiki/Mac_Icelandic_encoding" title="Mac Icelandic encoding">Icelandic</a></li>
<li>Japanese / <a href="/wiki/Shift_JIS" title="Shift JIS">ShiftJIS</a></li>
<li>Korean / <a href="/wiki/EUC-KR" title="EUC-KR" class="mw-redirect">EUC-KR</a></li>
<li><a href="/wiki/Mac_OS_Roman" title="Mac OS Roman">Roman</a></li>
<li>Romanian</li>
<li>Symbol</li>
<li>Thai / <a href="/wiki/TIS-620" title="TIS-620" class="mw-redirect">TIS-620</a></li>
<li>Turkish</li>
<li><a href="/wiki/Macintosh_Ukrainian_encoding" title="Macintosh Ukrainian encoding">Ukrainian</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Category:DOS_code_pages" title="Category:DOS code pages">DOS codepages</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Code_page_437" title="Code page 437">437</a></li>
<li><a href="/wiki/Code_page_720" title="Code page 720">720</a></li>
<li><a href="/wiki/Code_page_737" title="Code page 737">737</a></li>
<li><a href="/wiki/Code_page_775" title="Code page 775">775</a></li>
<li><a href="/wiki/Code_page_850" title="Code page 850">850</a></li>
<li><a href="/wiki/Code_page_852" title="Code page 852">852</a></li>
<li><a href="/wiki/Code_page_855" title="Code page 855">855</a></li>
<li><a href="/wiki/Code_page_857" title="Code page 857">857</a></li>
<li><a href="/wiki/Code_page_858" title="Code page 858">858</a></li>
<li><a href="/wiki/Code_page_860" title="Code page 860">860</a></li>
<li><a href="/wiki/Code_page_861" title="Code page 861">861</a></li>
<li><a href="/wiki/Code_page_862" title="Code page 862">862</a></li>
<li><a href="/wiki/Code_page_863" title="Code page 863">863</a></li>
<li><a href="/wiki/Code_page_864" title="Code page 864">864</a></li>
<li><a href="/wiki/Code_page_865" title="Code page 865">865</a></li>
<li><a href="/wiki/Code_page_866" title="Code page 866">866</a></li>
<li><a href="/wiki/Code_page_869" title="Code page 869">869</a></li>
<li><a href="/wiki/Kamenick%C3%BD_encoding" title="Kamenický encoding">Kamenický</a></li>
<li><a href="/wiki/Mazovia_encoding" title="Mazovia encoding">Mazovia</a></li>
<li><a href="/wiki/MIK_Code_page" title="MIK Code page" class="mw-redirect">MIK</a></li>
<li><a href="/wiki/Iran_System_encoding_standard" title="Iran System encoding standard">Iran System</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Category:Windows_code_pages" title="Category:Windows code pages">Windows codepages</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Windows-874" title="Windows-874" class="mw-redirect">874</a> / <a href="/wiki/TIS-620" title="TIS-620" class="mw-redirect">TIS-620</a></li>
<li><a href="/wiki/Code_page_932" title="Code page 932">932</a> / <a href="/wiki/Shift_JIS" title="Shift JIS">Shift JIS</a></li>
<li><a href="/wiki/Code_page_936" title="Code page 936">936</a> / <a href="/wiki/GBK" title="GBK">GBK</a></li>
<li><a href="/wiki/Code_page_949" title="Code page 949">949</a> / <a href="/wiki/EUC-KR" title="EUC-KR" class="mw-redirect">EUC-KR</a></li>
<li><a href="/wiki/Code_page_950" title="Code page 950">950</a> / <a href="/wiki/Big5" title="Big5">Big5</a></li>
<li><a href="/wiki/Windows-1250" title="Windows-1250">1250</a></li>
<li><a href="/wiki/Windows-1251" title="Windows-1251">1251</a></li>
<li><a href="/wiki/Windows-1252" title="Windows-1252">1252</a></li>
<li><a href="/wiki/Windows-1253" title="Windows-1253">1253</a></li>
<li><a href="/wiki/Windows-1254" title="Windows-1254">1254</a></li>
<li><a href="/wiki/Windows-1255" title="Windows-1255">1255</a></li>
<li><a href="/wiki/Windows-1256" title="Windows-1256">1256</a></li>
<li><a href="/wiki/Windows-1257" title="Windows-1257">1257</a></li>
<li><a href="/wiki/Windows-1258" title="Windows-1258">1258</a></li>
<li><a href="/w/index.php?title=Code_page_1361&amp;action=edit&amp;redlink=1" class="new" title="Code page 1361 (page does not exist)">1361</a></li>
<li><a href="/wiki/Windows-54936" title="Windows-54936" class="mw-redirect">54936</a> / <a href="/wiki/GB_18030" title="GB 18030">GB18030</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Category:EBCDIC_code_pages" title="Category:EBCDIC code pages">EBCDIC codepages</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/EBCDIC_037" title="EBCDIC 037">37/1140</a></li>
<li>273/1141</li>
<li>277/1142</li>
<li>278/1143</li>
<li>280/1144</li>
<li>284/1145</li>
<li><a href="/wiki/EBCDIC_285" title="EBCDIC 285">285/1146</a></li>
<li>297/1147</li>
<li>420/16804</li>
<li>424/12712</li>
<li><a href="/wiki/EBCDIC_500" title="EBCDIC 500">500/1148</a></li>
<li>838/1160</li>
<li>871/1149</li>
<li><a href="/wiki/EBCDIC_875" title="EBCDIC 875">875/9067</a></li>
<li><a href="/wiki/EBCDIC_930" title="EBCDIC 930">930/1390</a></li>
<li>933/1364</li>
<li>937/1371</li>
<li>935/1388</li>
<li>939/1399</li>
<li>1025/1154</li>
<li>1026/1155</li>
<li><a href="/wiki/EBCDIC_1047" title="EBCDIC 1047">1047/924</a></li>
<li>1112/1156</li>
<li>1122/1157</li>
<li>1123/1158</li>
<li>1130/1164</li>
<li><a href="/wiki/JEF_codepage" title="JEF codepage">JEF</a></li>
<li><a href="/wiki/KEIS" title="KEIS">KEIS</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Platform specific</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/ATASCII" title="ATASCII">ATASCII</a></li>
<li><a href="/wiki/CDC_display_code" title="CDC display code">CDC display code</a></li>
<li><a href="/wiki/Multinational_Character_Set" title="Multinational Character Set">DEC-MCS</a></li>
<li><a href="/wiki/DEC_Radix-50" title="DEC Radix-50">DEC Radix-50</a></li>
<li><a href="/wiki/Fieldata" title="Fieldata">Fieldata</a></li>
<li><a href="/wiki/GSM_03.38" title="GSM 03.38">GSM 03.38</a></li>
<li><a href="/wiki/HP_roman8" title="HP roman8">HP roman8</a></li>
<li><a href="/wiki/PETSCII" title="PETSCII">PETSCII</a></li>
<li><a href="/wiki/TI_calculator_character_sets" title="TI calculator character sets">TI calculator character sets</a></li>
<li><a href="/wiki/Wang_International_Standard_Code_for_Information_Interchange" title="Wang International Standard Code for Information Interchange">WISCII</a></li>
<li><a href="/wiki/ZX_Spectrum_character_set" title="ZX Spectrum character set">ZX Spectrum character set</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;"><a href="/wiki/Unicode" title="Unicode">Unicode</a> / <a href="/wiki/ISO/IEC_10646" title="ISO/IEC 10646" class="mw-redirect">ISO/IEC 10646</a></th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><strong class="selflink">UTF-8</strong></li>
<li><a href="/wiki/UTF-16/UCS-2" title="UTF-16/UCS-2" class="mw-redirect">UTF-16/UCS-2</a></li>
<li><a href="/wiki/UTF-32/UCS-4" title="UTF-32/UCS-4" class="mw-redirect">UTF-32/UCS-4</a></li>
<li><a href="/wiki/UTF-7" title="UTF-7">UTF-7</a></li>
<li><a href="/wiki/UTF-1" title="UTF-1">UTF-1</a></li>
<li><a href="/wiki/UTF-EBCDIC" title="UTF-EBCDIC">UTF-EBCDIC</a></li>
<li><a href="/wiki/GB_18030" title="GB 18030">GB 18030</a></li>
<li><a href="/wiki/Standard_Compression_Scheme_for_Unicode" title="Standard Compression Scheme for Unicode">SCSU</a></li>
<li><a href="/wiki/Binary_Ordered_Compression_for_Unicode" title="Binary Ordered Compression for Unicode">BOCU-1</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Miscellaneous codepages</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/APL_(codepage)" title="APL (codepage)">APL</a></li>
<li><a href="/wiki/Cork_encoding" title="Cork encoding">Cork</a></li>
<li><a href="/wiki/HZ_(character_encoding)" title="HZ (character encoding)">HZ</a></li>
<li><a href="/wiki/Code_page_1133" title="Code page 1133">IBM code page 1133</a></li>
<li><a href="/wiki/KOI_character_encodings" title="KOI character encodings">KOI8</a></li>
<li><a href="/wiki/TRON_(encoding)" title="TRON (encoding)">TRON</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Related topics</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd hlist">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Control_character" title="Control character">control character</a>&#160;(<a href="/wiki/C0_and_C1_control_codes" title="C0 and C1 control codes">C0 C1</a>)</li>
<li><a href="/wiki/CCSID" title="CCSID">CCSID</a></li>
<li><a href="/wiki/Character_encodings_in_HTML" title="Character encodings in HTML">Character encodings in HTML</a></li>
<li><a href="/wiki/Charset_detection" title="Charset detection">charset detection</a></li>
<li><a href="/wiki/Han_unification" title="Han unification">Han unification</a></li>
<li><a href="/wiki/ISO/IEC_6429" title="ISO/IEC 6429" class="mw-redirect">ISO 6429/IEC 6429/ANSI X3.64</a></li>
<li><a href="/wiki/Mojibake" title="Mojibake">mojibake</a></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table cellspacing="0" class="navbox" style="border-spacing:0;;">
<tr>
<td style="padding:2px;">
<table cellspacing="0" class="nowraplinks hlist collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit;;">
<tr>
<th scope="col" style=";" class="navbox-title" colspan="2">
<div class="noprint plainlinks hlist navbar mini" style="">
<ul>
<li class="nv-view"><a href="/wiki/Template:Rob_Pike_navbox" title="Template:Rob Pike navbox"><span title="View this template" style=";;background:none transparent;border:none;">v</span></a></li>
<li class="nv-talk"><a href="/w/index.php?title=Template_talk:Rob_Pike_navbox&amp;action=edit&amp;redlink=1" class="new" title="Template talk:Rob Pike navbox (page does not exist)"><span title="Discuss this template" style=";;background:none transparent;border:none;">t</span></a></li>
<li class="nv-edit"><a class="external text" href="//en.wikipedia.org/w/index.php?title=Template:Rob_Pike_navbox&amp;action=edit"><span title="Edit this template" style=";;background:none transparent;border:none;">e</span></a></li>
</ul>
</div>
<div class="" style="font-size:110%;"><a href="/wiki/Rob_Pike" title="Rob Pike">Rob Pike</a></div>
</th>
</tr>
<tr style="height:2px;">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Operating systems</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Plan_9_from_Bell_Labs" title="Plan 9 from Bell Labs">Plan 9 from Bell Labs</a></li>
<li><a href="/wiki/Inferno_(operating_system)" title="Inferno (operating system)">Inferno</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Programming languages</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Newsqueak" title="Newsqueak">Newsqueak</a></li>
<li><a href="/wiki/Limbo_(programming_language)" title="Limbo (programming language)">Limbo</a></li>
<li><a href="/wiki/Go_(programming_language)" title="Go (programming language)">Go</a></li>
<li><a href="/wiki/Sawzall_(programming_language)" title="Sawzall (programming language)">Sawzall</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Software</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Acme_(text_editor)" title="Acme (text editor)">acme</a></li>
<li><a href="/wiki/Blit_(computer_terminal)" title="Blit (computer terminal)">Blit</a></li>
<li><a href="/wiki/Sam_(text_editor)" title="Sam (text editor)">sam</a></li>
<li><a href="/wiki/Rio_(windowing_system)" title="Rio (windowing system)">rio</a></li>
<li><a href="/wiki/8%C2%BD_(Plan_9)" title="8½ (Plan 9)">8½</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Publications</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even">
<div style="padding:0em 0.25em">
<ul>
<li><i><a href="/wiki/The_Practice_of_Programming" title="The Practice of Programming">The Practice of Programming</a></i></li>
<li><i><a href="/wiki/The_Unix_Programming_Environment" title="The Unix Programming Environment">The Unix Programming Environment</a></i></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Other</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Ren%C3%A9e_French" title="Renée French">Renée French</a></li>
<li><a href="/wiki/Mark_V_Shaney" title="Mark V Shaney">Mark V Shaney</a></li>
<li><strong class="selflink">UTF-8</strong></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table cellspacing="0" class="navbox" style="border-spacing:0;;">
<tr>
<td style="padding:2px;">
<table cellspacing="0" class="nowraplinks hlist collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit;;">
<tr>
<th scope="col" style=";" class="navbox-title" colspan="2">
<div class="noprint plainlinks hlist navbar mini" style="">
<ul>
<li class="nv-view"><a href="/wiki/Template:Ken_Thompson_navbox" title="Template:Ken Thompson navbox"><span title="View this template" style=";;background:none transparent;border:none;">v</span></a></li>
<li class="nv-talk"><a href="/w/index.php?title=Template_talk:Ken_Thompson_navbox&amp;action=edit&amp;redlink=1" class="new" title="Template talk:Ken Thompson navbox (page does not exist)"><span title="Discuss this template" style=";;background:none transparent;border:none;">t</span></a></li>
<li class="nv-edit"><a class="external text" href="//en.wikipedia.org/w/index.php?title=Template:Ken_Thompson_navbox&amp;action=edit"><span title="Edit this template" style=";;background:none transparent;border:none;">e</span></a></li>
</ul>
</div>
<div class="" style="font-size:110%;"><a href="/wiki/Ken_Thompson" title="Ken Thompson">Ken Thompson</a></div>
</th>
</tr>
<tr style="height:2px;">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Operating systems</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Unix" title="Unix">Unix</a></li>
<li><a href="/wiki/Plan_9_from_Bell_Labs" title="Plan 9 from Bell Labs">Plan 9 from Bell Labs</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Programming languages</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/B_(programming_language)" title="B (programming language)">B</a></li>
<li><a href="/wiki/Bon_(programming_language)" title="Bon (programming language)">Bon</a></li>
<li><a href="/wiki/Go_(programming_language)" title="Go (programming language)">Go</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Software</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-odd">
<div style="padding:0em 0.25em">
<ul>
<li><a href="/wiki/Belle_(chess_machine)" title="Belle (chess machine)">Belle</a></li>
<li><a href="/wiki/Ed_(text_editor)" title="Ed (text editor)">ed</a></li>
<li><a href="/wiki/Sam_(text_editor)" title="Sam (text editor)">sam</a></li>
<li><a href="/wiki/Space_Travel_(video_game)" title="Space Travel (video game)">Space Travel</a></li>
</ul>
</div>
</td>
</tr>
<tr style="height:2px">
<td></td>
</tr>
<tr>
<th scope="row" class="navbox-group" style=";;">Other</th>
<td style="text-align:left;border-left-width:2px;border-left-style:solid;width:100%;padding:0px;;;" class="navbox-list navbox-even">
<div style="padding:0em 0.25em">
<ul>
<li><strong class="selflink">UTF-8</strong></li>
</ul>
</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
<!--
NewPP limit report
Preprocessor node count: 22444/1000000
Post-expand include size: 235354/2048000 bytes
Template argument size: 87024/2048000 bytes
Highest expansion depth: 21/40
Expensive parser function count: 5/500
-->
<!-- Saved in parser cache with key enwiki:pcache:idhash:32188-0!*!0!!en!4!* and timestamp 20120815033327 -->
</div> <!-- /bodycontent -->
<!-- printfooter -->
<div class="printfooter">
Retrieved from "<a href="http://en.wikipedia.org/w/index.php?title=UTF-8&amp;oldid=507478399">http://en.wikipedia.org/w/index.php?title=UTF-8&amp;oldid=507478399</a>" </div>
<!-- /printfooter -->
<!-- catlinks -->
<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Special:Categories" title="Special:Categories">Categories</a>: <ul><li><a href="/wiki/Category:Character_sets" title="Category:Character sets">Character sets</a></li><li><a href="/wiki/Category:Encodings" title="Category:Encodings">Encodings</a></li><li><a href="/wiki/Category:Character_encoding" title="Category:Character encoding">Character encoding</a></li><li><a href="/wiki/Category:Unicode_Transformation_Formats" title="Category:Unicode Transformation Formats">Unicode Transformation Formats</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:All_articles_with_unsourced_statements" title="Category:All articles with unsourced statements">All articles with unsourced statements</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_December_2011" title="Category:Articles with unsourced statements from December 2011">Articles with unsourced statements from December 2011</a></li><li><a href="/wiki/Category:All_articles_with_specifically_marked_weasel-worded_phrases" title="Category:All articles with specifically marked weasel-worded phrases">All articles with specifically marked weasel-worded phrases</a></li><li><a href="/wiki/Category:Articles_with_specifically_marked_weasel-worded_phrases_from_September_2011" title="Category:Articles with specifically marked weasel-worded phrases from September 2011">Articles with specifically marked weasel-worded phrases from September 2011</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_February_2012" title="Category:Articles with unsourced statements from February 2012">Articles with unsourced statements from February 2012</a></li><li><a href="/wiki/Category:Articles_needing_additional_references_from_October_2009" title="Category:Articles needing additional references from October 2009">Articles needing additional references from October 2009</a></li><li><a href="/wiki/Category:All_articles_needing_additional_references" title="Category:All articles needing additional references">All articles needing additional references</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_December_2009" title="Category:Articles with unsourced statements from December 2009">Articles with unsourced statements from December 2009</a></li></ul></div></div> <!-- /catlinks -->
<!-- Empty element carrying only the "visualClear" class. NOTE(review): presumably clears floats through the external stylesheet, which is not visible here; confirm before removing. -->
<div class="visualClear"></div>
<!-- debughtml -->
<!-- /debughtml -->
</div>
<!-- /bodyContent -->
</div>
<!-- /content -->
<!-- header -->
<div id="mw-head" class="noprint">
<!-- 0 -->
<!-- Personal tools portlet: the account-creation and log-in links for a reader who is not signed in. -->
<div id="p-personal" class="">
  <h5>Personal tools</h5>
  <ul>
    <li id="pt-createaccount"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=UTF-8&amp;type=signup">Create account</a></li>
    <li id="pt-login"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=UTF-8" title="You are encouraged to log in; however, it is not mandatory. [o]" accesskey="o">Log in</a></li>
  </ul>
</div>
<!-- /0 -->
<!-- Left navigation: the namespace tabs (Article / Talk) and the Variants menu. -->
<div id="left-navigation">
  <!-- 0: namespace tabs; the Article tab carries the "selected" class -->
  <div id="p-namespaces" class="vectorTabs">
    <h5>Namespaces</h5>
    <ul>
      <li id="ca-nstab-main" class="selected"><span><a href="/wiki/UTF-8" title="View the content page [c]" accesskey="c">Article</a></span></li>
      <li id="ca-talk"><span><a href="/wiki/Talk:UTF-8" title="Discussion about the content page [t]" accesskey="t">Talk</a></span></li>
    </ul>
  </div>
  <!-- /0 -->
  <!-- 1: Variants menu; marked "emptyPortlet" and its list has no entries on this page -->
  <div id="p-variants" class="vectorMenu emptyPortlet">
    <h4>
    </h4>
    <h5><span>Variants</span><a href="#"></a></h5>
    <div class="menu">
      <ul>
      </ul>
    </div>
  </div>
  <!-- /1 -->
</div>
<!-- Right navigation: the view tabs (Read / Edit / View history), the Actions menu and the site search form. -->
<div id="right-navigation">
  <!-- 0: view tabs; the Read tab carries the "selected" class -->
  <div id="p-views" class="vectorTabs">
    <h5>Views</h5>
    <ul>
      <li id="ca-view" class="selected"><span><a href="/wiki/UTF-8" >Read</a></span></li>
      <li id="ca-edit"><span><a href="/w/index.php?title=UTF-8&amp;action=edit" title="You can edit this page. &#10;Please use the preview button before saving. [e]" accesskey="e">Edit</a></span></li>
      <li id="ca-history" class="collapsible"><span><a href="/w/index.php?title=UTF-8&amp;action=history" title="Past versions of this page [h]" accesskey="h">View history</a></span></li>
    </ul>
  </div>
  <!-- /0 -->
  <!-- 1: Actions menu; marked "emptyPortlet" and its list has no entries on this page -->
  <div id="p-cactions" class="vectorMenu emptyPortlet">
    <h5><span>Actions</span><a href="#"></a></h5>
    <div class="menu">
      <ul>
      </ul>
    </div>
  </div>
  <!-- /1 -->
  <!-- 2: search form; submits the "search" field to /w/index.php together with the hidden title=Special:Search field -->
  <div id="p-search">
    <h5><label for="searchInput">Search</label></h5>
    <form action="/w/index.php" id="searchform">
      <div id="simpleSearch">
        <input type="text" name="search" value="" title="Search Wikipedia [f]" accesskey="f" id="searchInput" />
        <!-- The 12x13 dimensions belong on the image: width and height are not attributes of the button element. -->
        <button type="submit" name="button" title="Search Wikipedia for this text" id="searchButton"><img src="//bits.wikimedia.org/static-1.20wmf9/skins/vector/images/search-ltr.png?303-4" alt="Search" width="12" height="13" /></button>
        <input type="hidden" name="title" value="Special:Search" />
      </div>
    </form>
  </div>
  <!-- /2 -->
</div>
</div>
<!-- /header -->
<!-- panel -->
<div id="mw-panel" class="noprint">
<!-- logo -->
<!-- Site logo: an anchor to /wiki/Main_Page with no text content. The picture comes from the inline background-image style and the only label is the title attribute. Kept on one line so that no whitespace text nodes surround the anchor. -->
<div id="p-logo"><a style="background-image: url(//upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png);" href="/wiki/Main_Page" title="Visit the main page"></a></div>
<!-- /logo -->
<!-- navigation -->
<!-- Sidebar portal "Navigation": main page, contents, featured content, current events, random article and the donation link. -->
<div class="portal" id="p-navigation">
  <h5>Navigation</h5>
  <div class="body">
    <ul>
      <li id="n-mainpage-description"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z">Main page</a></li>
      <li id="n-contents"><a href="/wiki/Portal:Contents" title="Guides to browsing Wikipedia">Contents</a></li>
      <li id="n-featuredcontent"><a href="/wiki/Portal:Featured_content" title="Featured content the best of Wikipedia">Featured content</a></li>
      <li id="n-currentevents"><a href="/wiki/Portal:Current_events" title="Find background information on current events">Current events</a></li>
      <li id="n-randompage"><a href="/wiki/Special:Random" title="Load a random article [x]" accesskey="x">Random article</a></li>
      <li id="n-sitesupport"><a href="//donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=20120717SB001&amp;uselang=en" title="Support us">Donate to Wikipedia</a></li>
    </ul>
  </div>
</div>
<!-- /navigation -->
<!-- SEARCH -->
<!-- /SEARCH -->
<!-- interaction -->
<!-- Sidebar portal "Interaction": help, about, community portal, recent changes and contact links. -->
<div class="portal" id="p-interaction">
  <h5>Interaction</h5>
  <div class="body">
    <ul>
      <li id="n-help"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia">Help</a></li>
      <li id="n-aboutsite"><a href="/wiki/Wikipedia:About" title="Find out about Wikipedia">About Wikipedia</a></li>
      <li id="n-portal"><a href="/wiki/Wikipedia:Community_portal" title="About the project, what you can do, where to find things">Community portal</a></li>
      <li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="A list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li>
      <li id="n-contact"><a href="/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia">Contact Wikipedia</a></li>
    </ul>
  </div>
</div>
<!-- /interaction -->
<!-- TOOLBOX -->
<!-- Sidebar portal "Toolbox": per-page tools (what links here, related changes, permanent link, citation) plus the upload and special-pages links. -->
<div class="portal" id="p-tb">
  <h5>Toolbox</h5>
  <div class="body">
    <ul>
      <li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/UTF-8" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j">What links here</a></li>
      <li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/UTF-8" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
      <li id="t-upload"><a href="/wiki/Wikipedia:Upload" title="Upload files [u]" accesskey="u">Upload file</a></li>
      <li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q">Special pages</a></li>
      <li id="t-permalink"><a href="/w/index.php?title=UTF-8&amp;oldid=507478399" title="Permanent link to this revision of the page">Permanent link</a></li>
      <li id="t-cite"><a href="/w/index.php?title=Special:Cite&amp;page=UTF-8&amp;id=507478399" title="Information on how to cite this page">Cite this page</a></li>
    </ul>
  </div>
</div>
<!-- /TOOLBOX -->
<!-- coll-print_export -->
<!-- Sidebar portal "Print/export": create-a-book, download-as-PDF and printable-version links. The list items are written without whitespace between them; reflowing this line would insert text nodes into the list, so it is left on one line. -->
<div class="portal" id='p-coll-print_export'>
<h5>Print/export</h5>
<div class="body">
<ul id="collectionPortletList"><li id="coll-create_a_book"><a href="/w/index.php?title=Special:Book&amp;bookcmd=book_creator&amp;referer=UTF-8" title="Create a book or page collection" rel="nofollow">Create a book</a></li><li id="coll-download-as-rl"><a href="/w/index.php?title=Special:Book&amp;bookcmd=render_article&amp;arttitle=UTF-8&amp;oldid=507478399&amp;writer=rl" title="Download a PDF version of this wiki page" rel="nofollow">Download as PDF</a></li><li id="t-print"><a href="/w/index.php?title=UTF-8&amp;printable=yes" title="Printable version of this page [p]" accesskey="p">Printable version</a></li></ul> </div>
</div>
<!-- /coll-print_export -->
<!-- LANGUAGES -->
<!-- Sidebar portal "Languages": one interwiki link per language edition. Each anchor declares the language of its label (lang) and of its target (hreflang). -->
<div class="portal" id="p-lang">
  <h5>Languages</h5>
  <div class="body">
    <ul>
      <li class="interwiki-ar"><a href="//ar.wikipedia.org/wiki/%D8%B5%D9%8A%D8%BA%D8%A9_%D8%A7%D9%84%D8%AA%D8%AD%D9%88%D9%8A%D9%84_%D8%A7%D9%84%D9%85%D9%88%D8%AD%D8%AF-8" title="صيغة التحويل الموحد-8" lang="ar" hreflang="ar">العربية</a></li>
      <li class="interwiki-bg"><a href="//bg.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="bg" hreflang="bg">Български</a></li>
      <li class="interwiki-ca"><a href="//ca.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="ca" hreflang="ca">Català</a></li>
      <li class="interwiki-cs"><a href="//cs.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="cs" hreflang="cs">Česky</a></li>
      <li class="interwiki-da"><a href="//da.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="da" hreflang="da">Dansk</a></li>
      <li class="interwiki-de"><a href="//de.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="de" hreflang="de">Deutsch</a></li>
      <li class="interwiki-el"><a href="//el.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="el" hreflang="el">Ελληνικά</a></li>
      <li class="interwiki-es"><a href="//es.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="es" hreflang="es">Español</a></li>
      <li class="interwiki-eo"><a href="//eo.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="eo" hreflang="eo">Esperanto</a></li>
      <li class="interwiki-fa"><a href="//fa.wikipedia.org/wiki/%DB%8C%D9%88%D8%AA%DB%8C%E2%80%8C%D8%A7%D9%81-%DB%B8" title="یوتی‌اف-۸" lang="fa" hreflang="fa">فارسی</a></li>
      <li class="interwiki-fr"><a href="//fr.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="fr" hreflang="fr">Français</a></li>
      <li class="interwiki-ko"><a href="//ko.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="ko" hreflang="ko">한국어</a></li>
      <li class="interwiki-hr"><a href="//hr.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="hr" hreflang="hr">Hrvatski</a></li>
      <li class="interwiki-it"><a href="//it.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="it" hreflang="it">Italiano</a></li>
      <li class="interwiki-he"><a href="//he.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="he" hreflang="he">עברית</a></li>
      <li class="interwiki-lv"><a href="//lv.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="lv" hreflang="lv">Latviešu</a></li>
      <li class="interwiki-lt"><a href="//lt.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="lt" hreflang="lt">Lietuvių</a></li>
      <li class="interwiki-hu"><a href="//hu.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="hu" hreflang="hu">Magyar</a></li>
      <li class="interwiki-ml"><a href="//ml.wikipedia.org/wiki/%E0%B4%AF%E0%B5%81.%E0%B4%9F%E0%B4%BF.%E0%B4%8E%E0%B4%AB%E0%B5%8D-8" title="യു.ടി.എഫ്-8" lang="ml" hreflang="ml">മലയാളം</a></li>
      <li class="interwiki-ms"><a href="//ms.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="ms" hreflang="ms">Bahasa Melayu</a></li>
      <li class="interwiki-nl"><a href="//nl.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="nl" hreflang="nl">Nederlands</a></li>
      <li class="interwiki-ja"><a href="//ja.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="ja" hreflang="ja">日本語</a></li>
      <li class="interwiki-no"><a href="//no.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="no" hreflang="no">norsk (bokmål)</a></li>
      <li class="interwiki-nn"><a href="//nn.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="nn" hreflang="nn">norsk (nynorsk)</a></li>
      <li class="interwiki-pl"><a href="//pl.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="pl" hreflang="pl">Polski</a></li>
      <li class="interwiki-pt"><a href="//pt.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="pt" hreflang="pt">Português</a></li>
      <li class="interwiki-ru"><a href="//ru.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="ru" hreflang="ru">Русский</a></li>
      <li class="interwiki-sk"><a href="//sk.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="sk" hreflang="sk">Slovenčina</a></li>
      <li class="interwiki-sl"><a href="//sl.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="sl" hreflang="sl">Slovenščina</a></li>
      <li class="interwiki-sr"><a href="//sr.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="sr" hreflang="sr">Српски / srpski</a></li>
      <li class="interwiki-fi"><a href="//fi.wikipedia.org/wiki/Unicode#UTF-8" title="Unicode" lang="fi" hreflang="fi">Suomi</a></li>
      <li class="interwiki-sv"><a href="//sv.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="sv" hreflang="sv">Svenska</a></li>
      <li class="interwiki-tr"><a href="//tr.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="tr" hreflang="tr">Türkçe</a></li>
      <li class="interwiki-uk"><a href="//uk.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="uk" hreflang="uk">Українська</a></li>
      <li class="interwiki-vi"><a href="//vi.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="vi" hreflang="vi">Tiếng Việt</a></li>
      <li class="interwiki-zh"><a href="//zh.wikipedia.org/wiki/UTF-8" title="UTF-8" lang="zh" hreflang="zh">中文</a></li>
    </ul>
  </div>
</div>
<!-- /LANGUAGES -->
</div>
<!-- /panel -->
<!-- footer -->
<!-- Page footer: last-modified and licence notice (footer-info), policy and site links (footer-places), and the Wikimedia / MediaWiki badges (footer-icons). -->
<div id="footer">
  <ul id="footer-info">
    <li id="footer-info-lastmod"> This page was last modified on 15 August 2012 at 03:33.<br /></li>
    <!-- The second rel="license" anchor is empty and hidden with display:none; it only carries the licence URL in the markup. -->
    <li id="footer-info-copyright">Text is available under the <a rel="license" href="//en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License">Creative Commons Attribution-ShareAlike License</a><a rel="license" href="//creativecommons.org/licenses/by-sa/3.0/" style="display:none;"></a>;
      additional terms may apply.
      See <a href="//wikimediafoundation.org/wiki/Terms_of_use">Terms of use</a> for details.<br />
      Wikipedia&reg; is a registered trademark of the <a href="//www.wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.<br /></li><li class="noprint"><a class="internal" href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact us</a></li>
  </ul>
  <ul id="footer-places">
    <li id="footer-places-privacy"><a href="//wikimediafoundation.org/wiki/Privacy_policy" title="wikimedia:Privacy policy">Privacy policy</a></li>
    <li id="footer-places-about"><a href="/wiki/Wikipedia:About" title="Wikipedia:About">About Wikipedia</a></li>
    <li id="footer-places-disclaimer"><a href="/wiki/Wikipedia:General_disclaimer" title="Wikipedia:General disclaimer">Disclaimers</a></li>
    <!-- Protocol-relative like every other cross-site link on this page, so a reader on HTTPS is not sent to plain HTTP. -->
    <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=UTF-8&amp;mobileaction=toggle_view_mobile" class="noprint">Mobile view</a></li>
  </ul>
  <ul id="footer-icons" class="noprint">
    <li id="footer-copyrightico">
      <a href="//wikimediafoundation.org/"><img src="//bits.wikimedia.org/images/wikimedia-button.png" width="88" height="31" alt="Wikimedia Foundation"/></a>
    </li>
    <li id="footer-poweredbyico">
      <a href="//www.mediawiki.org/"><img src="//bits.wikimedia.org/static-1.20wmf9/skins/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" width="88" height="31" /></a>
    </li>
  </ul>
  <div style="clear:both"></div>
</div>
<!-- /footer -->
<!-- Scripts at the end of the body. None of them carries async or defer, so they run in document order; keep the order as it is. -->
<!-- Guarded on window.mw: passes the module states ("site" as loading; "user" and "user.groups" as ready) to mw.loader.state. -->
<script type="text/javascript">if(window.mw){
mw.loader.state({"site":"loading","user":"ready","user.groups":"ready"});
}</script>
<!-- External script: load.php request for modules=skins.vector with only=scripts. -->
<script src="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=skins.vector&amp;only=scripts&amp;skin=vector&amp;*" type="text/javascript"></script>
<!-- Guarded on window.mw: calls mw.loader.load with the module list below, followed by the arguments null and true. NOTE(review): the meaning of those two trailing arguments is not visible here; check the mw.loader.load documentation. -->
<script type="text/javascript">if(window.mw){
mw.loader.load(["mediawiki.user","mediawiki.page.ready","mediawiki.legacy.mwsuggest","ext.gadget.teahouse","ext.gadget.ReferenceTooltips","ext.vector.collapsibleNav","ext.vector.collapsibleTabs","ext.vector.editWarning","ext.vector.simpleSearch","ext.UserBuckets","ext.articleFeedback.startup","ext.articleFeedbackv5.startup","ext.markAsHelpful","ext.Experiments.lib","ext.Experiments.experiments"], null, true);
}</script>
<!-- Two gadget scripts requested as raw wiki pages (action=raw, ctype=text/javascript). -->
<script src="/w/index.php?title=MediaWiki:Gadget-ReferenceTooltips.js&amp;action=raw&amp;ctype=text/javascript&amp;500778781" type="text/javascript"></script>
<script src="/w/index.php?title=MediaWiki:Gadget-DRN-wizard-loader.js&amp;action=raw&amp;ctype=text/javascript&amp;504341206" type="text/javascript"></script>
<!-- Sets window._reg to the empty string. NOTE(review): nothing in this part of the file reads it; presumably one of the scripts loaded below does. Confirm before removing. -->
<script type="text/javascript">
window._reg = "";
</script>
<!-- Remaining external scripts: Special:BannerController, the load.php request for modules=site (only=scripts), and the geoiplookup endpoint. -->
<script src="/w/index.php?title=Special:BannerController&amp;cache=/cn.js&amp;303-4" type="text/javascript"></script>
<script src="//bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=site&amp;only=scripts&amp;skin=vector&amp;*" type="text/javascript"></script>
<script src="//bits.wikimedia.org/geoiplookup" type="text/javascript"></script><!-- Served by srv241 in 0.156 secs. -->
</body>
</html>