asciiblaster

- draw irc art in your web browser
git clone git://git.acid.vegas/asciiblaster.git
Log | Files | Refs | Archive | README

unicode.js (19722B)

      1 var unicode = (function(){
      2   var UNICODE_BLOCK_LIST = [
      3     0x0020, 0x007F, "Basic Latin",
      4     0x0080, 0x00FF, "Latin-1 Supplement",
      5     0x0100, 0x017F, "Latin Extended-A",
      6     0x0180, 0x024F, "Latin Extended-B",
      7     0x0250, 0x02AF, "IPA Extensions",
      8     0x02B0, 0x02FF, "Spacing Modifier Letters",
      9     0x0300, 0x036F, "Combining Diacritical Marks",
     10     0x0370, 0x03FF, "Greek and Coptic",
     11     0x0400, 0x04FF, "Cyrillic",
     12     0x0500, 0x052F, "Cyrillic Supplement",
     13     0x0530, 0x058F, "Armenian",
     14     0x0590, 0x05FF, "Hebrew",
     15     0x0600, 0x06FF, "Arabic",
     16     0x0700, 0x074F, "Syriac",
     17     0x0750, 0x077F, "Arabic Supplement",
     18     0x0780, 0x07BF, "Thaana",
     19     0x07C0, 0x07FF, "NKo",
     20     0x0800, 0x083F, "Samaritan",
     21     0x0840, 0x085F, "Mandaic",
     22     0x08A0, 0x08FF, "Arabic Extended-A",
     23     0x0900, 0x097F, "Devanagari",
     24     0x0980, 0x09FF, "Bengali",
     25     0x0A00, 0x0A7F, "Gurmukhi",
     26     0x0A80, 0x0AFF, "Gujarati",
     27     0x0B00, 0x0B7F, "Oriya",
     28     0x0B80, 0x0BFF, "Tamil",
     29     0x0C00, 0x0C7F, "Telugu",
     30     0x0C80, 0x0CFF, "Kannada",
     31     0x0D00, 0x0D7F, "Malayalam",
     32     0x0D80, 0x0DFF, "Sinhala",
     33     0x0E00, 0x0E7F, "Thai",
     34     0x0E80, 0x0EFF, "Lao",
     35     0x0F00, 0x0FFF, "Tibetan",
     36     0x1000, 0x109F, "Myanmar",
     37     0x10A0, 0x10FF, "Georgian",
     38     0x1100, 0x11FF, "Hangul Jamo",
     39     0x1200, 0x137F, "Ethiopic",
     40     0x1380, 0x139F, "Ethiopic Supplement",
     41     0x13A0, 0x13FF, "Cherokee",
     42     0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics",
     43     0x1680, 0x169F, "Ogham",
     44     0x16A0, 0x16FF, "Runic",
     45     0x1700, 0x171F, "Tagalog",
     46     0x1720, 0x173F, "Hanunoo",
     47     0x1740, 0x175F, "Buhid",
     48     0x1760, 0x177F, "Tagbanwa",
     49     0x1780, 0x17FF, "Khmer",
     50     0x1800, 0x18AF, "Mongolian",
     51     0x18B0, 0x18FF, "Unified Canadian Aboriginal Syllabics Extended",
     52     0x1900, 0x194F, "Limbu",
     53     0x1950, 0x197F, "Tai Le",
     54     0x1980, 0x19DF, "New Tai Lue",
     55     0x19E0, 0x19FF, "Khmer Symbols",
     56     0x1A00, 0x1A1F, "Buginese",
     57     0x1A20, 0x1AAF, "Tai Tham",
     58     0x1AB0, 0x1AFF, "Combining Diacritical Marks Extended",
     59     0x1B00, 0x1B7F, "Balinese",
     60     0x1B80, 0x1BBF, "Sundanese",
     61     0x1BC0, 0x1BFF, "Batak",
     62     0x1C00, 0x1C4F, "Lepcha",
     63     0x1C50, 0x1C7F, "Ol Chiki",
     64     0x1CC0, 0x1CCF, "Sundanese Supplement",
     65     0x1CD0, 0x1CFF, "Vedic Extensions",
     66     0x1D00, 0x1D7F, "Phonetic Extensions",
     67     0x1D80, 0x1DBF, "Phonetic Extensions Supplement",
     68     0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement",
     69     0x1E00, 0x1EFF, "Latin Extended Additional",
     70     0x1F00, 0x1FFF, "Greek Extended",
     71     0x2000, 0x206F, "General Punctuation",
     72     0x2070, 0x209F, "Superscripts and Subscripts",
     73     0x20A0, 0x20CF, "Currency Symbols",
     74     0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols",
     75     0x2100, 0x214F, "Letterlike Symbols",
     76     0x2150, 0x218F, "Number Forms",
     77     0x2190, 0x21FF, "Arrows",
     78     0x2200, 0x22FF, "Mathematical Operators",
     79     0x2300, 0x23FF, "Miscellaneous Technical",
     80     0x2400, 0x243F, "Control Pictures",
     81     0x2440, 0x245F, "Optical Character Recognition",
     82     0x2460, 0x24FF, "Enclosed Alphanumerics",
     83     0x2500, 0x257F, "Box Drawing",
     84     0x2580, 0x259F, "Block Elements",
     85     0x25A0, 0x25FF, "Geometric Shapes",
     86     0x2600, 0x26FF, "Miscellaneous Symbols",
     87     0x2700, 0x27BF, "Dingbats",
     88     0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A",
     89     0x27F0, 0x27FF, "Supplemental Arrows-A",
     90     0x2800, 0x28FF, "Braille Patterns",
     91     0x2900, 0x297F, "Supplemental Arrows-B",
     92     0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B",
     93     0x2A00, 0x2AFF, "Supplemental Mathematical Operators",
     94     0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows",
     95     0x2C00, 0x2C5F, "Glagolitic",
     96     0x2C60, 0x2C7F, "Latin Extended-C",
     97     0x2C80, 0x2CFF, "Coptic",
     98     0x2D00, 0x2D2F, "Georgian Supplement",
     99     0x2D30, 0x2D7F, "Tifinagh",
    100     0x2D80, 0x2DDF, "Ethiopic Extended",
    101     0x2DE0, 0x2DFF, "Cyrillic Extended-A",
    102     0x2E00, 0x2E7F, "Supplemental Punctuation",
    103     0x2E80, 0x2EFF, "CJK Radicals Supplement",
    104     0x2F00, 0x2FDF, "Kangxi Radicals",
    105     0x2FF0, 0x2FFF, "Ideographic Description Characters",
    106     0x3000, 0x303F, "CJK Symbols and Punctuation",
    107     0x3040, 0x309F, "Hiragana",
    108     0x30A0, 0x30FF, "Katakana",
    109     0x3100, 0x312F, "Bopomofo",
    110     0x3130, 0x318F, "Hangul Compatibility Jamo",
    111     0x3190, 0x319F, "Kanbun",
    112     0x31A0, 0x31BF, "Bopomofo Extended",
    113     0x31C0, 0x31EF, "CJK Strokes",
    114     0x31F0, 0x31FF, "Katakana Phonetic Extensions",
    115     0x3200, 0x32FF, "Enclosed CJK Letters and Months",
    116     0x3300, 0x33FF, "CJK Compatibility",
    117     0x3400, 0x4DBF, "CJK Unified Ideographs Extension A",
    118     0x4DC0, 0x4DFF, "Yijing Hexagram Symbols",
    119     0x4E00, 0x9FFF, "CJK Unified Ideographs",
    120     0xA000, 0xA48F, "Yi Syllables",
    121     0xA490, 0xA4CF, "Yi Radicals",
    122     0xA4D0, 0xA4FF, "Lisu",
    123     0xA500, 0xA63F, "Vai",
    124     0xA640, 0xA69F, "Cyrillic Extended-B",
    125     0xA6A0, 0xA6FF, "Bamum",
    126     0xA700, 0xA71F, "Modifier Tone Letters",
    127     0xA720, 0xA7FF, "Latin Extended-D",
    128     0xA800, 0xA82F, "Syloti Nagri",
    129     0xA830, 0xA83F, "Common Indic Number Forms",
    130     0xA840, 0xA87F, "Phags-pa",
    131     0xA880, 0xA8DF, "Saurashtra",
    132     0xA8E0, 0xA8FF, "Devanagari Extended",
    133     0xA900, 0xA92F, "Kayah Li",
    134     0xA930, 0xA95F, "Rejang",
    135     0xA960, 0xA97F, "Hangul Jamo Extended-A",
    136     0xA980, 0xA9DF, "Javanese",
    137     0xA9E0, 0xA9FF, "Myanmar Extended-B",
    138     0xAA00, 0xAA5F, "Cham",
    139     0xAA60, 0xAA7F, "Myanmar Extended-A",
    140     0xAA80, 0xAADF, "Tai Viet",
    141     0xAAE0, 0xAAFF, "Meetei Mayek Extensions",
    142     0xAB00, 0xAB2F, "Ethiopic Extended-A",
    143     0xAB30, 0xAB6F, "Latin Extended-E",
    144     0xABC0, 0xABFF, "Meetei Mayek",
    145     0xAC00, 0xD7AF, "Hangul Syllables",
    146     0xD7B0, 0xD7FF, "Hangul Jamo Extended-B",
    147     0xD800, 0xDB7F, "High Surrogates",
    148     0xDB80, 0xDBFF, "High Private Use Surrogates",
    149     0xDC00, 0xDFFF, "Low Surrogates",
    150     0xE000, 0xF8FF, "Private Use Area",
    151     0xF900, 0xFAFF, "CJK Compatibility Ideographs",
    152     0xFB00, 0xFB4F, "Alphabetic Presentation Forms",
    153     0xFB50, 0xFDFF, "Arabic Presentation Forms-A",
    154     0xFE00, 0xFE0F, "Variation Selectors",
    155     0xFE10, 0xFE1F, "Vertical Forms",
    156     0xFE20, 0xFE2F, "Combining Half Marks",
    157     0xFE30, 0xFE4F, "CJK Compatibility Forms",
    158     0xFE50, 0xFE6F, "Small Form Variants",
    159     0xFE70, 0xFEFF, "Arabic Presentation Forms-B",
    160     0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms",
    161     0xFFF0, 0xFFFF, "Specials",
    162     0x10000, 0x1007F, "Linear B Syllabary",
    163     0x10080, 0x100FF, "Linear B Ideograms",
    164     0x10100, 0x1013F, "Aegean Numbers",
    165     0x10140, 0x1018F, "Ancient Greek Numbers",
    166     0x10190, 0x101CF, "Ancient Symbols",
    167     0x101D0, 0x101FF, "Phaistos Disc",
    168     0x10280, 0x1029F, "Lycian",
    169     0x102A0, 0x102DF, "Carian",
    170     0x102E0, 0x102FF, "Coptic Epact Numbers",
    171     0x10300, 0x1032F, "Old Italic",
    172     0x10330, 0x1034F, "Gothic",
    173     0x10350, 0x1037F, "Old Permic",
    174     0x10380, 0x1039F, "Ugaritic",
    175     0x103A0, 0x103DF, "Old Persian",
    176     0x10400, 0x1044F, "Deseret",
    177     0x10450, 0x1047F, "Shavian",
    178     0x10480, 0x104AF, "Osmanya",
    179     0x10500, 0x1052F, "Elbasan",
    180     0x10530, 0x1056F, "Caucasian Albanian",
    181     0x10600, 0x1077F, "Linear A",
    182     0x10800, 0x1083F, "Cypriot Syllabary",
    183     0x10840, 0x1085F, "Imperial Aramaic",
    184     0x10860, 0x1087F, "Palmyrene",
    185     0x10880, 0x108AF, "Nabataean",
    186     0x10900, 0x1091F, "Phoenician",
    187     0x10920, 0x1093F, "Lydian",
    188     0x10980, 0x1099F, "Meroitic Hieroglyphs",
    189     0x109A0, 0x109FF, "Meroitic Cursive",
    190     0x10A00, 0x10A5F, "Kharoshthi",
    191     0x10A60, 0x10A7F, "Old South Arabian",
    192     0x10A80, 0x10A9F, "Old North Arabian",
    193     0x10AC0, 0x10AFF, "Manichaean",
    194     0x10B00, 0x10B3F, "Avestan",
    195     0x10B40, 0x10B5F, "Inscriptional Parthian",
    196     0x10B60, 0x10B7F, "Inscriptional Pahlavi",
    197     0x10B80, 0x10BAF, "Psalter Pahlavi",
    198     0x10C00, 0x10C4F, "Old Turkic",
    199     0x10E60, 0x10E7F, "Rumi Numeral Symbols",
    200     0x11000, 0x1107F, "Brahmi",
    201     0x11080, 0x110CF, "Kaithi",
    202     0x110D0, 0x110FF, "Sora Sompeng",
    203     0x11100, 0x1114F, "Chakma",
    204     0x11150, 0x1117F, "Mahajani",
    205     0x11180, 0x111DF, "Sharada",
    206     0x111E0, 0x111FF, "Sinhala Archaic Numbers",
    207     0x11200, 0x1124F, "Khojki",
    208     0x112B0, 0x112FF, "Khudawadi",
    209     0x11300, 0x1137F, "Grantha",
    210     0x11480, 0x114DF, "Tirhuta",
    211     0x11580, 0x115FF, "Siddham",
    212     0x11600, 0x1165F, "Modi",
    213     0x11680, 0x116CF, "Takri",
    214     0x118A0, 0x118FF, "Warang Citi",
    215     0x11AC0, 0x11AFF, "Pau Cin Hau",
    216     0x12000, 0x123FF, "Cuneiform",
    217     0x12400, 0x1247F, "Cuneiform Numbers and Punctuation",
    218     0x13000, 0x1342F, "Egyptian Hieroglyphs",
    219     0x16800, 0x16A3F, "Bamum Supplement",
    220     0x16A40, 0x16A6F, "Mro",
    221     0x16AD0, 0x16AFF, "Bassa Vah",
    222     0x16B00, 0x16B8F, "Pahawh Hmong",
    223     0x16F00, 0x16F9F, "Miao",
    224     0x1B000, 0x1B0FF, "Kana Supplement",
    225     0x1BC00, 0x1BC9F, "Duployan",
    226     0x1BCA0, 0x1BCAF, "Shorthand Format Controls",
    227     0x1D000, 0x1D0FF, "Byzantine Musical Symbols",
    228     0x1D100, 0x1D1FF, "Musical Symbols",
    229     0x1D200, 0x1D24F, "Ancient Greek Musical Notation",
    230     0x1D300, 0x1D35F, "Tai Xuan Jing Symbols",
    231     0x1D360, 0x1D37F, "Counting Rod Numerals",
    232     0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols",
    233     0x1E800, 0x1E8DF, "Mende Kikakui",
    234     0x1EE00, 0x1EEFF, "Arabic Mathematical Alphabetic Symbols",
    235     0x1F000, 0x1F02F, "Mahjong Tiles",
    236     0x1F030, 0x1F09F, "Domino Tiles",
    237     0x1F0A0, 0x1F0FF, "Playing Cards",
    238     0x1F100, 0x1F1FF, "Enclosed Alphanumeric Supplement",
    239     0x1F200, 0x1F2FF, "Enclosed Ideographic Supplement",
    240     0x1F300, 0x1F5FF, "Miscellaneous Symbols and Pictographs",
    241     0x1F600, 0x1F64F, "Emoticons",
    242     0x1F650, 0x1F67F, "Ornamental Dingbats",
    243     0x1F680, 0x1F6FF, "Transport and Map Symbols",
    244     0x1F700, 0x1F77F, "Alchemical Symbols",
    245     0x1F780, 0x1F7FF, "Geometric Shapes Extended",
    246     0x1F800, 0x1F8FF, "Supplemental Arrows-C",
    247     0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B",
    248     0x2A700, 0x2B73F, "CJK Unified Ideographs Extension C",
    249     0x2B740, 0x2B81F, "CJK Unified Ideographs Extension D",
    250     0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement",
    251     0xE0000, 0xE007F, "Tags",
    252     0xE0100, 0xE01EF, "Variation Selectors Supplement",
    253     0xF0000, 0xFFFFF, "Supplementary Private Use Area-A",
    254     0x100000, 0x10FFFF, "Supplementary Private Use Area-B",
    255   ]
    256   var UNICODE_BLOCK_COUNT = UNICODE_BLOCK_LIST.length / 3
    257   var UNICODE_LOOKUP = {}
    258   for (var i = 0, len = UNICODE_BLOCK_LIST.length; i < len; i += 3) {
    259     UNICODE_LOOKUP[ UNICODE_BLOCK_LIST[i+2] ] = [ UNICODE_BLOCK_LIST[i], UNICODE_BLOCK_LIST[i+1] ]
    260   }
    261 
    262   function index (j) {
    263     return [ UNICODE_BLOCK_LIST[j*3], UNICODE_BLOCK_LIST[j*3+1], UNICODE_BLOCK_LIST[j*3+2], [] ]
    264   }
    265   function range(m,n){
    266     if (m > n) return []
    267     var a = new Array (n-m)
    268     for (var i = 0, j = m; j <= n; i++, j++) {
    269       a[i] = j
    270     }
    271     return a
    272   }
    273   function paginate (a, n){
    274     var aa = [], ai, i = 0
    275     while (i < 100) {
    276       ai = a.slice(i * n, (i+1) * n)
    277       if (! ai.length) break
    278       aa.push(ai)
    279       i++
    280     }
    281     return aa
    282   }
    283   function block (name, n){
    284     var b = UNICODE_LOOKUP[name]
    285     if (! b) return ""
    286     return range.apply(null, b).map(function(n){ return String.fromCharCode(n) })
    287   }
    288   function entities (a) {
    289     return a.map(function(k){ return "&#" + k.join(";&#") + ";" }).join("<br>")
    290   }
    291   function findGroups (chars){
    292     var groups = [], row, list
    293     for (var i = 0, j = -1, next = -1, len = chars.length; i < len; i++) {
    294       if (chars[i] < next) {
    295         list.push(chars[i])
    296         continue
    297       }
    298       do {
    299         j += 1
    300         next = UNICODE_BLOCK_LIST[(j+1)*3]
    301       } while (chars[i] > next)
    302       row = index(j)
    303       list = row[3]
    304       groups.push( row )
    305     }
    306     return groups
    307   }
    308   
    309   // encodes unicode characters as escaped utf16 - \xFFFF
    310   // encodes ONLY non-ascii characters
    311   function escapeToUtf16 (txt) {
    312     var escaped_txt = "", kode
    313     for (var i = 0; i < txt.length; i++) {
    314       kode = txt.charCodeAt(i)
    315       if (kode > 0x7f) {
    316         kode = kode.toString(16)
    317         switch (kode.length) {
    318           case 2:
    319             kode = "0" + kode
    320           case 3:
    321             kode = "0" + kode
    322         }
    323         escaped_txt += "\\u" + kode
    324       }
    325       else {
    326         escaped_txt += txt[i]
    327       }
    328     }
    329     return escaped_txt
    330   }
    331 
    332   // encodes unicode characters as escaped bytes - \xFF
    333   // encodes ONLY non-ascii characters
    334   function escapeToEscapedBytes (txt) {
    335     var escaped_txt = "", kode, utf8_bytes
    336     for (var i = 0; i < txt.length; i++) {
    337       kode = txt.charCodeAt(i)
    338       if (kode > 0x7f) {
    339         utf8_bytes = convertUnicodeCodePointToUtf8Bytes(kode)
    340         escaped_txt += convertBytesToEscapedString(utf8_bytes, 16)
    341       }
    342       else {
    343         escaped_txt += txt[i]
    344       }
    345     }
    346     return escaped_txt
    347   }
    348 
    349   // encodes unicode characters as escaped bytes - \xFF
    350   // encodes an ENTIRE string
    351   function escapeAllToEscapedBytes(str, base) {
    352     var unicode_codes = convertStringToUnicodeCodePoints(str);
    353     var data_bytes = convertUnicodeCodePointsToBytes(unicode_codes);
    354     return convertBytesToEscapedString(data_bytes, 16);
    355   }
    356   // [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => '\xE3\x81\x82\xE3\x81\x84'
    357   // [ 0343, 0201, 0202, 0343, 0201, 0204 ] => '\343\201\202\343\201\204'
    358   function convertBytesToEscapedString(data_bytes, base) {
    359     var escaped = '';
    360     for (var i = 0; i < data_bytes.length; ++i) {
    361       var prefix = (base == 16 ? "\\x" : "\\");
    362       var num_digits = base == 16 ? 2 : 3;
    363       var escaped_byte = prefix + formatNumber(data_bytes[i], base, num_digits)
    364       escaped += escaped_byte;
    365     }
    366     return escaped;
    367   }
    368   // [ 0x3042, 0x3044 ] => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
    369   function convertUnicodeCodePointsToBytes(unicode_codes) {
    370     var utf8_bytes = [];
    371     for (var i = 0; i < unicode_codes.length; ++i) {
    372       var bytes = convertUnicodeCodePointToUtf8Bytes(unicode_codes[i]);
    373       utf8_bytes = utf8_bytes.concat(bytes);
    374     }
    375     return utf8_bytes;
    376   }
    377   // 0x3042 => [ 0xE3, 0x81, 0x82 ]
    378   function convertUnicodeCodePointToUtf8Bytes(unicode_code) {
    379     var utf8_bytes = [];
    380     if (unicode_code < 0x80) {  // 1-byte
    381       utf8_bytes.push(unicode_code);
    382     } else if (unicode_code < (1 << 11)) {  // 2-byte
    383       utf8_bytes.push((unicode_code >>> 6) | 0xC0);
    384       utf8_bytes.push((unicode_code & 0x3F) | 0x80);
    385     } else if (unicode_code < (1 << 16)) {  // 3-byte
    386       utf8_bytes.push((unicode_code >>> 12) | 0xE0);
    387       utf8_bytes.push(((unicode_code >> 6) & 0x3f) | 0x80);
    388       utf8_bytes.push((unicode_code & 0x3F) | 0x80);
    389     } else if (unicode_code < (1 << 21)) {  // 4-byte
    390       utf8_bytes.push((unicode_code >>> 18) | 0xF0);
    391       utf8_bytes.push(((unicode_code >> 12) & 0x3F) | 0x80);
    392       utf8_bytes.push(((unicode_code >> 6) & 0x3F) | 0x80);
    393       utf8_bytes.push((unicode_code & 0x3F) | 0x80);
    394     }
    395     return utf8_bytes;
    396   }
    397   // "あい" => [ 0x3042,  0x3044 ]
    398   function convertStringToUnicodeCodePoints(str) {
    399     var surrogate_1st = 0;
    400     var unicode_codes = [];
    401     for (var i = 0; i < str.length; ++i) {
    402       var utf16_code = str.charCodeAt(i);
    403       if (surrogate_1st != 0) {
    404         if (utf16_code >= 0xDC00 && utf16_code <= 0xDFFF) {
    405           var surrogate_2nd = utf16_code;
    406           var unicode_code = (surrogate_1st - 0xD800) * (1 << 10) + (1 << 16) +
    407                              (surrogate_2nd - 0xDC00);
    408           unicode_codes.push(unicode_code);
    409         } else {
    410           // Malformed surrogate pair ignored.
    411         }
    412         surrogate_1st = 0;
    413       } else if (utf16_code >= 0xD800 && utf16_code <= 0xDBFF) {
    414         surrogate_1st = utf16_code;
    415       } else {
    416         unicode_codes.push(utf16_code);
    417       }
    418     }
    419     return unicode_codes;
    420   }
    421   // 0xff => "ff"
    422   // 0xff => "377"
    423   function formatNumber(number, base, num_digits) {
    424     var str = number.toString(base).toUpperCase();
    425     for (var i = str.length; i < num_digits; ++i) {
    426       str = "0" + str;
    427     }
    428     return str;
    429   }
    430 
    431   // convert \xFF\xFF\xFF to unicode
    432   function unescapeFromEscapedBytes (str) {
    433     var data_bytes = convertEscapedBytesToBytes(str);
    434     var unicode_codes = convertUtf8BytesToUnicodeCodePoints(data_bytes);
    435     return convertUnicodeCodePointsToString(unicode_codes);
    436   }
    437   // r'\xE3\x81\x82\xE3\x81\x84' => [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ]
    438   // r'\343\201\202\343\201\204' => [ 0343, 0201, 0202, 0343, 0201, 0204 ]
    439   function convertEscapedBytesToBytes(str) {
    440     var parts = str.split("\\x");
    441     parts.shift();  // Trim the first element.
    442     var codes = [];
    443     var max = Math.pow(2, 8);
    444     for (var i = 0; i < parts.length; ++i) {
    445       var code = parseInt(parts[i], 16);
    446       if (code >= 0 && code < max) {
    447         codes.push(code);
    448       } else {
    449         // Malformed code ignored.
    450       }
    451     }
    452     return codes;
    453   }
    454   // [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84 ] => [ 0x3042, 0x3044 ]
    455   function convertUtf8BytesToUnicodeCodePoints(utf8_bytes) {
    456     var unicode_codes = [];
    457     var unicode_code = 0;
    458     var num_followed = 0;
    459     for (var i = 0; i < utf8_bytes.length; ++i) {
    460       var utf8_byte = utf8_bytes[i];
    461       if (utf8_byte >= 0x100) {
    462         // Malformed utf8 byte ignored.
    463       } else if ((utf8_byte & 0xC0) == 0x80) {
    464         if (num_followed > 0) {
    465           unicode_code = (unicode_code << 6) | (utf8_byte & 0x3f);
    466           num_followed -= 1;
    467         } else {
    468           // Malformed UTF-8 sequence ignored.
    469         }
    470       } else {
    471         if (num_followed == 0) {
    472           unicode_codes.push(unicode_code);
    473         } else {
    474           // Malformed UTF-8 sequence ignored.
    475         }
    476         if (utf8_byte < 0x80){  // 1-byte
    477           unicode_code = utf8_byte;
    478           num_followed = 0;
    479         } else if ((utf8_byte & 0xE0) == 0xC0) {  // 2-byte
    480           unicode_code = utf8_byte & 0x1f;
    481           num_followed = 1;
    482         } else if ((utf8_byte & 0xF0) == 0xE0) {  // 3-byte
    483           unicode_code = utf8_byte & 0x0f;
    484           num_followed = 2;
    485         } else if ((utf8_byte & 0xF8) == 0xF0) {  // 4-byte
    486           unicode_code = utf8_byte & 0x07;
    487           num_followed = 3;
    488         } else {
    489           // Malformed UTF-8 sequence ignored.
    490         }
    491       }
    492     }
    493     if (num_followed == 0) {
    494       unicode_codes.push(unicode_code);
    495     } else {
    496       // Malformed UTF-8 sequence ignored.
    497     }
    498     unicode_codes.shift();  // Trim the first element.
    499     return unicode_codes;
    500   }
    501   // [ 0x3042, 0x3044 ] => [ 0x3042, 0x3044 ]
    502   // [ 0xD840, 0xDC0B ] => [ 0x2000B ]  // A surrogate pair.
    503   function convertUnicodeCodePointsToUtf16Codes(unicode_codes) {
    504     var utf16_codes = [];
    505     for (var i = 0; i < unicode_codes.length; ++i) {
    506       var unicode_code = unicode_codes[i];
    507       if (unicode_code < (1 << 16)) {
    508         utf16_codes.push(unicode_code);
    509       } else {
    510         var first = ((unicode_code - (1 << 16)) / (1 << 10)) + 0xD800;
    511         var second = (unicode_code % (1 << 10)) + 0xDC00;
    512         utf16_codes.push(first)
    513         utf16_codes.push(second)
    514       }
    515     }
    516     return utf16_codes;
    517   }
    518   // [ 0x3042, 0x3044 ] => "あい"
    519   function convertUnicodeCodePointsToString(unicode_codes) {
    520     var utf16_codes = convertUnicodeCodePointsToUtf16Codes(unicode_codes);
    521     return convertUtf16CodesToString(utf16_codes);
    522   }
    523   // [ 0x3042, 0x3044 ] => "あい"
    524   function convertUtf16CodesToString(utf16_codes) {
    525     var unescaped = '';
    526     for (var i = 0; i < utf16_codes.length; ++i) {
    527       unescaped += String.fromCharCode(utf16_codes[i]);
    528     }
    529     return unescaped;
    530   }
    531 
    532   return {
    533     raw: UNICODE_BLOCK_LIST,
    534     lookup: UNICODE_LOOKUP,
    535     index: index,
    536     range: range,
    537     block: block,
    538     findGroups: findGroups,
    539     paginate: paginate,
    540     escapeToEscapedBytes: escapeToEscapedBytes,
    541     unescapeFromEscapedBytes: unescapeFromEscapedBytes,
    542   }
    543 })()