gb18030-decoder.any.js (4573B)
// META: script=./resources/ranges.js

/**
 * Registers one testharness test that decodes `input` with both the
 * "gb18030" and "gbk" labels and asserts that each result equals `output`.
 *
 * @param {number[]} input - Byte values handed to the decoder.
 * @param {string} output - Expected decoded string.
 * @param {string} desc - Suffix appended to "gb18030 decoder: " to form the
 *   test name; always pass one, otherwise the name ends in "undefined".
 */
const decode = (input, output, desc) => {
  test(() => {
    for (const encoding of ["gb18030", "gbk"]) {
      assert_equals(
        new TextDecoder(encoding).decode(new Uint8Array(input)),
        output,
      );
    }
  }, `gb18030 decoder: ${desc}`);
};

// Single bytes and two-byte sequences.
decode([115], "s", "ASCII");
decode([0x80], "\u20AC", "euro");
decode([0xFF], "\uFFFD", "initial byte out of accepted ranges");
decode([0x81], "\uFFFD", "end of queue, gb18030 first not 0");
decode([0x81, 0x28], "\ufffd(", "two bytes 0x81 0x28");
decode([0x81, 0x40], "\u4E02", "two bytes 0x81 0x40");
decode([0x81, 0x7E], "\u4E8A", "two bytes 0x81 0x7e");
decode([0x81, 0x7F], "\ufffd\u007f", "two bytes 0x81 0x7f");
decode([0x81, 0x80], "\u4E90", "two bytes 0x81 0x80");
decode([0x81, 0xFE], "\u4FA2", "two bytes 0x81 0xFE");
decode([0x81, 0xFF], "\ufffd", "two bytes 0x81 0xFF");
decode([0xFE, 0x40], "\uFA0C", "two bytes 0xFE 0x40");
decode([0xFE, 0xFE], "\uE4C5", "two bytes 0xFE 0xFE");
decode([0xFE, 0xFF], "\ufffd", "two bytes 0xFE 0xFF");

// Truncated or malformed four-byte sequences.
decode([0x81, 0x30], "\ufffd", "two bytes 0x81 0x30");
decode([0x81, 0x30, 0xFE], "\ufffd", "three bytes 0x81 0x30 0xFE");
decode([0x81, 0x30, 0xFF], "\ufffd0\ufffd", "three bytes 0x81 0x30 0xFF");
decode(
  [0x81, 0x30, 0xFE, 0x29],
  "\ufffd0\ufffd)",
  "four bytes 0x81 0x30 0xFE 0x29",
);
decode([0xFE, 0x39, 0xFE, 0x39], "\ufffd", "four bytes 0xFE 0x39 0xFE 0x39");

// Four-byte sequences named after their pointer. The pointer is
//   (b1 - 0x81) * 12600 + (b2 - 0x30) * 1260 + (b3 - 0x81) * 10 + (b4 - 0x30),
// i.e. the inverse of the byte computation in the ranges loop at the end of
// this file. The labels 7456, 7458 and 188999 were previously written as
// 7458, 7459 and 189999, which did not match the bytes under test.
decode([0x81, 0x35, 0xF4, 0x36], "\u1E3E", "pointer 7456");
decode([0x81, 0x35, 0xF4, 0x37], "\ue7c7", "pointer 7457");
decode([0x81, 0x35, 0xF4, 0x38], "\u1E40", "pointer 7458");
decode([0x84, 0x31, 0xA4, 0x39], "\uffff", "pointer 39419");
decode([0x84, 0x31, 0xA5, 0x30], "\ufffd", "pointer 39420");
decode([0x8F, 0x39, 0xFE, 0x39], "\ufffd", "pointer 188999");
decode([0x90, 0x30, 0x81, 0x30], "\u{10000}", "pointer 189000");
decode([0xE3, 0x32, 0x9A, 0x35], "\u{10FFFF}", "pointer 1237575");
decode([0xE3, 0x32, 0x9A, 0x36], "\ufffd", "pointer 1237576");

decode([0x83, 0x36, 0xC8, 0x30], "\uE7C8", "legacy ICU special case 1");
decode([0xA1, 0xAD], "\u2026", "legacy ICU special case 2");
decode([0xA1, 0xAB], "\uFF5E", "legacy ICU special case 3");
// This case used to be registered without a description, which produced the
// test name "gb18030 decoder: undefined".
decode([0xA3, 0xA0], "\u3000", "two bytes 0xA3 0xA0");

// GB18030-2022
decode([0xA6, 0xD9], "\uFE10", "GB18030-2022 1");
decode([0xA6, 0xDA], "\uFE12", "GB18030-2022 2");
decode([0xA6, 0xDB], "\uFE11", "GB18030-2022 3");
decode([0xA6, 0xDC], "\uFE13", "GB18030-2022 4");
decode([0xA6, 0xDD], "\uFE14", "GB18030-2022 5");
decode([0xA6, 0xDE], "\uFE15", "GB18030-2022 6");
decode([0xA6, 0xDF], "\uFE16", "GB18030-2022 7");
decode([0xA6, 0xEC], "\uFE17", "GB18030-2022 8");
decode([0xA6, 0xED], "\uFE18", "GB18030-2022 9");
decode([0xA6, 0xF3], "\uFE19", "GB18030-2022 10");
decode([0xFE, 0x59], "\u9FB4", "GB18030-2022 11");
decode([0xFE, 0x61], "\u9FB5", "GB18030-2022 12");
decode([0xFE, 0x66], "\u9FB6", "GB18030-2022 13");
decode([0xFE, 0x67], "\u9FB7", "GB18030-2022 14");
decode([0xFE, 0x6D], "\u9FB8", "GB18030-2022 15");
decode([0xFE, 0x7E], "\u9FB9", "GB18030-2022 16");
decode([0xFE, 0x90], "\u9FBA", "GB18030-2022 17");
decode([0xFE, 0xA0], "\u9FBB", "GB18030-2022 18");
decode([0x82, 0x35, 0x90, 0x37], "\u9FB4", "GB18030-2022 19");
decode([0x82, 0x35, 0x90, 0x38], "\u9FB5", "GB18030-2022 20");
decode([0x82, 0x35, 0x90, 0x39], "\u9FB6", "GB18030-2022 21");
decode([0x82, 0x35, 0x91, 0x30], "\u9FB7", "GB18030-2022 22");
decode([0x82, 0x35, 0x91, 0x31], "\u9FB8", "GB18030-2022 23");
decode([0x82, 0x35, 0x91, 0x32], "\u9FB9", "GB18030-2022 24");
decode([0x82, 0x35, 0x91, 0x33], "\u9FBA", "GB18030-2022 25");
decode([0x82, 0x35, 0x91, 0x34], "\u9FBB", "GB18030-2022 26");
decode([0x84, 0x31, 0x82, 0x36], "\uFE10", "GB18030-2022 27");
decode([0x84, 0x31, 0x82, 0x37], "\uFE11", "GB18030-2022 28");
decode([0x84, 0x31, 0x82, 0x38], "\uFE12", "GB18030-2022 29");
decode([0x84, 0x31, 0x82, 0x39], "\uFE13", "GB18030-2022 30");
decode([0x84, 0x31, 0x83, 0x30], "\uFE14", "GB18030-2022 31");
decode([0x84, 0x31, 0x83, 0x31], "\uFE15", "GB18030-2022 32");
decode([0x84, 0x31, 0x83, 0x32], "\uFE16", "GB18030-2022 33");
decode([0x84, 0x31, 0x83, 0x33], "\uFE17", "GB18030-2022 34");
decode([0x84, 0x31, 0x83, 0x34], "\uFE18", "GB18030-2022 35");
decode([0x84, 0x31, 0x83, 0x35], "\uFE19", "GB18030-2022 36");

// One test per entry of `ranges` (not defined in this file; presumably
// provided by ./resources/ranges.js via the META line above). Each entry is
// read as [pointer, expected string]; the pointer is split into the four
// bytes that encode it and the decoder must produce the expected string.
let rangeIndex = 0;
for (const [pointer, expected] of ranges) {
  decode(
    [
      Math.floor(pointer / 12600) + 0x81,
      Math.floor((pointer % 12600) / 1260) + 0x30,
      Math.floor((pointer % 1260) / 10) + 0x81,
      pointer % 10 + 0x30,
    ],
    expected,
    `range ${rangeIndex++}`,
  );
}