gb18030-encoder.html (3396B)
<!doctype html>
<meta charset=gb18030>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script src=resources/ranges.js></script>
<script>
/**
 * Registers one testharness.js test checking how `input` is serialized in the
 * query of a URL parsed by this document, which declares charset=gb18030.
 *
 * @param {string} input - Text appended after "?" in the URL being parsed.
 * @param {string} output - Expected query string, without the leading "?".
 * @param {string} desc - Suffix of the test name, after "gb18030 encoder: ".
 */
const encode = (input, output, desc) => {
  test(() => {
    const a = document.createElement("a"); // <a> uses document encoding for URL's query
    a.href = "https://example.com/?" + input;
    // slice(1) removes the leading "?"; it replaces the legacy substr(1) and
    // yields the same result for a non-negative start index.
    assert_equals(a.search.slice(1), output);
  }, "gb18030 encoder: " + desc);
};

encode("s", "s", "very basic");
encode("\u20AC", "%A2%E3", "Euro");
encode("\u4E02", "%81@", "character");
encode("\uE4C6", "%A1@", "PUA");
encode("\uE4C5", "%FE%FE", "PUA #2");
encode("\uE5E5", "%26%2358853%3B", "PUA #3");
encode("\ud83d\udca9", "%949%DA3", "poo");
encode("\uE7C7", "%815%F47", "Ranges pointer special case");
encode("\uE7C8", "%836%C80", "legacy ICU special case 1");
encode("\u2026", "%A1%AD", "legacy ICU special case 2");
encode("\uFF5E", "%A1%AB", "legacy ICU special case 3");

// GB18030-2022
encode("\uFE10", "%A6%D9", "GB18030-2022 1");
encode("\uFE12", "%A6%DA", "GB18030-2022 2");
encode("\uFE11", "%A6%DB", "GB18030-2022 3");
encode("\uFE13", "%A6%DC", "GB18030-2022 4");
encode("\uFE14", "%A6%DD", "GB18030-2022 5");
encode("\uFE15", "%A6%DE", "GB18030-2022 6");
encode("\uFE16", "%A6%DF", "GB18030-2022 7");
encode("\uFE17", "%A6%EC", "GB18030-2022 8");
encode("\uFE18", "%A6%ED", "GB18030-2022 9");
encode("\uFE19", "%A6%F3", "GB18030-2022 10");
encode("\u9FB4", "%FEY", "GB18030-2022 11");
encode("\u9FB5", "%FEa", "GB18030-2022 12");
encode("\u9FB6", "%FEf", "GB18030-2022 13");
encode("\u9FB7", "%FEg", "GB18030-2022 14");
encode("\u9FB8", "%FEm", "GB18030-2022 15");
encode("\u9FB9", "%FE~", "GB18030-2022 16");
encode("\u9FBA", "%FE%90", "GB18030-2022 17");
encode("\u9FBB", "%FE%A0", "GB18030-2022 18");
encode("\uE78D", "%A6%D9", "GB18030-2022 19");
encode("\uE78E", "%A6%DA", "GB18030-2022 20");
encode("\uE78F", "%A6%DB", "GB18030-2022 21");
encode("\uE790", "%A6%DC", "GB18030-2022 22");
encode("\uE791", "%A6%DD", "GB18030-2022 23");
encode("\uE792", "%A6%DE", "GB18030-2022 24");
encode("\uE793", "%A6%DF", "GB18030-2022 25");
encode("\uE794", "%A6%EC", "GB18030-2022 26");
encode("\uE795", "%A6%ED", "GB18030-2022 27");
encode("\uE796", "%A6%F3", "GB18030-2022 28");
encode("\uE81E", "%FEY", "GB18030-2022 29");
encode("\uE826", "%FEa", "GB18030-2022 30");
encode("\uE82B", "%FEf", "GB18030-2022 31");
encode("\uE82C", "%FEg", "GB18030-2022 32");
encode("\uE832", "%FEm", "GB18030-2022 33");
encode("\uE843", "%FE~", "GB18030-2022 34");
encode("\uE854", "%FE%90", "GB18030-2022 35");
encode("\uE864", "%FE%A0", "GB18030-2022 36");

// Renders the integer part of x as uppercase hexadecimal; the callers below
// pass values whose integer part is 0-15, i.e. a single hex digit.
const upperCaseNibble = x => Math.floor(x).toString(16).toUpperCase();

// Computes the expected query text for a pointer taken from `ranges`: four
// bytes are derived from the pointer, the first and third are written as
// percent-encoded hex, the second and fourth as the characters "0"-"9".
const encodePointer = pointer => {
  const byte1 = 0x81 + Math.floor(pointer / 12600);
  const byte2 = 0x30 + Math.floor((pointer % 12600) / 1260);
  const byte3 = 0x81 + Math.floor((pointer % 1260) / 10);
  const byte4 = 0x30 + (pointer % 10);

  // High nibble first, then low nibble.
  const percentEncode = byte =>
    "%" + upperCaseNibble(byte / 16) + upperCaseNibble(byte % 16);

  return percentEncode(byte1)
    + String.fromCharCode(byte2)
    + percentEncode(byte3)
    + String.fromCharCode(byte4);
};

// One test per entry of `ranges` (loaded from resources/ranges.js): element 0
// is the pointer, element 1 is the text fed to the encoder.
let i = 0;
for (const [pointer, input] of ranges) {
  encode(input, encodePointer(pointer), `range ${i}`);
  i += 1;
}
</script>