tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

iso2022jp-encoder.js (6017B)


      1 // set up a sparse array of all unicode codepoints listed in the index
      2 // this will be used for lookup in iso2022jpEncoded
      3 var jis0208CPs = []; // index is unicode cp, value is pointer
      4 for (var p = 0; p < jis0208.length; p++) {
      5    if (jis0208[p] != null && jis0208CPs[jis0208[p]] == null) {
      6        jis0208CPs[jis0208[p]] = p;
      7    }
      8 }
      9 
     10 // set up mappings for half/full width katakana
     11 // index is a katakana index pointer, value is Unicode codepoint (dec)
     12 // this is copy-pasted from the json version of the index belonging to the Encoding spec
     13 var iso2022jpkatakana = [
     14    12290,
     15    12300,
     16    12301,
     17    12289,
     18    12539,
     19    12530,
     20    12449,
     21    12451,
     22    12453,
     23    12455,
     24    12457,
     25    12515,
     26    12517,
     27    12519,
     28    12483,
     29    12540,
     30    12450,
     31    12452,
     32    12454,
     33    12456,
     34    12458,
     35    12459,
     36    12461,
     37    12463,
     38    12465,
     39    12467,
     40    12469,
     41    12471,
     42    12473,
     43    12475,
     44    12477,
     45    12479,
     46    12481,
     47    12484,
     48    12486,
     49    12488,
     50    12490,
     51    12491,
     52    12492,
     53    12493,
     54    12494,
     55    12495,
     56    12498,
     57    12501,
     58    12504,
     59    12507,
     60    12510,
     61    12511,
     62    12512,
     63    12513,
     64    12514,
     65    12516,
     66    12518,
     67    12520,
     68    12521,
     69    12522,
     70    12523,
     71    12524,
     72    12525,
     73    12527,
     74    12531,
     75    12443,
     76    12444
     77 ];
     78 
     79 function chars2cps(chars) {
     80    // this is needed because of javascript's handling of supplementary characters
     81    // char: a string of unicode characters
     82    // returns an array of decimal code point values
     83    var haut = 0;
     84    var out = [];
     85    for (var i = 0; i < chars.length; i++) {
     86        var b = chars.charCodeAt(i);
     87        if (b < 0 || b > 0xffff) {
     88            alert(
     89                "Error in chars2cps: byte out of range " + b.toString(16) + "!"
     90            );
     91        }
     92        if (haut != 0) {
     93            if (0xdc00 <= b && b <= 0xdfff) {
     94                out.push(0x10000 + ((haut - 0xd800) << 10) + (b - 0xdc00));
     95                haut = 0;
     96                continue;
     97            } else {
     98                alert(
     99                    "Error in chars2cps: surrogate out of range " +
    100                        haut.toString(16) +
    101                        "!"
    102                );
    103                haut = 0;
    104            }
    105        }
    106        if (0xd800 <= b && b <= 0xdbff) {
    107            haut = b;
    108        } else {
    109            out.push(b);
    110        }
    111    }
    112    return out;
    113 }
    114 
    115 function iso2022jpEncoder(stream) {
    116    var cps = chars2cps(stream);
    117    var endofstream = 2000000;
    118    var out = "";
    119    var encState = "ascii";
    120    var finished = false;
    121    var cp, ptr;
    122 
    123    while (!finished) {
    124        if (cps.length == 0) cp = endofstream;
    125        else cp = cps.shift();
    126        if (cp == endofstream && encState != "ascii") {
    127            cps.unshift(cp);
    128            encState = "ascii";
    129            out += " 1B 28 42";
    130            continue;
    131        }
    132        if (cp == endofstream && encState == "ascii") {
    133            finished = true;
    134            continue;
    135        }
    136        if (
    137            (encState === "ascii" || encState === "roman") &&
    138            (cp === 0x0e || cp === 0x0f || cp === 0x1b)
    139        ) {
    140            //out += ' &#'+cp+';'
    141            // continue
    142            return null;
    143        }
    144        if (encState == "ascii" && cp >= 0x00 && cp <= 0x7f) {
    145            out += " " + cp.toString(16).toUpperCase();
    146            continue;
    147        }
    148        if (
    149            encState == "roman" &&
    150            ((cp >= 0x00 && cp <= 0x7f && cp !== 0x5c && cp !== 0x7e) ||
    151                cp == 0xa5 ||
    152                cp == 0x203e)
    153        ) {
    154            if (cp >= 0x00 && cp <= 0x7f) {
    155                // ASCII
    156                out += " " + cp.toString(16).toUpperCase();
    157                continue;
    158            }
    159            if (cp == 0xa5) {
    160                out += " 5C";
    161                continue;
    162            }
    163            if (cp == 0x203e) {
    164                out += " 7E";
    165                continue;
    166            }
    167        }
    168        if (encState != "ascii" && cp >= 0x00 && cp <= 0x7f) {
    169            cps.unshift(cp);
    170            encState = "ascii";
    171            out += " 1B 28 42";
    172            continue;
    173        }
    174        if ((cp == 0xa5 || cp == 0x203e) && encState != "roman") {
    175            cps.unshift(cp);
    176            encState = "roman";
    177            out += " 1B 28 4A";
    178            continue;
    179        }
    180        if (cp == 0x2212) cp = 0xff0d;
    181        if (cp >= 0xff61 && cp <= 0xff9f) {
    182            cp = iso2022jpkatakana[cp - 0xff61];
    183        }
    184        ptr = jis0208CPs[cp];
    185        if (ptr == null) {
    186            //out += ' &#'+cp+';'
    187            //continue
    188            return null;
    189        }
    190        if (encState != "jis0208") {
    191            cps.unshift(cp);
    192            encState = "jis0208";
    193            out += " 1B 24 42";
    194            continue;
    195        }
    196        var lead = Math.floor(ptr / 94) + 0x21;
    197        var trail = ptr % 94 + 0x21;
    198        out +=
    199            " " +
    200            lead.toString(16).toUpperCase() +
    201            " " +
    202            trail.toString(16).toUpperCase();
    203    }
    204    return out.trim();
    205 }
    206 
    207 function convertToHex(str) {
    208    // converts a string of ASCII characters to hex byte codes
    209    var out = "";
    210    var result;
    211    for (var c = 0; c < str.length; c++) {
    212        result =
    213            str
    214                .charCodeAt(c)
    215                .toString(16)
    216                .toUpperCase() + " ";
    217        out += result;
    218    }
    219    return out;
    220 }
    221 
    222 function normalizeStr(str) {
    223    var out = "";
    224    for (var c = 0; c < str.length; c++) {
    225        if (
    226            str.charAt(c) == "%" &&
    227            str.charAt(c + 1) != "%" &&
    228            str.charAt(c + 2) != "%"
    229        ) {
    230            out += String.fromCodePoint(
    231                parseInt(str.charAt(c + 1) + str.charAt(c + 2), 16)
    232            );
    233            c += 2;
    234        } else out += str.charAt(c);
    235    }
    236    var result = "";
    237    for (var o = 0; o < out.length; o++) {
    238        result +=
    239            "%" +
    240            out
    241                .charCodeAt(o)
    242                .toString(16)
    243                .toUpperCase();
    244    }
    245    return result.replace(/%1B%28%42$/, "");
    246 }