tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uc-005.js (6134B)


      1 /* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /*
      7 *
      8 * Date:    15 July 2002
      9 * SUMMARY: Testing identifiers with double-byte names
     10 * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274
     11 *
     12 * Here is a sample of the problem:
     13 *
     14 *    js> function f\u02B1 () {}
     15 *
     16 *    js> f\u02B1.toSource();
     17 *    function f¦() {}
     18 *
     19 *    js> f\u02B1.toSource().toSource();
     20 *    (new String("function f\xB1() {}"))
     21 *
     22 *
     23 * See how the high-byte information (the 02) has been lost?
     24 * The same thing was happening with the toString() method:
     25 *
     26 *    js> f\u02B1.toString();
     27 *
     28 *    function f¦() {
     29 *    }
     30 *
     31 *    js> f\u02B1.toString().toSource();
     32 *    (new String("\nfunction f\xB1() {\n}\n"))
     33 *
     34 */
     35 //-----------------------------------------------------------------------------
     36 var UBound = 0;
     37 var BUGNUMBER = 58274;
     38 var summary = 'Testing identifiers with double-byte names';
     39 var status = '';
     40 var statusitems = [];
     41 var actual = '';
     42 var actualvalues = [];
     43 var expect= '';
     44 var expectedvalues = [];
     45 
     46 
     47 /*
     48 * Define a function that uses double-byte identifiers in
     49 * "every possible way"
     50 *
     51 * Then recover each double-byte identifier via f.toString().
     52 * To make this easier, put a 'Z' token before every one.
     53 *
     54 * Our eval string will be:
     55 *
     56 * sEval = "function Z\u02b1(Z\u02b2, b) {
     57 *          try { Z\u02b3 : var Z\u02b4 = Z\u02b1; }
     58 *          catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5)
     59 *          {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }";
     60 *
     61 * It will be helpful to build this string in stages:
     62 */
     63 var s0 =  'function Z';
     64 var s1 =  '\u02b1(Z';
     65 var s2 =  '\u02b2, b) {try { Z';
     66 var s3 =  '\u02b3 : var Z';
     67 var s4 =  '\u02b4 = Z';
     68 var s5 =  '\u02b1; } catch (Z'
     69  var s6 =  '\u02b5) { for (var Z';
     70 var s7 =  '\u02b6 in Z';
     71 var s8 =  '\u02b5){for (1; 1<0; Z';
     72 var s9 =  '\u02b7++) {new Array()[Z';
     73 var s10 = '\u02b6] = 1;} };} }';
     74 
     75 
     76 /*
     77 * Concatenate these and eval() to create the function Z\u02b1
     78 */
     79 var sEval = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10;
     80 eval(sEval);
     81 
     82 
     83 /*
     84 * Recover all the double-byte identifiers via Z\u02b1.toString().
     85 * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2],
     86 * and so on ...
     87 */
     88 var arrID = getIdentifiers(Z\u02b1);
     89 
     90 
     91 /*
     92 * Now check that we got back what we put in -
     93 */
     94 status = inSection(1);
     95 actual = arrID[1];
     96 expect = s1.charAt(0);
     97 addThis();
     98 
     99 status = inSection(2);
    100 actual = arrID[2];
    101 expect = s2.charAt(0);
    102 addThis();
    103 
    104 status = inSection(3);
    105 actual = arrID[3];
    106 expect = s3.charAt(0);
    107 addThis();
    108 
    109 status = inSection(4);
    110 actual = arrID[4];
    111 expect = s4.charAt(0);
    112 addThis();
    113 
    114 status = inSection(5);
    115 actual = arrID[5];
    116 expect = s5.charAt(0);
    117 addThis();
    118 
    119 status = inSection(6);
    120 actual = arrID[6];
    121 expect = s6.charAt(0);
    122 addThis();
    123 
    124 status = inSection(7);
    125 actual = arrID[7];
    126 expect = s7.charAt(0);
    127 addThis();
    128 
    129 status = inSection(8);
    130 actual = arrID[8];
    131 expect = s8.charAt(0);
    132 addThis();
    133 
    134 status = inSection(9);
    135 actual = arrID[9];
    136 expect = s9.charAt(0);
    137 addThis();
    138 
    139 status = inSection(10);
    140 actual = arrID[10];
    141 expect = s10.charAt(0);
    142 addThis();
    143 
    144 
    145 
    146 
    147 //-----------------------------------------------------------------------------
    148 test();
    149 //-----------------------------------------------------------------------------
    150 
    151 
    152 
    153 /*
    154 * Goal: recover the double-byte identifiers from f.toString()
    155 * by getting the very next character after each 'Z' token.
    156 *
    157 * The return value will be an array |arr| indexed such that
    158 * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on.
    159 *
    160 * Note, however, f.toString() is implementation-independent.
    161 * For example, it may begin with '\nfunction' instead of 'function'.
    162 *
    163 * Rhino uses a Unicode representation for f.toString(); whereas
    164 * SpiderMonkey uses an ASCII representation, putting escape sequences
    165 * for non-ASCII characters. For example, if a function is called f\u02B1,
    166 * then in Rhino the toString() method will present a 2-character Unicode
    167 * string for its name, whereas SpiderMonkey will present a 7-character
    168 * ASCII string for its name: the string literal 'f\u02B1'.
    169 *
    170 * So we force the lexer to condense the string before we use it.
    171 * This will give uniform results in Rhino and SpiderMonkey.
    172 */
    173 function getIdentifiers(f)
    174 {
    175  var str = condenseStr(f.toString());
    176  var arr = str.split('Z');
    177 
    178  /*
    179   * The identifiers are the 1st char of each split substring
    180   * EXCEPT the first one, which is just ('\n' +) 'function '.
    181   *
    182   * Thus note the 1st identifier will be stored in |arr[1]|,
    183   * the 2nd one in |arr[2]|, etc., making the indexing easy -
    184   */
    185  for (i in arr)
    186    arr[i] = arr[i].charAt(0);
    187  return arr;
    188 }
    189 
    190 
    191 /*
    192 * This function is the opposite of a functions like escape(), which take
    193 * Unicode characters and return escape sequences for them. Here, we force
    194 * the lexer to turn escape sequences back into single characters.
    195 *
    196 * Note we can't simply do |eval(str)|, since in practice |str| will be an
    197 * identifier somewhere in the program (e.g. a function name); thus |eval(str)|
    198 * would return the object that the identifier represents: not what we want.
    199 *
    200 * So we surround |str| lexicographically with quotes to force the lexer to
    201 * evaluate it as a string. Have to strip out any linefeeds first, however -
    202 */
    203 function condenseStr(str)
    204 {
    205  /*
    206   * You won't be able to do the next step if |str| has
    207   * any carriage returns or linefeeds in it. For example:
    208   *
    209   *  js> eval("'" + '\nHello' + "'");
    210   *  1: SyntaxError: unterminated string literal:
    211   *  1: '
    212   *  1: ^
    213   *
    214   * So replace them with the empty string -
    215   */
    216  str = str.replace(/[\r\n]/g, '')
    217    return eval("'" + str + "'")
    218    }
    219 
    220 
    221 function addThis()
    222 {
    223  statusitems[UBound] = status;
    224  actualvalues[UBound] = actual;
    225  expectedvalues[UBound] = expect;
    226  UBound++;
    227 }
    228 
    229 
    230 function test()
    231 {
    232  printBugNumber(BUGNUMBER);
    233  printStatus(summary);
    234 
    235  for (var i=0; i<UBound; i++)
    236  {
    237    reportCompare(expectedvalues[i], actualvalues[i], statusitems[i]);
    238  }
    239 }