uc-005.js (6134B)
/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 *
 * Date: 15 July 2002
 * SUMMARY: Testing identifiers with double-byte names
 * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274
 *
 * Here is a sample of the problem:
 *
 * js> function f\u02B1 () {}
 *
 * js> f\u02B1.toSource();
 * function f¦() {}
 *
 * js> f\u02B1.toSource().toSource();
 * (new String("function f\xB1() {}"))
 *
 *
 * See how the high-byte information (the 02) has been lost?
 * The same thing was happening with the toString() method:
 *
 * js> f\u02B1.toString();
 *
 * function f¦() {
 * }
 *
 * js> f\u02B1.toString().toSource();
 * (new String("\nfunction f\xB1() {\n}\n"))
 *
 */
//-----------------------------------------------------------------------------
var UBound = 0;
var BUGNUMBER = 58274;
var summary = 'Testing identifiers with double-byte names';
var status = '';
var statusitems = [];
var actual = '';
var actualvalues = [];
var expect = '';
var expectedvalues = [];


/*
 * Define a function that uses double-byte identifiers in
 * "every possible way"
 *
 * Then recover each double-byte identifier via f.toString().
 * To make this easier, put a 'Z' token before every one.
 *
 * Our eval string will be:
 *
 * sEval = "function Z\u02b1(Z\u02b2, b) {
 *          try { Z\u02b3 : var Z\u02b4 = Z\u02b1; }
 *          catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5)
 *          {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }";
 *
 * It will be helpful to build this string in stages:
 */
var s0 = 'function Z';
var s1 = '\u02b1(Z';
var s2 = '\u02b2, b) {try { Z';
var s3 = '\u02b3 : var Z';
var s4 = '\u02b4 = Z';
var s5 = '\u02b1; } catch (Z';
var s6 = '\u02b5) { for (var Z';
var s7 = '\u02b6 in Z';
var s8 = '\u02b5){for (1; 1<0; Z';
var s9 = '\u02b7++) {new Array()[Z';
var s10 = '\u02b6] = 1;} };} }';


/*
 * Concatenate these and eval() to create the function Z\u02b1
 */
var sEval = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10;
eval(sEval);


/*
 * Recover all the double-byte identifiers via Z\u02b1.toString().
 * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2],
 * and so on ...
 */
var arrID = getIdentifiers(Z\u02b1);


/*
 * Now check that we got back what we put in -
 *
 * Stage string s<k> begins with the k-th double-byte identifier, so
 * section k compares arrID[k] against the first character of s<k>.
 */
var arrStage = [s1, s2, s3, s4, s5, s6, s7, s8, s9, s10];
for (var iStage = 1; iStage <= arrStage.length; iStage++)
{
  status = inSection(iStage);
  actual = arrID[iStage];
  expect = arrStage[iStage - 1].charAt(0);
  addThis();
}




//-----------------------------------------------------------------------------
test();
//-----------------------------------------------------------------------------



/*
 * Goal: recover the double-byte identifiers from f.toString()
 * by getting the very next character after each 'Z' token.
 *
 * The return value will be an array |arr| indexed such that
 * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on.
 *
 * Note, however, f.toString() is implementation-dependent.
 * For example, it may begin with '\nfunction' instead of 'function'.
 *
 * Rhino uses a Unicode representation for f.toString(); whereas
 * SpiderMonkey uses an ASCII representation, putting escape sequences
 * for non-ASCII characters. For example, if a function is called f\u02B1,
 * then in Rhino the toString() method will present a 2-character Unicode
 * string for its name, whereas SpiderMonkey will present a 7-character
 * ASCII string for its name: the string literal 'f\u02B1'.
 *
 * So we force the lexer to condense the string before we use it.
 * This will give uniform results in Rhino and SpiderMonkey.
 */
function getIdentifiers(f)
{
  var str = condenseStr(f.toString());
  var arr = str.split('Z');

  /*
   * The identifiers are the 1st char of each split substring
   * EXCEPT the first one, which is just ('\n' +) 'function '.
   *
   * Thus note the 1st identifier will be stored in |arr[1]|,
   * the 2nd one in |arr[2]|, etc., making the indexing easy -
   *
   * Use a declared index variable rather than |for (i in arr)|: the
   * latter assigns to an undeclared |i| (leaking a global, and throwing
   * in strict mode) and would also visit any enumerable property that
   * happens to be present on Array.prototype.
   */
  for (var i = 0; i < arr.length; i++)
  {
    arr[i] = arr[i].charAt(0);
  }
  return arr;
}


/*
 * This function is the opposite of functions like escape(), which take
 * Unicode characters and return escape sequences for them. Here, we force
 * the lexer to turn escape sequences back into single characters.
 *
 * Note we can't simply do |eval(str)|, since in practice |str| will be an
 * identifier somewhere in the program (e.g. a function name); thus |eval(str)|
 * would return the object that the identifier represents: not what we want.
 *
 * So we surround |str| lexicographically with quotes to force the lexer to
 * evaluate it as a string. Have to strip out any linefeeds first, however -
 *
 * NOTE: |str| is spliced into a single-quoted literal and handed to eval(),
 * so it must not contain an unescaped single quote, and it must come from a
 * trusted source. Here it is always the toString() of the function built
 * from |sEval| above, which satisfies both conditions.
 */
function condenseStr(str)
{
  /*
   * You won't be able to do the next step if |str| has
   * any carriage returns or linefeeds in it. For example:
   *
   * js> eval("'" + '\nHello' + "'");
   * 1: SyntaxError: unterminated string literal:
   * 1: '
   * 1: ^
   *
   * So replace them with the empty string -
   */
  str = str.replace(/[\r\n]/g, '');
  return eval("'" + str + "'");
}


/*
 * Record the current |status|, |actual| and |expect| globals as the next
 * test case, to be reported later by test().
 */
function addThis()
{
  statusitems[UBound] = status;
  actualvalues[UBound] = actual;
  expectedvalues[UBound] = expect;
  UBound++;
}


/*
 * Print the bug number and summary, then report every recorded case.
 */
function test()
{
  printBugNumber(BUGNUMBER);
  printStatus(summary);

  for (var i = 0; i < UBound; i++)
  {
    reportCompare(expectedvalues[i], actualvalues[i], statusitems[i]);
  }
}