tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

escaped-whitespace.js (2482B)


      1 // Copyright (C) 2024 Leo Balter, Jordan Harband. All rights reserved.
      2 // This code is governed by the BSD license found in the LICENSE file.
      3 
      4 /*---
      5 esid: sec-regexp.escape
      6 description: Escaped WhiteSpace characters (simple assertions)
      7 info: |
      8  EncodeForRegExpEscape ( c )
      9 
     10  ...
     11  3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
     12  4. Let toEscape be StringToCodePoints(otherPunctuators).
     13  5. If toEscape ..., c is matched by WhiteSpace or LineTerminator, ..., then
     14    a. If c ≤ 0xFF, then
     15      i. Let hex be Number::toString(𝔽(c), 16).
     16      ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and StringPad(hex, 2, "0", START).
     17    b. Let escaped be the empty String.
     18    c. Let codeUnits be UTF16EncodeCodePoint(c).
     19    d. For each code unit cu of codeUnits, do
     20      i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
     21    e. Return escaped.
     22  6. Return UTF16EncodeCodePoint(c).
     23 
     24  WhiteSpace ::
     25    <TAB> U+0009 CHARACTER TABULATION
     26    <VT> U+000B LINE TABULATION
     27    <FF> U+000C FORM FEED (FF)
     28    <ZWNBSP> U+FEFF ZERO WIDTH NO-BREAK SPACE
     29    <USP>
     30 
     31    U+0020 (SPACE) and U+00A0 (NO-BREAK SPACE) code points are part of <USP>
     32    Other USP U+202F NARROW NO-BREAK SPACE
     33 
     34  Exceptions:
     35 
     36    2. If c is the code point listed in some cell of the “Code Point” column of Table 64, then
     37    a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and the string in the “ControlEscape” column of the row whose “Code Point” column contains c.
     38 
     39  ControlEscape, Numeric Value, Code Point, Unicode Name, Symbol
     40  t 9 U+0009 CHARACTER TABULATION <HT>
     41  n 10 U+000A LINE FEED (LF) <LF>
     42  v 11 U+000B LINE TABULATION <VT>
     43  f 12 U+000C FORM FEED (FF) <FF>
     44  r 13 U+000D CARRIAGE RETURN (CR) <CR>
     45 features: [RegExp.escape]
     46 ---*/
     47 
     48 const WhiteSpace = '\uFEFF\u0020\u00A0\u202F';
     49 
     50 assert.sameValue(RegExp.escape('\uFEFF'), '\\ufeff', `whitespace \\uFEFF is escaped correctly to \\uFEFF`);
     51 assert.sameValue(RegExp.escape('\u0020'), '\\x20', `whitespace \\u0020 is escaped correctly to \\x20`);
     52 assert.sameValue(RegExp.escape('\u00A0'), '\\xa0', `whitespace \\u00A0 is escaped correctly to \\xA0`);
     53 assert.sameValue(RegExp.escape('\u202F'), '\\u202f', `whitespace \\u202F is escaped correctly to \\u202F`);
     54 
     55 assert.sameValue(RegExp.escape(WhiteSpace), '\\ufeff\\x20\\xa0\\u202f', `whitespaces are escaped correctly`);
     56 
     57 reportCompare(0, 0);