tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testAtomizeUtf8NonAsciiLatin1CodePoint.cpp (6475B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "mozilla/Maybe.h"  // mozilla::Maybe
      6 #include "mozilla/Utf8.h"  // mozilla::IsTrailingUnit, mozilla::Utf8Unit, mozilla::DecodeOneUtf8CodePoint
      7 
      8 #include <inttypes.h>  // UINT8_MAX
      9 #include <stdint.h>    // uint16_t
     10 
     11 #include "js/Exception.h"   // JS_IsExceptionPending, JS_ClearPendingException
     12 #include "js/RootingAPI.h"  // JS::Rooted, JS::MutableHandle
     13 #include "jsapi-tests/tests.h"  // BEGIN_TEST, END_TEST, CHECK
     14 #include "vm/JSAtomUtils.h"     // js::AtomizeChars, js::AtomizeUTF8Chars
     15 #include "vm/StringType.h"      // JSAtom
     16 
     17 using mozilla::DecodeOneUtf8CodePoint;
     18 using mozilla::IsAscii;
     19 using mozilla::IsTrailingUnit;
     20 using mozilla::Maybe;
     21 using mozilla::Utf8Unit;
     22 
     23 using JS::Latin1Char;
     24 using JS::MutableHandle;
     25 using JS::Rooted;
     26 
     27 BEGIN_TEST(testAtomizeTwoByteUTF8) {
     28  Rooted<JSAtom*> atom16(cx);
     29  Rooted<JSAtom*> atom8(cx);
     30 
     31  for (uint16_t i = 0; i <= UINT8_MAX; i++) {
     32    // Test cases where the first unit is ASCII.
     33    if (IsAscii(char16_t(i))) {
     34      for (uint16_t j = 0; j <= UINT8_MAX; j++) {
     35        if (IsAscii(char16_t(j))) {
     36          // If both units are ASCII, the sequence encodes a two-code point
     37          // string.
     38          if (!shouldBeTwoCodePoints(i, j, &atom16, &atom8)) {
     39            return false;
     40          }
     41        } else {
     42          // ASCII followed by non-ASCII should be invalid.
     43          if (!shouldBeInvalid(i, j)) {
     44            return false;
     45          }
     46        }
     47      }
     48 
     49      continue;
     50    }
     51 
     52    // Test remaining cases where the first unit isn't a two-byte lead.
     53    if ((i & 0b1110'0000) != 0b1100'0000) {
     54      for (uint16_t j = 0; j <= UINT8_MAX; j++) {
     55        // If the first unit isn't a two-byte lead, the sequence is invalid no
     56        // matter what the second unit is.
     57        if (!shouldBeInvalid(i, j)) {
     58          return false;
     59        }
     60      }
     61 
     62      continue;
     63    }
     64 
     65    // Test remaining cases where the first unit is the two-byte lead of a
     66    // non-Latin-1 code point.
     67    if (i >= 0b1100'0100) {
     68      for (uint16_t j = 0; j <= UINT8_MAX; j++) {
     69        if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) {
     70          if (!shouldBeSingleNonLatin1(i, j, &atom16, &atom8)) {
     71            return false;
     72          }
     73        } else {
     74          if (!shouldBeInvalid(i, j)) {
     75            return false;
     76          }
     77        }
     78      }
     79 
     80      continue;
     81    }
     82 
     83    // Test remaining cases where the first unit is the two-byte lead of an
     84    // overlong ASCII code point.
     85    if (i < 0b1100'0010) {
     86      for (uint16_t j = 0; j <= UINT8_MAX; j++) {
     87        if (!shouldBeInvalid(i, j)) {
     88          return false;
     89        }
     90      }
     91 
     92      continue;
     93    }
     94 
     95    // Finally, test remaining cases where the first unit is the two-byte lead
     96    // of a Latin-1 code point.
     97    for (uint16_t j = 0; j <= UINT8_MAX; j++) {
     98      if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) {
     99        if (!shouldBeSingleLatin1(i, j, &atom16, &atom8)) {
    100          return false;
    101        }
    102      } else {
    103        if (!shouldBeInvalid(i, j)) {
    104          return false;
    105        }
    106      }
    107    }
    108  }
    109 
    110  return true;
    111 }
    112 
    113 bool shouldBeTwoCodePoints(uint16_t first, uint16_t second,
    114                           MutableHandle<JSAtom*> atom16,
    115                           MutableHandle<JSAtom*> atom8) {
    116  CHECK(first <= UINT8_MAX);
    117  CHECK(second <= UINT8_MAX);
    118  CHECK(IsAscii(char16_t(first)));
    119  CHECK(IsAscii(char16_t(second)));
    120 
    121  const char16_t utf16[] = {static_cast<char16_t>(first),
    122                            static_cast<char16_t>(second)};
    123  atom16.set(js::AtomizeChars(cx, utf16, 2));
    124  CHECK(atom16);
    125  CHECK(atom16->length() == 2);
    126  CHECK(atom16->latin1OrTwoByteChar(0) == first);
    127  CHECK(atom16->latin1OrTwoByteChar(1) == second);
    128 
    129  const char utf8[] = {static_cast<char>(first), static_cast<char>(second)};
    130  atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2));
    131  CHECK(atom8);
    132  CHECK(atom8->length() == 2);
    133  CHECK(atom8->latin1OrTwoByteChar(0) == first);
    134  CHECK(atom8->latin1OrTwoByteChar(1) == second);
    135 
    136  CHECK(atom16 == atom8);
    137 
    138  return true;
    139 }
    140 
    141 bool shouldBeOneCodePoint(uint16_t first, uint16_t second, char32_t v,
    142                          MutableHandle<JSAtom*> atom16,
    143                          MutableHandle<JSAtom*> atom8) {
    144  CHECK(first <= UINT8_MAX);
    145  CHECK(second <= UINT8_MAX);
    146  CHECK(v <= UINT16_MAX);
    147 
    148  const char16_t utf16[] = {static_cast<char16_t>(v)};
    149  atom16.set(js::AtomizeChars(cx, utf16, 1));
    150  CHECK(atom16);
    151  CHECK(atom16->length() == 1);
    152  CHECK(atom16->latin1OrTwoByteChar(0) == v);
    153 
    154  const char utf8[] = {static_cast<char>(first), static_cast<char>(second)};
    155  atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2));
    156  CHECK(atom8);
    157  CHECK(atom8->length() == 1);
    158  CHECK(atom8->latin1OrTwoByteChar(0) == v);
    159 
    160  CHECK(atom16 == atom8);
    161 
    162  return true;
    163 }
    164 
    165 bool shouldBeSingleNonLatin1(uint16_t first, uint16_t second,
    166                             MutableHandle<JSAtom*> atom16,
    167                             MutableHandle<JSAtom*> atom8) {
    168  CHECK(first <= UINT8_MAX);
    169  CHECK(second <= UINT8_MAX);
    170 
    171  const char bytes[] = {static_cast<char>(first), static_cast<char>(second)};
    172  const char* iter = &bytes[1];
    173  Maybe<char32_t> cp =
    174      DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2);
    175  CHECK(cp.isSome());
    176 
    177  char32_t v = cp.value();
    178  CHECK(v > UINT8_MAX);
    179 
    180  return shouldBeOneCodePoint(first, second, v, atom16, atom8);
    181 }
    182 
    183 bool shouldBeSingleLatin1(uint16_t first, uint16_t second,
    184                          MutableHandle<JSAtom*> atom16,
    185                          MutableHandle<JSAtom*> atom8) {
    186  CHECK(first <= UINT8_MAX);
    187  CHECK(second <= UINT8_MAX);
    188 
    189  const char bytes[] = {static_cast<char>(first), static_cast<char>(second)};
    190  const char* iter = &bytes[1];
    191  Maybe<char32_t> cp =
    192      DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2);
    193  CHECK(cp.isSome());
    194 
    195  char32_t v = cp.value();
    196  CHECK(v <= UINT8_MAX);
    197 
    198  return shouldBeOneCodePoint(first, second, v, atom16, atom8);
    199 }
    200 
    201 bool shouldBeInvalid(uint16_t first, uint16_t second) {
    202  CHECK(first <= UINT8_MAX);
    203  CHECK(second <= UINT8_MAX);
    204 
    205  const char invalid[] = {static_cast<char>(first), static_cast<char>(second)};
    206  CHECK(!js::AtomizeUTF8Chars(cx, invalid, 2));
    207  CHECK(JS_IsExceptionPending(cx));
    208  JS_ClearPendingException(cx);
    209 
    210  return true;
    211 }
    212 END_TEST(testAtomizeTwoByteUTF8)