tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testCompileUtf8.cpp (11927B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "mozilla/ArrayUtils.h"
      6 #include "mozilla/TextUtils.h"
      7 #include "mozilla/Utf8.h"
      8 
      9 #include <cstring>
     10 
     11 #include "js/CharacterEncoding.h"
     12 #include "js/CompilationAndEvaluation.h"  // JS::Compile
     13 #include "js/Exception.h"
     14 #include "js/friend/ErrorMessages.h"  // JSMSG_*
     15 #include "js/SourceText.h"
     16 #include "jsapi-tests/tests.h"
     17 #include "util/Text.h"
     18 #include "vm/ErrorReporting.h"
     19 
     20 using mozilla::ArrayEqual;
     21 using mozilla::IsAsciiHexDigit;
     22 using mozilla::Utf8Unit;
     23 
     24 static bool contains(const char* str, const char* substr) {
     25  return std::strstr(str, substr) != nullptr;
     26 }
     27 
     28 BEGIN_TEST(testUtf8BadBytes) {
     29  static const char badLeadingUnit[] = "var x = \x80";
     30  CHECK(testBadUtf8(
     31      badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT,
     32      [this](JS::ConstUTF8CharsZ message) {
     33        const char* chars = message.c_str();
     34        CHECK(startsWith(chars, "0x80"));
     35        CHECK(isBadLeadUnitMessage(chars));
     36        return true;
     37      },
     38      "0x80"));
     39 
     40  static const char badSecondInTwoByte[] = "var x = \xDF\x20";
     41  CHECK(testBadUtf8(
     42      badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
     43      [this](JS::ConstUTF8CharsZ message) {
     44        const char* chars = message.c_str();
     45        CHECK(isBadTrailingBytesMessage(chars));
     46        CHECK(contains(chars, "0x20"));
     47        return true;
     48      },
     49      "0xDF 0x20"));
     50 
     51  static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7";
     52  CHECK(testBadUtf8(
     53      badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT,
     54      [this](JS::ConstUTF8CharsZ message) {
     55        const char* chars = message.c_str();
     56        CHECK(isBadTrailingBytesMessage(chars));
     57        CHECK(contains(chars, "0x17"));
     58        return true;
     59      },
     60      // Validating stops with the first invalid code unit and
     61      // shouldn't go beyond that.
     62      "0xEF 0x17"));
     63 
     64  static const char lengthTwoTooShort[] = "var x = \xDF";
     65  CHECK(testBadUtf8(
     66      lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS,
     67      [this](JS::ConstUTF8CharsZ message) {
     68        const char* chars = message.c_str();
     69        CHECK(isNotEnoughUnitsMessage(chars));
     70        CHECK(contains(chars, "0xDF"));
     71        CHECK(contains(chars, " 1 byte, but 0 bytes were present"));
     72        return true;
     73      },
     74      "0xDF"));
     75 
     76  static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87";
     77  CHECK(testBadUtf8(
     78      forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
     79      [this](JS::ConstUTF8CharsZ message) {
     80        const char* chars = message.c_str();
     81        CHECK(isSurrogateMessage(chars));
     82        CHECK(contains(chars, "0xD887"));
     83        return true;
     84      },
     85      "0xED 0xA2 0x87"));
     86 
     87  static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF";
     88  CHECK(testBadUtf8(
     89      forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
     90      [this](JS::ConstUTF8CharsZ message) {
     91        const char* chars = message.c_str();
     92        CHECK(isSurrogateMessage(chars));
     93        CHECK(contains(chars, "0xDDEF"));
     94        return true;
     95      },
     96      "0xED 0xB7 0xAF"));
     97 
     98  static const char oneTooBig[] = "var x = \xF4\x90\x80\x80";
     99  CHECK(testBadUtf8(
    100      oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
    101      [this](JS::ConstUTF8CharsZ message) {
    102        const char* chars = message.c_str();
    103        CHECK(isTooBigMessage(chars));
    104        CHECK(contains(chars, "0x110000"));
    105        return true;
    106      },
    107      "0xF4 0x90 0x80 0x80"));
    108 
    109  static const char notShortestFormZero[] = "var x = \xC0\x80";
    110  CHECK(testBadUtf8(
    111      notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
    112      [this](JS::ConstUTF8CharsZ message) {
    113        const char* chars = message.c_str();
    114        CHECK(isNotShortestFormMessage(chars));
    115        CHECK(startsWith(chars, "0x0 isn't "));
    116        return true;
    117      },
    118      "0xC0 0x80"));
    119 
    120  static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80";
    121  CHECK(testBadUtf8(
    122      notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
    123      [this](JS::ConstUTF8CharsZ message) {
    124        const char* chars = message.c_str();
    125        CHECK(isNotShortestFormMessage(chars));
    126        CHECK(startsWith(chars, "0x1C0 isn't "));
    127        return true;
    128      },
    129      "0xE0 0x87 0x80"));
    130 
    131  return true;
    132 }
    133 
    134 static constexpr size_t LengthOfByte = js_strlen("0xFF");
    135 
    136 static bool startsWithByte(const char* str) {
    137  return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) &&
    138         IsAsciiHexDigit(str[3]);
    139 }
    140 
    141 static bool startsWith(const char* str, const char* prefix) {
    142  return std::strncmp(prefix, str, strlen(prefix)) == 0;
    143 }
    144 
    145 static bool equals(const char* str, const char* expected) {
    146  return std::strcmp(str, expected) == 0;
    147 }
    148 
    149 static bool isBadLeadUnitMessage(const char* str) {
    150  return startsWithByte(str) &&
    151         equals(str + LengthOfByte,
    152                " byte doesn't begin a valid UTF-8 code point");
    153 }
    154 
    155 static bool isBadTrailingBytesMessage(const char* str) {
    156  return startsWith(str, "bad trailing UTF-8 byte ");
    157 }
    158 
    159 static bool isNotEnoughUnitsMessage(const char* str) {
    160  return startsWithByte(str) &&
    161         startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by ");
    162 }
    163 
    164 static bool isForbiddenCodePointMessage(const char* str) {
    165  return contains(str, "isn't a valid code point because");
    166 }
    167 
    168 static bool isSurrogateMessage(const char* str) {
    169  return isForbiddenCodePointMessage(str) &&
    170         contains(str, " it's a UTF-16 surrogate");
    171 }
    172 
    173 static bool isTooBigMessage(const char* str) {
    174  return isForbiddenCodePointMessage(str) &&
    175         contains(str, "the maximum code point is U+10FFFF");
    176 }
    177 
    178 static bool isNotShortestFormMessage(const char* str) {
    179  return isForbiddenCodePointMessage(str) &&
    180         contains(str, "it wasn't encoded in shortest possible form");
    181 }
    182 
    183 template <size_t N, typename TestMessage>
    184 bool testBadUtf8(const char (&chars)[N], unsigned errorNumber,
    185                 TestMessage testMessage, const char* badBytes) {
    186  JS::Rooted<JSScript*> script(cx);
    187  {
    188    JS::CompileOptions options(cx);
    189 
    190    JS::SourceText<mozilla::Utf8Unit> srcBuf;
    191    CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
    192 
    193    script = JS::Compile(cx, options, srcBuf);
    194    CHECK(!script);
    195  }
    196 
    197  JS::ExceptionStack exnStack(cx);
    198  CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
    199 
    200  JS::ErrorReportBuilder report(cx);
    201  CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
    202 
    203  const auto* errorReport = report.report();
    204 
    205  CHECK(errorReport->errorNumber == errorNumber);
    206 
    207  CHECK(testMessage(errorReport->message()));
    208 
    209  {
    210    const auto& notes = errorReport->notes;
    211    CHECK(notes != nullptr);
    212 
    213    auto iter = notes->begin();
    214    CHECK(iter != notes->end());
    215 
    216    const char* noteMessage = (*iter)->message().c_str();
    217 
    218    // The prefix ought always be the same.
    219    static constexpr char expectedPrefix[] =
    220        "the code units comprising this invalid code point were: ";
    221    constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix);
    222 
    223    CHECK(startsWith(noteMessage, expectedPrefix));
    224 
    225    // The end of the prefix is the bad bytes.
    226    CHECK(equals(noteMessage + expectedPrefixLen, badBytes));
    227 
    228    ++iter;
    229    CHECK(iter == notes->end());
    230  }
    231 
    232  static constexpr char16_t expectedContext[] = u"var x = ";
    233  constexpr size_t expectedContextLen = js_strlen(expectedContext);
    234 
    235  const char16_t* lineOfContext = errorReport->linebuf();
    236  size_t lineOfContextLength = errorReport->linebufLength();
    237 
    238  CHECK(lineOfContext[lineOfContextLength] == '\0');
    239  CHECK(lineOfContextLength == expectedContextLen);
    240 
    241  CHECK(std::memcmp(lineOfContext, expectedContext,
    242                    expectedContextLen * sizeof(char16_t)) == 0);
    243 
    244  return true;
    245 }
    246 END_TEST(testUtf8BadBytes)
    247 
    248 BEGIN_TEST(testMultiUnitUtf8InWindow) {
    249  static const char firstInWindowIsMultiUnit[] =
    250      "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF";
    251  CHECK(testContext(firstInWindowIsMultiUnit,
    252                    u"ππ = 6.283185307; @ bad starts HERE:"));
    253 
    254  static const char atTokenOffsetIsMulti[] = "var z = 💯";
    255  CHECK(testContext(atTokenOffsetIsMulti, u"var z = 💯"));
    256 
    257  static const char afterTokenOffsetIsMulti[] = "var z = @💯💯💯X";
    258  CHECK(testContext(afterTokenOffsetIsMulti, u"var z = @💯💯💯X"));
    259 
    260  static const char atEndIsMulti[] = "var z = @@💯💯💯";
    261  CHECK(testContext(atEndIsMulti, u"var z = @@💯💯💯"));
    262 
    263  return true;
    264 }
    265 
    266 template <size_t N, size_t ContextLenWithNull>
    267 bool testContext(const char (&chars)[N],
    268                 const char16_t (&expectedContext)[ContextLenWithNull]) {
    269  JS::Rooted<JSScript*> script(cx);
    270  {
    271    JS::CompileOptions options(cx);
    272 
    273    JS::SourceText<mozilla::Utf8Unit> srcBuf;
    274    CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed));
    275 
    276    script = JS::Compile(cx, options, srcBuf);
    277    CHECK(!script);
    278  }
    279 
    280  JS::ExceptionStack exnStack(cx);
    281  CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
    282 
    283  JS::ErrorReportBuilder report(cx);
    284  CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
    285 
    286  const auto* errorReport = report.report();
    287 
    288  CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER);
    289 
    290  const char16_t* lineOfContext = errorReport->linebuf();
    291  size_t lineOfContextLength = errorReport->linebufLength();
    292 
    293  CHECK(lineOfContext[lineOfContextLength] == '\0');
    294  CHECK(lineOfContextLength == ContextLenWithNull - 1);
    295 
    296  CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull));
    297 
    298  return true;
    299 }
    300 END_TEST(testMultiUnitUtf8InWindow)
    301 
    302 BEGIN_TEST(testCompileJsonModule) {
    303  static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\": \"foo\" }";
    304  JS::Rooted<JSObject*> module(cx);
    305  {
    306    JS::CompileOptions options(cx);
    307 
    308    JS::SourceText<mozilla::Utf8Unit> srcBuf;
    309    CHECK(srcBuf.init(cx, chars, js_strlen(chars),
    310                      JS::SourceOwnership::Borrowed));
    311 
    312    module = JS::CompileJsonModule(cx, options, srcBuf);
    313    CHECK(module);
    314  }
    315 
    316  return true;
    317 }
    318 END_TEST(testCompileJsonModule)
    319 
    320 BEGIN_TEST(testCompileJsonModuleInvalidJson) {
    321  static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\":";
    322  JS::Rooted<JSObject*> module(cx);
    323  {
    324    JS::CompileOptions options(cx);
    325 
    326    JS::SourceText<mozilla::Utf8Unit> srcBuf;
    327    CHECK(srcBuf.init(cx, chars, js_strlen(chars),
    328                      JS::SourceOwnership::Borrowed));
    329 
    330    module = JS::CompileJsonModule(cx, options, srcBuf);
    331    CHECK(!module);
    332  }
    333 
    334  JS::ExceptionStack exnStack(cx);
    335  CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
    336 
    337  JS::ErrorReportBuilder report(cx);
    338  CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
    339 
    340  const auto* errorReport = report.report();
    341  CHECK(errorReport->errorNumber == JSMSG_JSON_BAD_PARSE);
    342  CHECK(contains(errorReport->message().c_str(),
    343                 "JSON.parse: unexpected end of data"));
    344 
    345  return true;
    346 }
    347 END_TEST(testCompileJsonModuleInvalidJson)
    348 
    349 BEGIN_TEST(testCompileJsonModuleBadUtf8) {
    350  static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\": \"\xDF\x20\" }";
    351  JS::Rooted<JSObject*> module(cx);
    352  {
    353    JS::CompileOptions options(cx);
    354 
    355    JS::SourceText<mozilla::Utf8Unit> srcBuf;
    356    CHECK(srcBuf.init(cx, chars, js_strlen(chars),
    357                      JS::SourceOwnership::Borrowed));
    358 
    359    module = JS::CompileJsonModule(cx, options, srcBuf);
    360    CHECK(!module);
    361  }
    362 
    363  JS::ExceptionStack exnStack(cx);
    364  CHECK(JS::StealPendingExceptionStack(cx, &exnStack));
    365 
    366  JS::ErrorReportBuilder report(cx);
    367  CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects));
    368 
    369  const auto* errorReport = report.report();
    370  CHECK(errorReport->errorNumber == JSMSG_MALFORMED_UTF8_CHAR);
    371  CHECK(contains(errorReport->message().c_str(),
    372                 "malformed UTF-8 character sequence at offset"));
    373 
    374  return true;
    375 }
    376 END_TEST(testCompileJsonModuleBadUtf8)