testCompileUtf8.cpp (11927B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mozilla/ArrayUtils.h" 6 #include "mozilla/TextUtils.h" 7 #include "mozilla/Utf8.h" 8 9 #include <cstring> 10 11 #include "js/CharacterEncoding.h" 12 #include "js/CompilationAndEvaluation.h" // JS::Compile 13 #include "js/Exception.h" 14 #include "js/friend/ErrorMessages.h" // JSMSG_* 15 #include "js/SourceText.h" 16 #include "jsapi-tests/tests.h" 17 #include "util/Text.h" 18 #include "vm/ErrorReporting.h" 19 20 using mozilla::ArrayEqual; 21 using mozilla::IsAsciiHexDigit; 22 using mozilla::Utf8Unit; 23 24 static bool contains(const char* str, const char* substr) { 25 return std::strstr(str, substr) != nullptr; 26 } 27 28 BEGIN_TEST(testUtf8BadBytes) { 29 static const char badLeadingUnit[] = "var x = \x80"; 30 CHECK(testBadUtf8( 31 badLeadingUnit, JSMSG_BAD_LEADING_UTF8_UNIT, 32 [this](JS::ConstUTF8CharsZ message) { 33 const char* chars = message.c_str(); 34 CHECK(startsWith(chars, "0x80")); 35 CHECK(isBadLeadUnitMessage(chars)); 36 return true; 37 }, 38 "0x80")); 39 40 static const char badSecondInTwoByte[] = "var x = \xDF\x20"; 41 CHECK(testBadUtf8( 42 badSecondInTwoByte, JSMSG_BAD_TRAILING_UTF8_UNIT, 43 [this](JS::ConstUTF8CharsZ message) { 44 const char* chars = message.c_str(); 45 CHECK(isBadTrailingBytesMessage(chars)); 46 CHECK(contains(chars, "0x20")); 47 return true; 48 }, 49 "0xDF 0x20")); 50 51 static const char badSecondInThreeByte[] = "var x = \xEF\x17\xA7"; 52 CHECK(testBadUtf8( 53 badSecondInThreeByte, JSMSG_BAD_TRAILING_UTF8_UNIT, 54 [this](JS::ConstUTF8CharsZ message) { 55 const char* chars = message.c_str(); 56 CHECK(isBadTrailingBytesMessage(chars)); 57 CHECK(contains(chars, "0x17")); 58 return true; 59 }, 60 // Validating stops with the first invalid code unit and 61 // shouldn't go beyond that. 62 "0xEF 0x17")); 63 64 static const char lengthTwoTooShort[] = "var x = \xDF"; 65 CHECK(testBadUtf8( 66 lengthTwoTooShort, JSMSG_NOT_ENOUGH_CODE_UNITS, 67 [this](JS::ConstUTF8CharsZ message) { 68 const char* chars = message.c_str(); 69 CHECK(isNotEnoughUnitsMessage(chars)); 70 CHECK(contains(chars, "0xDF")); 71 CHECK(contains(chars, " 1 byte, but 0 bytes were present")); 72 return true; 73 }, 74 "0xDF")); 75 76 static const char forbiddenHighSurrogate[] = "var x = \xED\xA2\x87"; 77 CHECK(testBadUtf8( 78 forbiddenHighSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, 79 [this](JS::ConstUTF8CharsZ message) { 80 const char* chars = message.c_str(); 81 CHECK(isSurrogateMessage(chars)); 82 CHECK(contains(chars, "0xD887")); 83 return true; 84 }, 85 "0xED 0xA2 0x87")); 86 87 static const char forbiddenLowSurrogate[] = "var x = \xED\xB7\xAF"; 88 CHECK(testBadUtf8( 89 forbiddenLowSurrogate, JSMSG_FORBIDDEN_UTF8_CODE_POINT, 90 [this](JS::ConstUTF8CharsZ message) { 91 const char* chars = message.c_str(); 92 CHECK(isSurrogateMessage(chars)); 93 CHECK(contains(chars, "0xDDEF")); 94 return true; 95 }, 96 "0xED 0xB7 0xAF")); 97 98 static const char oneTooBig[] = "var x = \xF4\x90\x80\x80"; 99 CHECK(testBadUtf8( 100 oneTooBig, JSMSG_FORBIDDEN_UTF8_CODE_POINT, 101 [this](JS::ConstUTF8CharsZ message) { 102 const char* chars = message.c_str(); 103 CHECK(isTooBigMessage(chars)); 104 CHECK(contains(chars, "0x110000")); 105 return true; 106 }, 107 "0xF4 0x90 0x80 0x80")); 108 109 static const char notShortestFormZero[] = "var x = \xC0\x80"; 110 CHECK(testBadUtf8( 111 notShortestFormZero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, 112 [this](JS::ConstUTF8CharsZ message) { 113 const char* chars = message.c_str(); 114 CHECK(isNotShortestFormMessage(chars)); 115 CHECK(startsWith(chars, "0x0 isn't ")); 116 return true; 117 }, 118 "0xC0 0x80")); 119 120 static const char notShortestFormNonzero[] = "var x = \xE0\x87\x80"; 121 CHECK(testBadUtf8( 122 notShortestFormNonzero, JSMSG_FORBIDDEN_UTF8_CODE_POINT, 123 [this](JS::ConstUTF8CharsZ message) { 124 const char* chars = message.c_str(); 125 CHECK(isNotShortestFormMessage(chars)); 126 CHECK(startsWith(chars, "0x1C0 isn't ")); 127 return true; 128 }, 129 "0xE0 0x87 0x80")); 130 131 return true; 132 } 133 134 static constexpr size_t LengthOfByte = js_strlen("0xFF"); 135 136 static bool startsWithByte(const char* str) { 137 return str[0] == '0' && str[1] == 'x' && IsAsciiHexDigit(str[2]) && 138 IsAsciiHexDigit(str[3]); 139 } 140 141 static bool startsWith(const char* str, const char* prefix) { 142 return std::strncmp(prefix, str, strlen(prefix)) == 0; 143 } 144 145 static bool equals(const char* str, const char* expected) { 146 return std::strcmp(str, expected) == 0; 147 } 148 149 static bool isBadLeadUnitMessage(const char* str) { 150 return startsWithByte(str) && 151 equals(str + LengthOfByte, 152 " byte doesn't begin a valid UTF-8 code point"); 153 } 154 155 static bool isBadTrailingBytesMessage(const char* str) { 156 return startsWith(str, "bad trailing UTF-8 byte "); 157 } 158 159 static bool isNotEnoughUnitsMessage(const char* str) { 160 return startsWithByte(str) && 161 startsWith(str + LengthOfByte, " byte in UTF-8 must be followed by "); 162 } 163 164 static bool isForbiddenCodePointMessage(const char* str) { 165 return contains(str, "isn't a valid code point because"); 166 } 167 168 static bool isSurrogateMessage(const char* str) { 169 return isForbiddenCodePointMessage(str) && 170 contains(str, " it's a UTF-16 surrogate"); 171 } 172 173 static bool isTooBigMessage(const char* str) { 174 return isForbiddenCodePointMessage(str) && 175 contains(str, "the maximum code point is U+10FFFF"); 176 } 177 178 static bool isNotShortestFormMessage(const char* str) { 179 return isForbiddenCodePointMessage(str) && 180 contains(str, "it wasn't encoded in shortest possible form"); 181 } 182 183 template <size_t N, typename TestMessage> 184 bool testBadUtf8(const char (&chars)[N], unsigned errorNumber, 185 TestMessage testMessage, const char* badBytes) { 186 JS::Rooted<JSScript*> script(cx); 187 { 188 JS::CompileOptions options(cx); 189 190 JS::SourceText<mozilla::Utf8Unit> srcBuf; 191 CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); 192 193 script = JS::Compile(cx, options, srcBuf); 194 CHECK(!script); 195 } 196 197 JS::ExceptionStack exnStack(cx); 198 CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); 199 200 JS::ErrorReportBuilder report(cx); 201 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); 202 203 const auto* errorReport = report.report(); 204 205 CHECK(errorReport->errorNumber == errorNumber); 206 207 CHECK(testMessage(errorReport->message())); 208 209 { 210 const auto& notes = errorReport->notes; 211 CHECK(notes != nullptr); 212 213 auto iter = notes->begin(); 214 CHECK(iter != notes->end()); 215 216 const char* noteMessage = (*iter)->message().c_str(); 217 218 // The prefix ought always be the same. 219 static constexpr char expectedPrefix[] = 220 "the code units comprising this invalid code point were: "; 221 constexpr size_t expectedPrefixLen = js_strlen(expectedPrefix); 222 223 CHECK(startsWith(noteMessage, expectedPrefix)); 224 225 // The end of the prefix is the bad bytes. 226 CHECK(equals(noteMessage + expectedPrefixLen, badBytes)); 227 228 ++iter; 229 CHECK(iter == notes->end()); 230 } 231 232 static constexpr char16_t expectedContext[] = u"var x = "; 233 constexpr size_t expectedContextLen = js_strlen(expectedContext); 234 235 const char16_t* lineOfContext = errorReport->linebuf(); 236 size_t lineOfContextLength = errorReport->linebufLength(); 237 238 CHECK(lineOfContext[lineOfContextLength] == '\0'); 239 CHECK(lineOfContextLength == expectedContextLen); 240 241 CHECK(std::memcmp(lineOfContext, expectedContext, 242 expectedContextLen * sizeof(char16_t)) == 0); 243 244 return true; 245 } 246 END_TEST(testUtf8BadBytes) 247 248 BEGIN_TEST(testMultiUnitUtf8InWindow) { 249 static const char firstInWindowIsMultiUnit[] = 250 "\xCF\x80\xCF\x80 = 6.283185307; @ bad starts HERE:\x80\xFF\xFF"; 251 CHECK(testContext(firstInWindowIsMultiUnit, 252 u"ππ = 6.283185307; @ bad starts HERE:")); 253 254 static const char atTokenOffsetIsMulti[] = "var z = 💯"; 255 CHECK(testContext(atTokenOffsetIsMulti, u"var z = 💯")); 256 257 static const char afterTokenOffsetIsMulti[] = "var z = @💯💯💯X"; 258 CHECK(testContext(afterTokenOffsetIsMulti, u"var z = @💯💯💯X")); 259 260 static const char atEndIsMulti[] = "var z = @@💯💯💯"; 261 CHECK(testContext(atEndIsMulti, u"var z = @@💯💯💯")); 262 263 return true; 264 } 265 266 template <size_t N, size_t ContextLenWithNull> 267 bool testContext(const char (&chars)[N], 268 const char16_t (&expectedContext)[ContextLenWithNull]) { 269 JS::Rooted<JSScript*> script(cx); 270 { 271 JS::CompileOptions options(cx); 272 273 JS::SourceText<mozilla::Utf8Unit> srcBuf; 274 CHECK(srcBuf.init(cx, chars, N - 1, JS::SourceOwnership::Borrowed)); 275 276 script = JS::Compile(cx, options, srcBuf); 277 CHECK(!script); 278 } 279 280 JS::ExceptionStack exnStack(cx); 281 CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); 282 283 JS::ErrorReportBuilder report(cx); 284 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); 285 286 const auto* errorReport = report.report(); 287 288 CHECK(errorReport->errorNumber == JSMSG_ILLEGAL_CHARACTER); 289 290 const char16_t* lineOfContext = errorReport->linebuf(); 291 size_t lineOfContextLength = errorReport->linebufLength(); 292 293 CHECK(lineOfContext[lineOfContextLength] == '\0'); 294 CHECK(lineOfContextLength == ContextLenWithNull - 1); 295 296 CHECK(ArrayEqual(lineOfContext, expectedContext, ContextLenWithNull)); 297 298 return true; 299 } 300 END_TEST(testMultiUnitUtf8InWindow) 301 302 BEGIN_TEST(testCompileJsonModule) { 303 static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\": \"foo\" }"; 304 JS::Rooted<JSObject*> module(cx); 305 { 306 JS::CompileOptions options(cx); 307 308 JS::SourceText<mozilla::Utf8Unit> srcBuf; 309 CHECK(srcBuf.init(cx, chars, js_strlen(chars), 310 JS::SourceOwnership::Borrowed)); 311 312 module = JS::CompileJsonModule(cx, options, srcBuf); 313 CHECK(module); 314 } 315 316 return true; 317 } 318 END_TEST(testCompileJsonModule) 319 320 BEGIN_TEST(testCompileJsonModuleInvalidJson) { 321 static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\":"; 322 JS::Rooted<JSObject*> module(cx); 323 { 324 JS::CompileOptions options(cx); 325 326 JS::SourceText<mozilla::Utf8Unit> srcBuf; 327 CHECK(srcBuf.init(cx, chars, js_strlen(chars), 328 JS::SourceOwnership::Borrowed)); 329 330 module = JS::CompileJsonModule(cx, options, srcBuf); 331 CHECK(!module); 332 } 333 334 JS::ExceptionStack exnStack(cx); 335 CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); 336 337 JS::ErrorReportBuilder report(cx); 338 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); 339 340 const auto* errorReport = report.report(); 341 CHECK(errorReport->errorNumber == JSMSG_JSON_BAD_PARSE); 342 CHECK(contains(errorReport->message().c_str(), 343 "JSON.parse: unexpected end of data")); 344 345 return true; 346 } 347 END_TEST(testCompileJsonModuleInvalidJson) 348 349 BEGIN_TEST(testCompileJsonModuleBadUtf8) { 350 static const char chars[] = "{ \"a\": 1, \"b\": 2, \"c\": \"\xDF\x20\" }"; 351 JS::Rooted<JSObject*> module(cx); 352 { 353 JS::CompileOptions options(cx); 354 355 JS::SourceText<mozilla::Utf8Unit> srcBuf; 356 CHECK(srcBuf.init(cx, chars, js_strlen(chars), 357 JS::SourceOwnership::Borrowed)); 358 359 module = JS::CompileJsonModule(cx, options, srcBuf); 360 CHECK(!module); 361 } 362 363 JS::ExceptionStack exnStack(cx); 364 CHECK(JS::StealPendingExceptionStack(cx, &exnStack)); 365 366 JS::ErrorReportBuilder report(cx); 367 CHECK(report.init(cx, exnStack, JS::ErrorReportBuilder::WithSideEffects)); 368 369 const auto* errorReport = report.report(); 370 CHECK(errorReport->errorNumber == JSMSG_MALFORMED_UTF8_CHAR); 371 CHECK(contains(errorReport->message().c_str(), 372 "malformed UTF-8 character sequence at offset")); 373 374 return true; 375 } 376 END_TEST(testCompileJsonModuleBadUtf8)