Text.cpp (13346B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "util/Text.h" 8 9 #include "mozilla/Assertions.h" 10 #include "mozilla/Maybe.h" 11 #include "mozilla/PodOperations.h" 12 #include "mozilla/Utf8.h" 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include "frontend/FrontendContext.h" // frontend::FrontendContext 18 #include "gc/GC.h" 19 #include "js/GCAPI.h" 20 #include "js/Printer.h" 21 #include "js/Utility.h" // JS::FreePolicy 22 #include "util/Unicode.h" 23 #include "vm/JSContext.h" 24 #include "vm/StringType.h" 25 26 using namespace JS; 27 using namespace js; 28 29 using mozilla::DecodeOneUtf8CodePoint; 30 using mozilla::IsAscii; 31 using mozilla::Maybe; 32 using mozilla::PodCopy; 33 using mozilla::Utf8Unit; 34 35 template <typename CharT> 36 const CharT* js_strchr_limit(const CharT* s, char16_t c, const CharT* limit) { 37 while (s < limit) { 38 if (*s == c) { 39 return s; 40 } 41 s++; 42 } 43 return nullptr; 44 } 45 46 template const Latin1Char* js_strchr_limit(const Latin1Char* s, char16_t c, 47 const Latin1Char* limit); 48 49 template const char16_t* js_strchr_limit(const char16_t* s, char16_t c, 50 const char16_t* limit); 51 52 template <typename AllocT, typename CharT> 53 static UniquePtr<CharT[], JS::FreePolicy> DuplicateStringToArenaImpl( 54 arena_id_t destArenaId, AllocT* alloc, const CharT* s, size_t n) { 55 auto ret = alloc->template make_pod_arena_array<CharT>(destArenaId, n + 1); 56 if (!ret) { 57 return nullptr; 58 } 59 PodCopy(ret.get(), s, n); 60 ret[n] = '\0'; 61 return ret; 62 } 63 64 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, 65 const char* s, size_t n) { 66 return DuplicateStringToArenaImpl(destArenaId, cx, s, n); 67 } 68 69 static UniqueChars DuplicateStringToArena(arena_id_t destArenaId, 70 FrontendContext* fc, const char* s, 71 size_t n) { 72 return DuplicateStringToArenaImpl(destArenaId, fc->getAllocator(), s, n); 73 } 74 75 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx, 76 const char* s) { 77 return DuplicateStringToArena(destArenaId, cx, s, strlen(s)); 78 } 79 80 static UniqueChars DuplicateStringToArena(arena_id_t destArenaId, 81 FrontendContext* fc, const char* s) { 82 return DuplicateStringToArena(destArenaId, fc, s, strlen(s)); 83 } 84 85 UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId, 86 JSContext* cx, 87 const JS::Latin1Char* s, 88 size_t n) { 89 return DuplicateStringToArenaImpl(destArenaId, cx, s, n); 90 } 91 92 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, 93 JSContext* cx, const char16_t* s, 94 size_t n) { 95 return DuplicateStringToArenaImpl(destArenaId, cx, s, n); 96 } 97 98 static UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId, 99 FrontendContext* fc, 100 const char16_t* s, size_t n) { 101 return DuplicateStringToArenaImpl(destArenaId, fc->getAllocator(), s, n); 102 } 103 104 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, 105 JSContext* cx, 106 const char16_t* s) { 107 return DuplicateStringToArena(destArenaId, cx, s, js_strlen(s)); 108 } 109 110 static UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId, 111 FrontendContext* fc, 112 const char16_t* s) { 113 return DuplicateStringToArena(destArenaId, fc, s, js_strlen(s)); 114 } 115 116 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s) { 117 return DuplicateStringToArena(destArenaId, s, strlen(s)); 118 } 119 120 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s, 121 size_t n) { 122 UniqueChars ret(js_pod_arena_malloc<char>(destArenaId, n + 1)); 123 if (!ret) { 124 return nullptr; 125 } 126 PodCopy(ret.get(), s, n); 127 ret[n] = '\0'; 128 return ret; 129 } 130 131 UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId, 132 const JS::Latin1Char* s, 133 size_t n) { 134 UniqueLatin1Chars ret( 135 js_pod_arena_malloc<JS::Latin1Char>(destArenaId, n + 1)); 136 if (!ret) { 137 return nullptr; 138 } 139 PodCopy(ret.get(), s, n); 140 ret[n] = '\0'; 141 return ret; 142 } 143 144 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, 145 const char16_t* s) { 146 return DuplicateStringToArena(destArenaId, s, js_strlen(s)); 147 } 148 149 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId, 150 const char16_t* s, size_t n) { 151 UniqueTwoByteChars ret(js_pod_arena_malloc<char16_t>(destArenaId, n + 1)); 152 if (!ret) { 153 return nullptr; 154 } 155 PodCopy(ret.get(), s, n); 156 ret[n] = '\0'; 157 return ret; 158 } 159 160 UniqueChars js::DuplicateString(JSContext* cx, const char* s, size_t n) { 161 return DuplicateStringToArena(js::MallocArena, cx, s, n); 162 } 163 164 UniqueChars js::DuplicateString(JSContext* cx, const char* s) { 165 return DuplicateStringToArena(js::MallocArena, cx, s); 166 } 167 168 UniqueChars js::DuplicateString(FrontendContext* fc, const char* s) { 169 return ::DuplicateStringToArena(js::MallocArena, fc, s); 170 } 171 172 UniqueLatin1Chars js::DuplicateString(JSContext* cx, const JS::Latin1Char* s, 173 size_t n) { 174 return DuplicateStringToArena(js::MallocArena, cx, s, n); 175 } 176 177 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s) { 178 return DuplicateStringToArena(js::MallocArena, cx, s); 179 } 180 181 UniqueTwoByteChars js::DuplicateString(FrontendContext* fc, const char16_t* s) { 182 return ::DuplicateStringToArena(js::MallocArena, fc, s); 183 } 184 185 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s, 186 size_t n) { 187 return DuplicateStringToArena(js::MallocArena, cx, s, n); 188 } 189 190 UniqueChars js::DuplicateString(const char* s) { 191 return DuplicateStringToArena(js::MallocArena, s); 192 } 193 194 UniqueChars js::DuplicateString(const char* s, size_t n) { 195 return DuplicateStringToArena(js::MallocArena, s, n); 196 } 197 198 UniqueLatin1Chars js::DuplicateString(const JS::Latin1Char* s, size_t n) { 199 return DuplicateStringToArena(js::MallocArena, s, n); 200 } 201 202 UniqueTwoByteChars js::DuplicateString(const char16_t* s) { 203 return DuplicateStringToArena(js::MallocArena, s); 204 } 205 206 UniqueTwoByteChars js::DuplicateString(const char16_t* s, size_t n) { 207 return DuplicateStringToArena(js::MallocArena, s, n); 208 } 209 210 char16_t* js::InflateString(JSContext* cx, const char* bytes, size_t length) { 211 char16_t* chars = cx->pod_malloc<char16_t>(length + 1); 212 if (!chars) { 213 return nullptr; 214 } 215 CopyAndInflateChars(chars, bytes, length); 216 chars[length] = '\0'; 217 return chars; 218 } 219 220 /* 221 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at 222 * least 4 bytes long. Return the number of UTF-8 bytes of data written. 223 */ 224 uint32_t js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, char32_t ucs4Char) { 225 MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax); 226 227 if (ucs4Char < 0x80) { 228 utf8Buffer[0] = uint8_t(ucs4Char); 229 return 1; 230 } 231 232 uint32_t a = ucs4Char >> 11; 233 uint32_t utf8Length = 2; 234 while (a) { 235 a >>= 5; 236 utf8Length++; 237 } 238 239 MOZ_ASSERT(utf8Length <= 4); 240 241 uint32_t i = utf8Length; 242 while (--i) { 243 utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80); 244 ucs4Char >>= 6; 245 } 246 247 utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char); 248 return utf8Length; 249 } 250 251 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, 252 GenericPrinter* out, const JSLinearString* str, 253 uint32_t quote) { 254 size_t len = str->length(); 255 AutoCheckCannotGC nogc; 256 return str->hasLatin1Chars() 257 ? PutEscapedStringImpl(buffer, bufferSize, out, 258 str->latin1Chars(nogc), len, quote) 259 : PutEscapedStringImpl(buffer, bufferSize, out, 260 str->twoByteChars(nogc), len, quote); 261 } 262 263 template <typename CharT> 264 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, 265 GenericPrinter* out, const CharT* chars, 266 size_t length, uint32_t quote) { 267 enum { 268 STOP, 269 FIRST_QUOTE, 270 LAST_QUOTE, 271 CHARS, 272 ESCAPE_START, 273 ESCAPE_MORE 274 } state; 275 276 MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"'); 277 MOZ_ASSERT_IF(!buffer, bufferSize == 0); 278 MOZ_ASSERT_IF(out, !buffer); 279 280 if (bufferSize == 0) { 281 buffer = nullptr; 282 } else { 283 bufferSize--; 284 } 285 286 const CharT* charsEnd = chars + length; 287 size_t n = 0; 288 state = FIRST_QUOTE; 289 unsigned shift = 0; 290 unsigned hex = 0; 291 unsigned u = 0; 292 char c = 0; /* to quell GCC warnings */ 293 294 for (;;) { 295 switch (state) { 296 case STOP: 297 goto stop; 298 case FIRST_QUOTE: 299 state = CHARS; 300 goto do_quote; 301 case LAST_QUOTE: 302 state = STOP; 303 do_quote: 304 if (quote == 0) { 305 continue; 306 } 307 c = (char)quote; 308 break; 309 case CHARS: 310 if (chars == charsEnd) { 311 state = LAST_QUOTE; 312 continue; 313 } 314 u = *chars++; 315 if (u < ' ') { 316 if (u != 0) { 317 const char* escape = strchr(js_EscapeMap, (int)u); 318 if (escape) { 319 u = escape[1]; 320 goto do_escape; 321 } 322 } 323 goto do_hex_escape; 324 } 325 if (u < 127) { 326 if (u == quote || u == '\\') { 327 goto do_escape; 328 } 329 c = (char)u; 330 } else if (u < 0x100) { 331 goto do_hex_escape; 332 } else { 333 shift = 16; 334 hex = u; 335 u = 'u'; 336 goto do_escape; 337 } 338 break; 339 do_hex_escape: 340 shift = 8; 341 hex = u; 342 u = 'x'; 343 do_escape: 344 c = '\\'; 345 state = ESCAPE_START; 346 break; 347 case ESCAPE_START: 348 MOZ_ASSERT(' ' <= u && u < 127); 349 c = (char)u; 350 state = ESCAPE_MORE; 351 break; 352 case ESCAPE_MORE: 353 if (shift == 0) { 354 state = CHARS; 355 continue; 356 } 357 shift -= 4; 358 u = 0xF & (hex >> shift); 359 c = (char)(u + (u < 10 ? '0' : 'A' - 10)); 360 break; 361 } 362 if (buffer) { 363 MOZ_ASSERT(n <= bufferSize); 364 if (n != bufferSize) { 365 buffer[n] = c; 366 } else { 367 buffer[n] = '\0'; 368 buffer = nullptr; 369 } 370 } else if (out) { 371 out->put(&c, 1); 372 } 373 n++; 374 } 375 stop: 376 if (buffer) { 377 buffer[n] = '\0'; 378 } 379 return n; 380 } 381 382 bool js::ContainsFlag(const char* str, const char* flag) { 383 size_t flaglen = strlen(flag); 384 const char* index = strstr(str, flag); 385 while (index) { 386 if ((index == str || index[-1] == ',') && 387 (index[flaglen] == 0 || index[flaglen] == ',')) { 388 return true; 389 } 390 index = strstr(index + flaglen, flag); 391 } 392 return false; 393 } 394 395 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, 396 GenericPrinter* out, 397 const Latin1Char* chars, size_t length, 398 uint32_t quote); 399 400 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, 401 GenericPrinter* out, const char* chars, 402 size_t length, uint32_t quote); 403 404 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize, 405 GenericPrinter* out, 406 const char16_t* chars, size_t length, 407 uint32_t quote); 408 409 template size_t js::PutEscapedString(char* buffer, size_t bufferSize, 410 const Latin1Char* chars, size_t length, 411 uint32_t quote); 412 413 template size_t js::PutEscapedString(char* buffer, size_t bufferSize, 414 const char16_t* chars, size_t length, 415 uint32_t quote); 416 417 size_t js::unicode::CountUTF16CodeUnits(const Utf8Unit* begin, 418 const Utf8Unit* end) { 419 MOZ_ASSERT(begin <= end); 420 421 size_t count = 0; 422 const Utf8Unit* ptr = begin; 423 while (ptr < end) { 424 count++; 425 426 Utf8Unit lead = *ptr++; 427 if (IsAscii(lead)) { 428 continue; 429 } 430 431 Maybe<char32_t> cp = DecodeOneUtf8CodePoint(lead, &ptr, end); 432 MOZ_ASSERT(cp.isSome()); 433 if (*cp > unicode::UTF16Max) { 434 // This uses surrogate pair. 435 count++; 436 } 437 } 438 MOZ_ASSERT(ptr == end, "bad code unit count in line?"); 439 440 return count; 441 }