tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Text.cpp (13346B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
#include "util/Text.h"

#include "mozilla/Assertions.h"
#include "mozilla/Maybe.h"
#include "mozilla/PodOperations.h"
#include "mozilla/Utf8.h"

#include <stddef.h>
#include <stdint.h>
#include <type_traits>  // std::make_unsigned_t

#include "frontend/FrontendContext.h"  // frontend::FrontendContext
#include "gc/GC.h"
#include "js/GCAPI.h"
#include "js/Printer.h"
#include "js/Utility.h"  // JS::FreePolicy
#include "util/Unicode.h"
#include "vm/JSContext.h"
#include "vm/StringType.h"
     25 
     26 using namespace JS;
     27 using namespace js;
     28 
     29 using mozilla::DecodeOneUtf8CodePoint;
     30 using mozilla::IsAscii;
     31 using mozilla::Maybe;
     32 using mozilla::PodCopy;
     33 using mozilla::Utf8Unit;
     34 
     35 template <typename CharT>
     36 const CharT* js_strchr_limit(const CharT* s, char16_t c, const CharT* limit) {
     37  while (s < limit) {
     38    if (*s == c) {
     39      return s;
     40    }
     41    s++;
     42  }
     43  return nullptr;
     44 }
     45 
     46 template const Latin1Char* js_strchr_limit(const Latin1Char* s, char16_t c,
     47                                           const Latin1Char* limit);
     48 
     49 template const char16_t* js_strchr_limit(const char16_t* s, char16_t c,
     50                                         const char16_t* limit);
     51 
     52 template <typename AllocT, typename CharT>
     53 static UniquePtr<CharT[], JS::FreePolicy> DuplicateStringToArenaImpl(
     54    arena_id_t destArenaId, AllocT* alloc, const CharT* s, size_t n) {
     55  auto ret = alloc->template make_pod_arena_array<CharT>(destArenaId, n + 1);
     56  if (!ret) {
     57    return nullptr;
     58  }
     59  PodCopy(ret.get(), s, n);
     60  ret[n] = '\0';
     61  return ret;
     62 }
     63 
     64 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
     65                                       const char* s, size_t n) {
     66  return DuplicateStringToArenaImpl(destArenaId, cx, s, n);
     67 }
     68 
     69 static UniqueChars DuplicateStringToArena(arena_id_t destArenaId,
     70                                          FrontendContext* fc, const char* s,
     71                                          size_t n) {
     72  return DuplicateStringToArenaImpl(destArenaId, fc->getAllocator(), s, n);
     73 }
     74 
     75 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
     76                                       const char* s) {
     77  return DuplicateStringToArena(destArenaId, cx, s, strlen(s));
     78 }
     79 
     80 static UniqueChars DuplicateStringToArena(arena_id_t destArenaId,
     81                                          FrontendContext* fc, const char* s) {
     82  return DuplicateStringToArena(destArenaId, fc, s, strlen(s));
     83 }
     84 
     85 UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId,
     86                                             JSContext* cx,
     87                                             const JS::Latin1Char* s,
     88                                             size_t n) {
     89  return DuplicateStringToArenaImpl(destArenaId, cx, s, n);
     90 }
     91 
     92 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
     93                                              JSContext* cx, const char16_t* s,
     94                                              size_t n) {
     95  return DuplicateStringToArenaImpl(destArenaId, cx, s, n);
     96 }
     97 
     98 static UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId,
     99                                                 FrontendContext* fc,
    100                                                 const char16_t* s, size_t n) {
    101  return DuplicateStringToArenaImpl(destArenaId, fc->getAllocator(), s, n);
    102 }
    103 
    104 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
    105                                              JSContext* cx,
    106                                              const char16_t* s) {
    107  return DuplicateStringToArena(destArenaId, cx, s, js_strlen(s));
    108 }
    109 
    110 static UniqueTwoByteChars DuplicateStringToArena(arena_id_t destArenaId,
    111                                                 FrontendContext* fc,
    112                                                 const char16_t* s) {
    113  return DuplicateStringToArena(destArenaId, fc, s, js_strlen(s));
    114 }
    115 
    116 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s) {
    117  return DuplicateStringToArena(destArenaId, s, strlen(s));
    118 }
    119 
    120 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s,
    121                                       size_t n) {
    122  UniqueChars ret(js_pod_arena_malloc<char>(destArenaId, n + 1));
    123  if (!ret) {
    124    return nullptr;
    125  }
    126  PodCopy(ret.get(), s, n);
    127  ret[n] = '\0';
    128  return ret;
    129 }
    130 
    131 UniqueLatin1Chars js::DuplicateStringToArena(arena_id_t destArenaId,
    132                                             const JS::Latin1Char* s,
    133                                             size_t n) {
    134  UniqueLatin1Chars ret(
    135      js_pod_arena_malloc<JS::Latin1Char>(destArenaId, n + 1));
    136  if (!ret) {
    137    return nullptr;
    138  }
    139  PodCopy(ret.get(), s, n);
    140  ret[n] = '\0';
    141  return ret;
    142 }
    143 
    144 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
    145                                              const char16_t* s) {
    146  return DuplicateStringToArena(destArenaId, s, js_strlen(s));
    147 }
    148 
    149 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
    150                                              const char16_t* s, size_t n) {
    151  UniqueTwoByteChars ret(js_pod_arena_malloc<char16_t>(destArenaId, n + 1));
    152  if (!ret) {
    153    return nullptr;
    154  }
    155  PodCopy(ret.get(), s, n);
    156  ret[n] = '\0';
    157  return ret;
    158 }
    159 
    160 UniqueChars js::DuplicateString(JSContext* cx, const char* s, size_t n) {
    161  return DuplicateStringToArena(js::MallocArena, cx, s, n);
    162 }
    163 
    164 UniqueChars js::DuplicateString(JSContext* cx, const char* s) {
    165  return DuplicateStringToArena(js::MallocArena, cx, s);
    166 }
    167 
    168 UniqueChars js::DuplicateString(FrontendContext* fc, const char* s) {
    169  return ::DuplicateStringToArena(js::MallocArena, fc, s);
    170 }
    171 
    172 UniqueLatin1Chars js::DuplicateString(JSContext* cx, const JS::Latin1Char* s,
    173                                      size_t n) {
    174  return DuplicateStringToArena(js::MallocArena, cx, s, n);
    175 }
    176 
    177 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s) {
    178  return DuplicateStringToArena(js::MallocArena, cx, s);
    179 }
    180 
    181 UniqueTwoByteChars js::DuplicateString(FrontendContext* fc, const char16_t* s) {
    182  return ::DuplicateStringToArena(js::MallocArena, fc, s);
    183 }
    184 
    185 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s,
    186                                       size_t n) {
    187  return DuplicateStringToArena(js::MallocArena, cx, s, n);
    188 }
    189 
    190 UniqueChars js::DuplicateString(const char* s) {
    191  return DuplicateStringToArena(js::MallocArena, s);
    192 }
    193 
    194 UniqueChars js::DuplicateString(const char* s, size_t n) {
    195  return DuplicateStringToArena(js::MallocArena, s, n);
    196 }
    197 
    198 UniqueLatin1Chars js::DuplicateString(const JS::Latin1Char* s, size_t n) {
    199  return DuplicateStringToArena(js::MallocArena, s, n);
    200 }
    201 
    202 UniqueTwoByteChars js::DuplicateString(const char16_t* s) {
    203  return DuplicateStringToArena(js::MallocArena, s);
    204 }
    205 
    206 UniqueTwoByteChars js::DuplicateString(const char16_t* s, size_t n) {
    207  return DuplicateStringToArena(js::MallocArena, s, n);
    208 }
    209 
    210 char16_t* js::InflateString(JSContext* cx, const char* bytes, size_t length) {
    211  char16_t* chars = cx->pod_malloc<char16_t>(length + 1);
    212  if (!chars) {
    213    return nullptr;
    214  }
    215  CopyAndInflateChars(chars, bytes, length);
    216  chars[length] = '\0';
    217  return chars;
    218 }
    219 
    220 /*
    221 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
    222 * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
    223 */
    224 uint32_t js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, char32_t ucs4Char) {
    225  MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);
    226 
    227  if (ucs4Char < 0x80) {
    228    utf8Buffer[0] = uint8_t(ucs4Char);
    229    return 1;
    230  }
    231 
    232  uint32_t a = ucs4Char >> 11;
    233  uint32_t utf8Length = 2;
    234  while (a) {
    235    a >>= 5;
    236    utf8Length++;
    237  }
    238 
    239  MOZ_ASSERT(utf8Length <= 4);
    240 
    241  uint32_t i = utf8Length;
    242  while (--i) {
    243    utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80);
    244    ucs4Char >>= 6;
    245  }
    246 
    247  utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char);
    248  return utf8Length;
    249 }
    250 
    251 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
    252                                GenericPrinter* out, const JSLinearString* str,
    253                                uint32_t quote) {
    254  size_t len = str->length();
    255  AutoCheckCannotGC nogc;
    256  return str->hasLatin1Chars()
    257             ? PutEscapedStringImpl(buffer, bufferSize, out,
    258                                    str->latin1Chars(nogc), len, quote)
    259             : PutEscapedStringImpl(buffer, bufferSize, out,
    260                                    str->twoByteChars(nogc), len, quote);
    261 }
    262 
    263 template <typename CharT>
    264 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
    265                                GenericPrinter* out, const CharT* chars,
    266                                size_t length, uint32_t quote) {
    267  enum {
    268    STOP,
    269    FIRST_QUOTE,
    270    LAST_QUOTE,
    271    CHARS,
    272    ESCAPE_START,
    273    ESCAPE_MORE
    274  } state;
    275 
    276  MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
    277  MOZ_ASSERT_IF(!buffer, bufferSize == 0);
    278  MOZ_ASSERT_IF(out, !buffer);
    279 
    280  if (bufferSize == 0) {
    281    buffer = nullptr;
    282  } else {
    283    bufferSize--;
    284  }
    285 
    286  const CharT* charsEnd = chars + length;
    287  size_t n = 0;
    288  state = FIRST_QUOTE;
    289  unsigned shift = 0;
    290  unsigned hex = 0;
    291  unsigned u = 0;
    292  char c = 0; /* to quell GCC warnings */
    293 
    294  for (;;) {
    295    switch (state) {
    296      case STOP:
    297        goto stop;
    298      case FIRST_QUOTE:
    299        state = CHARS;
    300        goto do_quote;
    301      case LAST_QUOTE:
    302        state = STOP;
    303      do_quote:
    304        if (quote == 0) {
    305          continue;
    306        }
    307        c = (char)quote;
    308        break;
    309      case CHARS:
    310        if (chars == charsEnd) {
    311          state = LAST_QUOTE;
    312          continue;
    313        }
    314        u = *chars++;
    315        if (u < ' ') {
    316          if (u != 0) {
    317            const char* escape = strchr(js_EscapeMap, (int)u);
    318            if (escape) {
    319              u = escape[1];
    320              goto do_escape;
    321            }
    322          }
    323          goto do_hex_escape;
    324        }
    325        if (u < 127) {
    326          if (u == quote || u == '\\') {
    327            goto do_escape;
    328          }
    329          c = (char)u;
    330        } else if (u < 0x100) {
    331          goto do_hex_escape;
    332        } else {
    333          shift = 16;
    334          hex = u;
    335          u = 'u';
    336          goto do_escape;
    337        }
    338        break;
    339      do_hex_escape:
    340        shift = 8;
    341        hex = u;
    342        u = 'x';
    343      do_escape:
    344        c = '\\';
    345        state = ESCAPE_START;
    346        break;
    347      case ESCAPE_START:
    348        MOZ_ASSERT(' ' <= u && u < 127);
    349        c = (char)u;
    350        state = ESCAPE_MORE;
    351        break;
    352      case ESCAPE_MORE:
    353        if (shift == 0) {
    354          state = CHARS;
    355          continue;
    356        }
    357        shift -= 4;
    358        u = 0xF & (hex >> shift);
    359        c = (char)(u + (u < 10 ? '0' : 'A' - 10));
    360        break;
    361    }
    362    if (buffer) {
    363      MOZ_ASSERT(n <= bufferSize);
    364      if (n != bufferSize) {
    365        buffer[n] = c;
    366      } else {
    367        buffer[n] = '\0';
    368        buffer = nullptr;
    369      }
    370    } else if (out) {
    371      out->put(&c, 1);
    372    }
    373    n++;
    374  }
    375 stop:
    376  if (buffer) {
    377    buffer[n] = '\0';
    378  }
    379  return n;
    380 }
    381 
    382 bool js::ContainsFlag(const char* str, const char* flag) {
    383  size_t flaglen = strlen(flag);
    384  const char* index = strstr(str, flag);
    385  while (index) {
    386    if ((index == str || index[-1] == ',') &&
    387        (index[flaglen] == 0 || index[flaglen] == ',')) {
    388      return true;
    389    }
    390    index = strstr(index + flaglen, flag);
    391  }
    392  return false;
    393 }
    394 
    395 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
    396                                         GenericPrinter* out,
    397                                         const Latin1Char* chars, size_t length,
    398                                         uint32_t quote);
    399 
    400 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
    401                                         GenericPrinter* out, const char* chars,
    402                                         size_t length, uint32_t quote);
    403 
    404 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
    405                                         GenericPrinter* out,
    406                                         const char16_t* chars, size_t length,
    407                                         uint32_t quote);
    408 
    409 template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
    410                                     const Latin1Char* chars, size_t length,
    411                                     uint32_t quote);
    412 
    413 template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
    414                                     const char16_t* chars, size_t length,
    415                                     uint32_t quote);
    416 
    417 size_t js::unicode::CountUTF16CodeUnits(const Utf8Unit* begin,
    418                                        const Utf8Unit* end) {
    419  MOZ_ASSERT(begin <= end);
    420 
    421  size_t count = 0;
    422  const Utf8Unit* ptr = begin;
    423  while (ptr < end) {
    424    count++;
    425 
    426    Utf8Unit lead = *ptr++;
    427    if (IsAscii(lead)) {
    428      continue;
    429    }
    430 
    431    Maybe<char32_t> cp = DecodeOneUtf8CodePoint(lead, &ptr, end);
    432    MOZ_ASSERT(cp.isSome());
    433    if (*cp > unicode::UTF16Max) {
    434      // This uses surrogate pair.
    435      count++;
    436    }
    437  }
    438  MOZ_ASSERT(ptr == end, "bad code unit count in line?");
    439 
    440  return count;
    441 }