tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

LanguageTag.cpp (8592B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "builtin/intl/LanguageTag.h"
      8 
      9 #include "mozilla/intl/Locale.h"
     10 #include "mozilla/Span.h"
     11 
     12 #include "builtin/intl/CommonFunctions.h"
     13 #include "builtin/intl/FormatBuffer.h"
     14 #include "builtin/intl/StringAsciiChars.h"
     15 #include "gc/Tracer.h"
     16 #include "vm/JSAtomState.h"
     17 #include "vm/JSContext.h"
     18 
     19 #include "vm/JSObject-inl.h"
     20 #include "vm/ObjectOperations-inl.h"
     21 
     22 bool js::intl::ParseLocale(JSContext* cx, Handle<JSLinearString*> str,
     23                           mozilla::intl::Locale& result) {
     24  if (StringIsAscii(str)) {
     25    intl::StringAsciiChars chars(str);
     26    if (!chars.init(cx)) {
     27      return false;
     28    }
     29 
     30    if (mozilla::intl::LocaleParser::TryParse(chars, result).isOk()) {
     31      return true;
     32    }
     33  }
     34 
     35  if (UniqueChars localeChars = QuoteString(cx, str, '"')) {
     36    JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
     37                              JSMSG_INVALID_LANGUAGE_TAG, localeChars.get());
     38  }
     39  return false;
     40 }
     41 
     42 bool js::intl::ParseStandaloneLanguageTag(
     43    Handle<JSLinearString*> str, mozilla::intl::LanguageSubtag& result) {
     44  // Tell the analysis the |IsStructurallyValidLanguageTag| function can't GC.
     45  JS::AutoSuppressGCAnalysis nogc;
     46 
     47  if (str->hasLatin1Chars()) {
     48    if (!mozilla::intl::IsStructurallyValidLanguageTag<Latin1Char>(
     49            str->latin1Range(nogc))) {
     50      return false;
     51    }
     52    result.Set<Latin1Char>(str->latin1Range(nogc));
     53  } else {
     54    if (!mozilla::intl::IsStructurallyValidLanguageTag<char16_t>(
     55            str->twoByteRange(nogc))) {
     56      return false;
     57    }
     58    result.Set<char16_t>(str->twoByteRange(nogc));
     59  }
     60  return true;
     61 }
     62 
     63 bool js::intl::ParseStandaloneScriptTag(Handle<JSLinearString*> str,
     64                                        mozilla::intl::ScriptSubtag& result) {
     65  // Tell the analysis the |IsStructurallyValidScriptTag| function can't GC.
     66  JS::AutoSuppressGCAnalysis nogc;
     67 
     68  if (str->hasLatin1Chars()) {
     69    if (!mozilla::intl::IsStructurallyValidScriptTag<Latin1Char>(
     70            str->latin1Range(nogc))) {
     71      return false;
     72    }
     73    result.Set<Latin1Char>(str->latin1Range(nogc));
     74  } else {
     75    if (!mozilla::intl::IsStructurallyValidScriptTag<char16_t>(
     76            str->twoByteRange(nogc))) {
     77      return false;
     78    }
     79    result.Set<char16_t>(str->twoByteRange(nogc));
     80  }
     81  return true;
     82 }
     83 
     84 bool js::intl::ParseStandaloneRegionTag(Handle<JSLinearString*> str,
     85                                        mozilla::intl::RegionSubtag& result) {
     86  // Tell the analysis the |IsStructurallyValidRegionTag| function can't GC.
     87  JS::AutoSuppressGCAnalysis nogc;
     88 
     89  if (str->hasLatin1Chars()) {
     90    if (!mozilla::intl::IsStructurallyValidRegionTag<Latin1Char>(
     91            str->latin1Range(nogc))) {
     92      return false;
     93    }
     94    result.Set<Latin1Char>(str->latin1Range(nogc));
     95  } else {
     96    if (!mozilla::intl::IsStructurallyValidRegionTag<char16_t>(
     97            str->twoByteRange(nogc))) {
     98      return false;
     99    }
    100    result.Set<char16_t>(str->twoByteRange(nogc));
    101  }
    102  return true;
    103 }
    104 
    105 template <typename CharT>
    106 static bool ParseStandaloneVariantTag(
    107    mozilla::Span<const CharT> variantSubtags,
    108    mozilla::intl::Locale::VariantsVector& result, bool* success) {
    109  auto isValidVariantSubtag = [&](auto span) {
    110    // Tell the analysis the |IsStructurallyValidVariantTag| function can't GC.
    111    JS::AutoSuppressGCAnalysis nogc;
    112    return mozilla::intl::IsStructurallyValidVariantTag(span);
    113  };
    114 
    115  size_t start = 0;
    116  for (size_t index = 0; index < variantSubtags.size(); index++) {
    117    if (variantSubtags[index] == '-') {
    118      auto span = variantSubtags.FromTo(start, index);
    119      if (!isValidVariantSubtag(span)) {
    120        *success = false;
    121        return true;
    122      }
    123 
    124      if (!result.emplaceBack(span)) {
    125        return false;
    126      }
    127 
    128      start = index + 1;
    129    }
    130  }
    131 
    132  // Trailing variant subtag.
    133  auto span = variantSubtags.From(start);
    134  if (!isValidVariantSubtag(span)) {
    135    *success = false;
    136    return true;
    137  }
    138 
    139  if (!result.emplaceBack(span)) {
    140    return false;
    141  }
    142 
    143  *success = true;
    144  return true;
    145 }
    146 
    147 bool js::intl::ParseStandaloneVariantTag(
    148    Handle<JSLinearString*> str, mozilla::intl::Locale::VariantsVector& result,
    149    bool* success) {
    150  JS::AutoCheckCannotGC nogc;
    151  return str->hasLatin1Chars()
    152             ? ::ParseStandaloneVariantTag(
    153                   mozilla::Span{str->latin1Range(nogc)}, result, success)
    154             : ::ParseStandaloneVariantTag(
    155                   mozilla::Span{str->twoByteRange(nogc)}, result, success);
    156 }
    157 
    158 template <typename CharT>
    159 static bool IsAsciiLowercaseAlpha(mozilla::Span<const CharT> span) {
    160  // Tell the analysis the |std::all_of| function can't GC.
    161  JS::AutoSuppressGCAnalysis nogc;
    162 
    163  const CharT* ptr = span.data();
    164  size_t length = span.size();
    165  return std::all_of(ptr, ptr + length, mozilla::IsAsciiLowercaseAlpha<CharT>);
    166 }
    167 
    168 static bool IsAsciiLowercaseAlpha(const JSLinearString* str) {
    169  JS::AutoCheckCannotGC nogc;
    170  if (str->hasLatin1Chars()) {
    171    return IsAsciiLowercaseAlpha<JS::Latin1Char>(str->latin1Range(nogc));
    172  }
    173  return IsAsciiLowercaseAlpha<char16_t>(str->twoByteRange(nogc));
    174 }
    175 
    176 template <typename CharT>
    177 static bool IsAsciiAlpha(mozilla::Span<const CharT> span) {
    178  // Tell the analysis the |std::all_of| function can't GC.
    179  JS::AutoSuppressGCAnalysis nogc;
    180 
    181  const CharT* ptr = span.data();
    182  size_t length = span.size();
    183  return std::all_of(ptr, ptr + length, mozilla::IsAsciiAlpha<CharT>);
    184 }
    185 
    186 static bool IsAsciiAlpha(const JSLinearString* str) {
    187  JS::AutoCheckCannotGC nogc;
    188  if (str->hasLatin1Chars()) {
    189    return IsAsciiAlpha<JS::Latin1Char>(str->latin1Range(nogc));
    190  }
    191  return IsAsciiAlpha<char16_t>(str->twoByteRange(nogc));
    192 }
    193 
    194 JS::Result<JSLinearString*> js::intl::ParseStandaloneISO639LanguageTag(
    195    JSContext* cx, Handle<JSLinearString*> str) {
    196  // ISO-639 language codes contain either two or three characters.
    197  size_t length = str->length();
    198  if (length != 2 && length != 3) {
    199    return nullptr;
    200  }
    201 
    202  // We can directly the return the input below if it's in the correct case.
    203  bool isLowerCase = IsAsciiLowercaseAlpha(str);
    204  if (!isLowerCase) {
    205    // Must be an ASCII alpha string.
    206    if (!IsAsciiAlpha(str)) {
    207      return nullptr;
    208    }
    209  }
    210 
    211  mozilla::intl::LanguageSubtag languageTag;
    212  if (str->hasLatin1Chars()) {
    213    JS::AutoCheckCannotGC nogc;
    214    languageTag.Set<Latin1Char>(str->latin1Range(nogc));
    215  } else {
    216    JS::AutoCheckCannotGC nogc;
    217    languageTag.Set<char16_t>(str->twoByteRange(nogc));
    218  }
    219 
    220  if (!isLowerCase) {
    221    // The language subtag is canonicalized to lower case.
    222    languageTag.ToLowerCase();
    223  }
    224 
    225  // Reject the input if the canonical tag contains more than just a single
    226  // language subtag.
    227  if (mozilla::intl::Locale::ComplexLanguageMapping(languageTag)) {
    228    return nullptr;
    229  }
    230 
    231  // Take care to replace deprecated subtags with their preferred values.
    232  JSLinearString* result;
    233  if (mozilla::intl::Locale::LanguageMapping(languageTag) || !isLowerCase) {
    234    result = NewStringCopy<CanGC>(cx, languageTag.Span());
    235  } else {
    236    result = str;
    237  }
    238  if (!result) {
    239    return cx->alreadyReportedOOM();
    240  }
    241  return result;
    242 }
    243 
    244 JS::UniqueChars js::intl::FormatLocale(
    245    JSContext* cx, JS::Handle<JSObject*> internals,
    246    JS::HandleVector<UnicodeExtensionKeyword> keywords) {
    247  RootedValue value(cx);
    248  if (!GetProperty(cx, internals, internals, cx->names().locale, &value)) {
    249    return nullptr;
    250  }
    251 
    252  mozilla::intl::Locale tag;
    253  {
    254    Rooted<JSLinearString*> locale(cx, value.toString()->ensureLinear(cx));
    255    if (!locale) {
    256      return nullptr;
    257    }
    258 
    259    if (!ParseLocale(cx, locale, tag)) {
    260      return nullptr;
    261    }
    262  }
    263 
    264  // |ApplyUnicodeExtensionToTag| applies the new keywords to the front of
    265  // the Unicode extension subtag. We're then relying on ICU to follow RFC
    266  // 6067, which states that any trailing keywords using the same key
    267  // should be ignored.
    268  if (!ApplyUnicodeExtensionToTag(cx, tag, keywords)) {
    269    return nullptr;
    270  }
    271 
    272  FormatBuffer<char> buffer(cx);
    273  if (auto result = tag.ToString(buffer); result.isErr()) {
    274    ReportInternalError(cx, result.unwrapErr());
    275    return nullptr;
    276  }
    277  return buffer.extractStringZ();
    278 }
    279 
    280 void js::intl::UnicodeExtensionKeyword::trace(JSTracer* trc) {
    281  TraceRoot(trc, &type_, "UnicodeExtensionKeyword::type");
    282 }