tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsIDNService.cpp (25893B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "MainThreadUtils.h"
      7 #include "mozilla/ClearOnShutdown.h"
      8 #include "mozilla/Preferences.h"
      9 #include "nsIDNService.h"
     10 #include "nsReadableUtils.h"
     11 #include "nsCRT.h"
     12 #include "nsServiceManagerUtils.h"
     13 #include "nsString.h"
     14 #include "nsStringFwd.h"
     15 #include "nsUnicharUtils.h"
     16 #include "nsUnicodeProperties.h"
     17 #include "harfbuzz/hb.h"
     18 #include "mozilla/Casting.h"
     19 #include "mozilla/StaticPrefs_network.h"
     20 #include "mozilla/intl/UnicodeProperties.h"
     21 #include "mozilla/intl/UnicodeScriptCodes.h"
     22 #include "nsNetUtil.h"
     23 #include "nsStandardURL.h"
     24 
     25 using namespace mozilla;
     26 using namespace mozilla::intl;
     27 using namespace mozilla::unicode;
     28 using namespace mozilla::net;
     29 using mozilla::Preferences;
     30 
     31 //-----------------------------------------------------------------------------
     32 
     // Returns true when aChar is an ASCII decimal digit ('0'..'9').
     // Written as a constexpr function instead of a function-like macro so the
     // argument is evaluated exactly once and is type-checked. Call sites keep
     // the same spelling: ISDIGIT(c).
     static constexpr bool ISDIGIT(char32_t aChar) {
       return aChar >= U'0' && aChar <= U'9';
     }
     34 
     35 template <int N>
     36 static inline bool TLDEqualsLiteral(mozilla::Span<const char32_t> aTLD,
     37                                    const char (&aStr)[N]) {
     38  if (aTLD.Length() != N - 1) {
     39    return false;
     40  }
     41  const char* a = aStr;
     42  for (const char32_t c : aTLD) {
     43    if (c != char32_t(*a)) {
     44      return false;
     45    }
     46    ++a;
     47  }
     48  return true;
     49 }
     50 
     51 template <int N>
     52 static inline bool TLDStartsWith(mozilla::Span<const char32_t> aTLD,
     53                                 const char (&aStr)[N]) {
     54  // Ensure the span is long enough to contain the prefix
     55  if (aTLD.Length() < N - 1) {
     56    return false;
     57  }
     58 
     59  for (size_t i = 0; i < N - 1; ++i) {
     60    if (aTLD[i] != char32_t(aStr[i])) {
     61      return false;
     62    }
     63  }
     64 
     65  return true;
     66 }
     67 
     68 static inline bool isOnlySafeChars(mozilla::Span<const char32_t> aLabel,
     69                                   const nsTArray<BlocklistRange>& aBlocklist) {
     70  if (aBlocklist.IsEmpty()) {
     71    return true;
     72  }
     73  for (const char32_t c : aLabel) {
     74    if (c > 0xFFFF) {
     75      // The blocklist only support BMP!
     76      continue;
     77    }
     78    if (CharInBlocklist(char16_t(c), aBlocklist)) {
     79      return false;
     80    }
     81  }
     82  return true;
     83 }
     84 
     85 static bool isCyrillicDomain(mozilla::Span<const char32_t>& aTLD) {
     86  return TLDEqualsLiteral(aTLD, "bg") || TLDEqualsLiteral(aTLD, "by") ||
     87         TLDEqualsLiteral(aTLD, "kz") || TLDEqualsLiteral(aTLD, "pyc") ||
     88         TLDEqualsLiteral(aTLD, "ru") || TLDEqualsLiteral(aTLD, "su") ||
     89         TLDEqualsLiteral(aTLD, "ua") || TLDEqualsLiteral(aTLD, "uz");
     90 }
     91 
     92 //-----------------------------------------------------------------------------
     93 // nsIDNService
     94 //-----------------------------------------------------------------------------
     95 
     /* Implementation file */
     // XPCOM boilerplate: supplies the nsISupports implementation for
     // nsIDNService and declares nsIIDNService as the interface it exposes.
     NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
     98 
     99 nsresult nsIDNService::Init() {
    100  MOZ_ASSERT(NS_IsMainThread());
    101  InitializeBlocklist(mIDNBlocklist);
    102 
    103  InitCJKSlashConfusables();
    104  InitCJKIdeographs();
    105  InitDigitConfusables();
    106  InitCyrillicLatinConfusables();
    107  InitThaiLatinConfusables();
    108  return NS_OK;
    109 }
    110 
    111 void nsIDNService::InitCJKSlashConfusables() {
    112  mCJKSlashConfusables.Insert(0x30CE);  // ノ
    113  mCJKSlashConfusables.Insert(0x30BD);  // ソ
    114  mCJKSlashConfusables.Insert(0x30BE);  // ゾ
    115  mCJKSlashConfusables.Insert(0x30F3);  // ン
    116  mCJKSlashConfusables.Insert(0x4E36);  // 丶
    117  mCJKSlashConfusables.Insert(0x4E40);  // 乀
    118  mCJKSlashConfusables.Insert(0x4E41);  // 乁
    119  mCJKSlashConfusables.Insert(0x4E3F);  // 丿
    120 }
    121 
    122 void nsIDNService::InitCJKIdeographs() {
    123  mCJKIdeographs.Insert(0x4E00);  // 一
    124  mCJKIdeographs.Insert(0x3127);  // ㄧ
    125  mCJKIdeographs.Insert(0x4E28);  // 丨
    126  mCJKIdeographs.Insert(0x4E5B);  // 乛
    127  mCJKIdeographs.Insert(0x4E03);  // 七
    128  mCJKIdeographs.Insert(0x4E05);  // 丅
    129  mCJKIdeographs.Insert(0x5341);  // 十
    130  mCJKIdeographs.Insert(0x3007);  // 〇
    131  mCJKIdeographs.Insert(0x3112);  // ㄒ
    132  mCJKIdeographs.Insert(0x311A);  // ㄚ
    133  mCJKIdeographs.Insert(0x311F);  // ㄟ
    134  mCJKIdeographs.Insert(0x3128);  // ㄨ
    135  mCJKIdeographs.Insert(0x3129);  // ㄩ
    136  mCJKIdeographs.Insert(0x3108);  // ㄈ
    137  mCJKIdeographs.Insert(0x31BA);  // ㆺ
    138  mCJKIdeographs.Insert(0x31B3);  // ㆳ
    139  mCJKIdeographs.Insert(0x5DE5);  // 工
    140  mCJKIdeographs.Insert(0x31B2);  // ㆲ
    141  mCJKIdeographs.Insert(0x8BA0);  // 讠
    142  mCJKIdeographs.Insert(0x4E01);  // 丁
    143 }
    144 
    145 void nsIDNService::InitDigitConfusables() {
    146  mDigitConfusables.Insert(0x03B8);  // θ
    147  mDigitConfusables.Insert(0x0968);  // २
    148  mDigitConfusables.Insert(0x09E8);  // ২
    149  mDigitConfusables.Insert(0x0A68);  // ੨
    150  mDigitConfusables.Insert(0x0AE8);  // ૨
    151  mDigitConfusables.Insert(0x0CE9);  // ೩
    152  mDigitConfusables.Insert(0x0577);  // շ
    153  mDigitConfusables.Insert(0x0437);  // з
    154  mDigitConfusables.Insert(0x0499);  // ҙ
    155  mDigitConfusables.Insert(0x04E1);  // ӡ
    156  mDigitConfusables.Insert(0x0909);  // उ
    157  mDigitConfusables.Insert(0x0993);  // ও
    158  mDigitConfusables.Insert(0x0A24);  // ਤ
    159  mDigitConfusables.Insert(0x0A69);  // ੩
    160  mDigitConfusables.Insert(0x0AE9);  // ૩
    161  mDigitConfusables.Insert(0x0C69);  // ౩
    162  mDigitConfusables.Insert(0x1012);  // ဒ
    163  mDigitConfusables.Insert(0x10D5);  // ვ
    164  mDigitConfusables.Insert(0x10DE);  // პ
    165  mDigitConfusables.Insert(0x0A5C);  // ੜ
    166  mDigitConfusables.Insert(0x10D9);  // კ
    167  mDigitConfusables.Insert(0x0A6B);  // ੫
    168  mDigitConfusables.Insert(0x4E29);  // 丩
    169  mDigitConfusables.Insert(0x3110);  // ㄐ
    170  mDigitConfusables.Insert(0x0573);  // ճ
    171  mDigitConfusables.Insert(0x09EA);  // ৪
    172  mDigitConfusables.Insert(0x0A6A);  // ੪
    173  mDigitConfusables.Insert(0x0B6B);  // ୫
    174  mDigitConfusables.Insert(0x0AED);  // ૭
    175  mDigitConfusables.Insert(0x0B68);  // ୨
    176  mDigitConfusables.Insert(0x0C68);  // ౨
    177 }
    178 
    179 void nsIDNService::InitCyrillicLatinConfusables() {
    180  mCyrillicLatinConfusables.Insert(0x0430);  // а CYRILLIC SMALL LETTER A
    181  mCyrillicLatinConfusables.Insert(0x044B);  // ы CYRILLIC SMALL LETTER YERU
    182  mCyrillicLatinConfusables.Insert(0x0441);  // с CYRILLIC SMALL LETTER ES
    183  mCyrillicLatinConfusables.Insert(0x0501);  // ԁ CYRILLIC SMALL LETTER KOMI DE
    184  mCyrillicLatinConfusables.Insert(0x0435);  // е CYRILLIC SMALL LETTER IE
    185  mCyrillicLatinConfusables.Insert(0x050D);  // ԍ CYRILLIC SMALL LETTER KOMI SJE
    186  mCyrillicLatinConfusables.Insert(0x04BB);  // һ CYRILLIC SMALL LETTER SHHA
    187  mCyrillicLatinConfusables.Insert(
    188      0x0456);  // і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {Old
    189                // Cyrillic i}
    190  mCyrillicLatinConfusables.Insert(0x044E);  // ю CYRILLIC SMALL LETTER YU
    191  mCyrillicLatinConfusables.Insert(0x043A);  // к CYRILLIC SMALL LETTER KA
    192  mCyrillicLatinConfusables.Insert(0x0458);  // ј CYRILLIC SMALL LETTER JE
    193  mCyrillicLatinConfusables.Insert(0x04CF);  // ӏ CYRILLIC SMALL LETTER PALOCHKA
    194  mCyrillicLatinConfusables.Insert(0x043C);  // м CYRILLIC SMALL LETTER EM
    195  mCyrillicLatinConfusables.Insert(0x043E);  // о CYRILLIC SMALL LETTER O
    196  mCyrillicLatinConfusables.Insert(0x0440);  // р CYRILLIC SMALL LETTER ER
    197  mCyrillicLatinConfusables.Insert(
    198      0x0517);  // ԗ CYRILLIC SMALL LETTER RHA {voiceless r}
    199  mCyrillicLatinConfusables.Insert(0x051B);  // ԛ CYRILLIC SMALL LETTER QA
    200  mCyrillicLatinConfusables.Insert(0x0455);  // ѕ CYRILLIC SMALL LETTER DZE
    201  mCyrillicLatinConfusables.Insert(0x051D);  // ԝ CYRILLIC SMALL LETTER WE
    202  mCyrillicLatinConfusables.Insert(0x0445);  // х CYRILLIC SMALL LETTER HA
    203  mCyrillicLatinConfusables.Insert(0x0443);  // у CYRILLIC SMALL LETTER U
    204  mCyrillicLatinConfusables.Insert(
    205      0x044A);  // ъ CYRILLIC SMALL LETTER HARD SIGN
    206  mCyrillicLatinConfusables.Insert(
    207      0x044C);  // ь CYRILLIC SMALL LETTER SOFT SIGN
    208  mCyrillicLatinConfusables.Insert(
    209      0x04BD);  // ҽ CYRILLIC SMALL LETTER ABKHASIAN CHE
    210  mCyrillicLatinConfusables.Insert(0x043F);  // п CYRILLIC SMALL LETTER PE
    211  mCyrillicLatinConfusables.Insert(0x0433);  // г CYRILLIC SMALL LETTER GHE
    212  mCyrillicLatinConfusables.Insert(0x0475);  // ѵ CYRILLIC SMALL LETTER IZHITSA
    213  mCyrillicLatinConfusables.Insert(0x0461);  // ѡ CYRILLIC SMALL LETTER OMEGA
    214 }
    215 
    216 void nsIDNService::InitThaiLatinConfusables() {
    217  // Some of the Thai characters are only confusable on Linux.
    218 #if defined(XP_LINUX) && !defined(ANDROID)
    219  mThaiLatinConfusables.Insert(0x0E14);  // ด
    220  mThaiLatinConfusables.Insert(0x0E17);  // ท
    221  mThaiLatinConfusables.Insert(0x0E19);  // น
    222  mThaiLatinConfusables.Insert(0x0E1B);  // ป
    223  mThaiLatinConfusables.Insert(0x0E21);  // ม
    224  mThaiLatinConfusables.Insert(0x0E25);  // ล
    225  mThaiLatinConfusables.Insert(0x0E2B);  // ห
    226 #endif
    227 
    228  mThaiLatinConfusables.Insert(0x0E1A);  // บ
    229  mThaiLatinConfusables.Insert(0x0E1E);  // พ
    230  mThaiLatinConfusables.Insert(0x0E1F);  // ฟ
    231  mThaiLatinConfusables.Insert(0x0E23);  // ร
    232  mThaiLatinConfusables.Insert(0x0E40);  // เ
    233  mThaiLatinConfusables.Insert(0x0E41);  // แ
    234  mThaiLatinConfusables.Insert(0x0E50);  // ๐
    235 }
    236 
    237 nsIDNService::nsIDNService() { MOZ_ASSERT(NS_IsMainThread()); }
    238 
    // No explicit teardown: the compiler-generated destructor is used.
    nsIDNService::~nsIDNService() = default;
    240 
    241 NS_IMETHODIMP nsIDNService::DomainToASCII(const nsACString& input,
    242                                          nsACString& ace) {
    243  return NS_DomainToASCII(input, ace);
    244 }
    245 
    246 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
    247                                             nsACString& ace) {
    248  return NS_DomainToASCIIAllowAnyGlyphfulASCII(input, ace);
    249 }
    250 
    251 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
    252                                             nsACString& _retval) {
    253  return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input, _retval);
    254 }
    255 
    256 NS_IMETHODIMP nsIDNService::DomainToDisplay(const nsACString& input,
    257                                            nsACString& _retval) {
    258  nsresult rv = NS_DomainToDisplay(input, _retval);
    259  return rv;
    260 }
    261 
    262 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
    263                                                nsACString& _retval) {
    264  nsresult rv = NS_DomainToDisplayAllowAnyGlyphfulASCII(input, _retval);
    265  return rv;
    266 }
    267 
    268 //-----------------------------------------------------------------------------
    269 
    270 namespace mozilla::net {
    271 
    // State of the mixed-script check for one label.
    //
    // The numeric values matter: BOPO..OTHR (0..8) are what findScriptIndex()
    // returns and are used as column indices of scriptComboTable, while
    // BOPO..HNLT (0..12) are used as its row indices. UNSET and FAIL have no
    // row in that table.
    enum ScriptCombo : int32_t {
      UNSET = -1,  // no script recorded yet for the label
      BOPO = 0,
      CYRL = 1,
      GREK = 2,
      HANG = 3,
      HANI = 4,
      HIRA = 5,
      KATA = 6,
      LATN = 7,
      OTHR = 8,   // any script without a dedicated entry above
      JPAN = 9,   // Latin + Han + Hiragana + Katakana
      CHNA = 10,  // Latin + Han + Bopomofo
      KORE = 11,  // Latin + Han + Hangul
      HNLT = 12,  // Latin + Han (could be any of the above combinations)
      FAIL = 13,  // disallowed combination
    };
    289 
    // Outcome tracked by the lookalike checkers while a label is scanned.
    // Ignore - set if the label contains a character that makes it
    // obvious it's not a lookalike. Once set, the checkers never change it.
    // Safe - set if the label contains no lookalike characters.
    // Block - set if the label contains lookalike characters.
    enum class LookalikeStatus { Ignore, Safe, Block };
    295 
    296 class MOZ_STACK_CLASS LookalikeStatusChecker {
    297 public:
    298  // Constructor for Script Confusable Checkers (Cyrillic, Thai, etc)
    299  LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables,
    300                         mozilla::Span<const char32_t>& aTLD, Script aTLDScript,
    301                         bool aValidTLD)
    302      : mConfusables(aConfusables),
    303        mStatus(aValidTLD ? LookalikeStatus::Ignore : LookalikeStatus::Safe),
    304        mTLDMatchesScript(doesTLDScriptMatch(aTLD, aTLDScript)),
    305        mTLDScript(aTLDScript) {}
    306 
    307  // Constructor that DigitLookalikeStatusChecker inherits
    308  explicit LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables)
    309      : mConfusables(aConfusables), mStatus(LookalikeStatus::Safe) {}
    310 
    311  // For the Script Confusable Checkers
    312  virtual void CheckCharacter(char32_t aChar, Script aScript) {
    313    if (mStatus != LookalikeStatus::Ignore && !mTLDMatchesScript &&
    314        aScript == mTLDScript) {
    315      mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block
    316                                             : LookalikeStatus::Ignore;
    317    }
    318  }
    319 
    320  virtual LookalikeStatus Status() { return mStatus; }
    321 
    322 protected:
    323  // A hash set containing confusable characters
    324  nsTHashSet<char32_t>& mConfusables;
    325 
    326  // The current lookalike status
    327  LookalikeStatus mStatus;
    328 
    329  bool doesTLDScriptMatch(mozilla::Span<const char32_t>& aTLD, Script aScript) {
    330    mozilla::Span<const char32_t>::const_iterator current = aTLD.cbegin();
    331    mozilla::Span<const char32_t>::const_iterator end = aTLD.cend();
    332 
    333    while (current != end) {
    334      char32_t ch = *current++;
    335      if (UnicodeProperties::GetScriptCode(ch) == aScript) {
    336        return true;
    337      }
    338    }
    339 
    340    return false;
    341  }
    342 
    343 private:
    344  // Indicates whether the TLD matches the given script
    345  bool mTLDMatchesScript{false};
    346 
    347  // The script associated with the TLD to be matched
    348  Script mTLDScript{Script::INVALID};
    349 };
    350 
    351 // Overrides the CheckCharacter method to validate digits
    352 class DigitLookalikeStatusChecker : public LookalikeStatusChecker {
    353 public:
    354  explicit DigitLookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables)
    355      : LookalikeStatusChecker(aConfusables) {}
    356 
    357  // Note: aScript is not used in this override.
    358  void CheckCharacter(char32_t aChar, Script aScript) override {
    359    if (mStatus == LookalikeStatus::Ignore) {
    360      return;
    361    }
    362 
    363    // If the character is not a numeric digit, check whether it is confusable
    364    // or not.
    365    if (!ISDIGIT(aChar)) {
    366      mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block
    367                                             : LookalikeStatus::Ignore;
    368    }
    369  }
    370 };
    371 
    372 }  // namespace mozilla::net
    373 
    // Decides whether a single domain label may be considered safe in its
    // Unicode form.
    //
    // aLabel: the label as UTF-32 code points.
    // aTLD:   the top-level domain as UTF-32 code points; several rules are
    //         relaxed or tightened depending on it.
    //
    // Returns false as soon as any rule rejects the label, and true only if
    // every character passes and none of the lookalike checkers ends in Block.
    // NOTE(review): callers presumably display Punycode when this returns
    // false -- confirm against the call sites.
    bool nsIDNService::IsLabelSafe(mozilla::Span<const char32_t> aLabel,
                                   mozilla::Span<const char32_t> aTLD) {
      // The network.IDN_show_punycode pref makes every label unsafe.
      if (StaticPrefs::network_IDN_show_punycode()) {
        return false;
      }

      // Reject labels containing any character from the IDN blocklist.
      if (!isOnlySafeChars(aLabel, mIDNBlocklist)) {
        return false;
      }

      // Bug 1917119 - Avoid bypassing the doesTLDScriptMatch check
      // aTLD should be a decoded label, but in the case of invalid labels such as
      // `xn--xn--d--fg4n` we might end up with something that starts with `xn--`.
      // Treat those as unsafe just in case.
      if (TLDStartsWith(aTLD, "xn--")) {
        return false;
      }

      mozilla::Span<const char32_t>::const_iterator current = aLabel.cbegin();
      mozilla::Span<const char32_t>::const_iterator end = aLabel.cend();

      // Per-label scan state. lastScript only ever holds a script other than
      // COMMON/INHERITED (or INVALID before the first such character).
      Script lastScript = Script::INVALID;
      char32_t previousChar = 0;
      char32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
      char32_t savedNumberingSystem = 0;

      // Ignore digit confusables if the label contains a character that is
      // neither an ASCII digit nor a digit confusable. If aLabel consists only
      // of digit confusables, optionally mixed with ASCII digits, the label is
      // rejected by the final status check below.
      DigitLookalikeStatusChecker digitStatusChecker(mDigitConfusables);
      // Check if all the cyrillic letters in the label are confusables
      LookalikeStatusChecker cyrillicStatusChecker(mCyrillicLatinConfusables, aTLD,
                                                   Script::CYRILLIC,
                                                   isCyrillicDomain(aTLD));
      // Check if all the Thai letters in the label are confusables
      LookalikeStatusChecker thaiStatusChecker(
          mThaiLatinConfusables, aTLD, Script::THAI, TLDEqualsLiteral(aTLD, "th"));

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
    #if 0
      HanVariantType savedHanVariant = HVT_NotHan;
    #endif

      ScriptCombo savedScript = ScriptCombo::UNSET;

      while (current != end) {
        // After this line `current` already points at the NEXT character, which
        // is what the look-ahead checks below rely on.
        char32_t ch = *current++;

        IdentifierType idType = GetIdentifierType(ch);
        if (idType == IDTYPE_RESTRICTED) {
          return false;
        }
        MOZ_ASSERT(idType == IDTYPE_ALLOWED);

        // Check for mixed script. illegalScriptCombo() also updates savedScript
        // with the combination seen so far.
        Script script = UnicodeProperties::GetScriptCode(ch);
        if (script != Script::COMMON && script != Script::INHERITED &&
            script != lastScript) {
          if (illegalScriptCombo(script, savedScript)) {
            return false;
          }
        }

    #ifdef XP_MACOSX
        // U+0620, U+0f8c, U+0f8d, U+0f8e and U+0f8f are blocked due to a font
        // issue on macOS
        if (ch == 0x620 || ch == 0xf8c || ch == 0xf8d || ch == 0xf8e ||
            ch == 0xf8f) {
          return false;
        }
    #endif

        // U+30FC should be preceded by a Hiragana/Katakana.
        if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
            lastScript != Script::KATAKANA) {
          return false;
        }

        // Script of the following character, or INVALID at the end of the label.
        Script nextScript = Script::INVALID;
        if (current != end) {
          nextScript = UnicodeProperties::GetScriptCode(*current);
        }

        // U+3078 to U+307A (へ, べ, ぺ) in Hiragana mixed with Katakana should be
        // unsafe
        if (ch >= 0x3078 && ch <= 0x307A &&
            (lastScript == Script::KATAKANA || nextScript == Script::KATAKANA)) {
          return false;
        }
        // U+30D8 to U+30DA (ヘ, ベ, ペ) in Katakana mixed with Hiragana should be
        // unsafe
        if (ch >= 0x30D8 && ch <= 0x30DA &&
            (lastScript == Script::HIRAGANA || nextScript == Script::HIRAGANA)) {
          return false;
        }
        // U+30FD and U+30FE are allowed only after Katakana
        if ((ch == 0x30FD || ch == 0x30FE) && lastScript != Script::KATAKANA) {
          return false;
        }

        // Slash confusables not enclosed by {Han,Hiragana,Katakana} should be
        // unsafe but by itself should be allowed.
        if (isCJKSlashConfusable(ch) && aLabel.Length() > 1 &&
            lastScript != Script::HAN && lastScript != Script::HIRAGANA &&
            lastScript != Script::KATAKANA && nextScript != Script::HAN &&
            nextScript != Script::HIRAGANA && nextScript != Script::KATAKANA) {
          return false;
        }

        // U+30FB is rejected when a Latin character is on either side of it.
        if (ch == 0x30FB &&
            (lastScript == Script::LATIN || nextScript == Script::LATIN)) {
          return false;
        }

        // Combining Diacritic marks (U+0300-U+0339) after a script other than
        // Latin-Greek-Cyrillic is unsafe
        if (ch >= 0x300 && ch <= 0x339 && lastScript != Script::LATIN &&
            lastScript != Script::GREEK && lastScript != Script::CYRILLIC) {
          return false;
        }

        // U+0307 directly after 'i', 'j' or 'l' is rejected.
        if (ch == 0x307 &&
            (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
          return false;
        }

        // U+00B7 is only allowed on Catalan domains between two l's.
        if (ch == 0xB7 && (!TLDEqualsLiteral(aTLD, "cat") || previousChar != 'l' ||
                           current == end || *current != 'l')) {
          return false;
        }

        // Disallow Icelandic confusables for domains outside Icelandic and Faroese
        // ccTLD (.is, .fo)
        if ((ch == 0xFE || ch == 0xF0) && !TLDEqualsLiteral(aTLD, "is") &&
            !TLDEqualsLiteral(aTLD, "fo")) {
          return false;
        }

        // Disallow U+0259 for domains outside Azerbaijani ccTLD (.az)
        if (ch == 0x259 && !TLDEqualsLiteral(aTLD, "az")) {
          return false;
        }

        // Block single/double-quote-like characters.
        if (ch == 0x2BB || ch == 0x2BC) {
          return false;
        }

        // Update the status based on whether the current character is a confusable
        // or not and determine if it should be blocked or ignored.
        // Note: script is not used for digitStatusChecker
        digitStatusChecker.CheckCharacter(ch, script);
        cyrillicStatusChecker.CheckCharacter(ch, script);
        thaiStatusChecker.CheckCharacter(ch, script);

        // Block these CJK ideographs if they are adjacent to non-CJK characters.
        // These characters can be used to spoof Latin characters/punctuation marks.
        if (isCJKIdeograph(ch)) {
          // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han,
          // and non-Numeric character on the left. previousChar is 0 when ch is the
          // first character.
          if (lastScript != Script::BOPOMOFO && lastScript != Script::HIRAGANA &&
              lastScript != Script::KATAKANA && lastScript != Script::HAN &&
              previousChar && !ISDIGIT(previousChar)) {
            return false;
          }
          // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han,
          // and non-Numeric character on the right.
          if (nextScript != Script::BOPOMOFO && nextScript != Script::HIRAGANA &&
              nextScript != Script::KATAKANA && nextScript != Script::HAN &&
              current != aLabel.end() && !ISDIGIT(*current)) {
            return false;
          }
        }

        // Check for mixed numbering systems
        auto genCat = GetGeneralCategory(ch);
        if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
          // Subtracting the digit's numeric value yields the code point of the
          // zero digit of its numbering system, which identifies the system.
          uint32_t zeroCharacter =
              ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
          if (savedNumberingSystem == 0) {
            // If we encounter a decimal number, save the zero character from that
            // numbering system.
            savedNumberingSystem = zeroCharacter;
          } else if (zeroCharacter != savedNumberingSystem) {
            return false;
          }
        }

        if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
          // Check for consecutive non-spacing marks: the same mark appearing
          // twice in a row is rejected.
          if (previousChar != 0 && previousChar == ch) {
            return false;
          }
          // Check for marks whose expected script doesn't match the base script.
          if (lastScript != Script::INVALID) {
            UnicodeProperties::ScriptExtensionVector scripts;
            auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
            MOZ_ASSERT(extResult.isOk());
            if (extResult.isErr()) {
              return false;
            }

            int nScripts = AssertedCast<int>(scripts.length());

            // nScripts will always be >= 1, because even for undefined characters
            // it will return Script::INVALID.
            // If the mark just has script=COMMON or INHERITED, we can't check any
            // more carefully, but if it has specific scriptExtension codes, then
            // assume those are the only valid scripts to use it with.
            if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
                                 Script(scripts[0]) != Script::INHERITED)) {
              // Search the extensions backwards for lastScript; nScripts ends
              // at -1 only when no extension matched.
              while (--nScripts >= 0) {
                if (Script(scripts[nScripts]) == lastScript) {
                  break;
                }
              }
              if (nScripts == -1) {
                return false;
              }
            }
          }
          // Check for diacritics on dotless-i, which would be indistinguishable
          // from normal accented letter i.
          if (baseChar == 0x0131 &&
              ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
            return false;
          }
        } else {
          baseChar = ch;
        }

        // COMMON and INHERITED characters do not change the remembered script.
        if (script != Script::COMMON && script != Script::INHERITED) {
          lastScript = script;
        }

        // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
    #if 0

        // Check for both simplified-only and traditional-only Chinese characters
        HanVariantType hanVariant = GetHanVariant(ch);
        if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
          if (savedHanVariant == HVT_NotHan) {
            savedHanVariant = hanVariant;
          } else if (hanVariant != savedHanVariant)  {
            return false;
          }
        }
    #endif

        previousChar = ch;
      }
      // Final verdict from the lookalike checkers. The Cyrillic result is only
      // enforced when the network.idn.punycode_cyrillic_confusables pref is on.
      return digitStatusChecker.Status() != LookalikeStatus::Block &&
             (!StaticPrefs::network_idn_punycode_cyrillic_confusables() ||
              cyrillicStatusChecker.Status() != LookalikeStatus::Block) &&
             thaiStatusChecker.Status() != LookalikeStatus::Block;
    }
    632 
    633 // Scripts that we care about in illegalScriptCombo
    634 static inline ScriptCombo findScriptIndex(Script aScript) {
    635  switch (aScript) {
    636    case Script::BOPOMOFO:
    637      return ScriptCombo::BOPO;
    638    case Script::CYRILLIC:
    639      return ScriptCombo::CYRL;
    640    case Script::GREEK:
    641      return ScriptCombo::GREK;
    642    case Script::HANGUL:
    643      return ScriptCombo::HANG;
    644    case Script::HAN:
    645      return ScriptCombo::HANI;
    646    case Script::HIRAGANA:
    647      return ScriptCombo::HIRA;
    648    case Script::KATAKANA:
    649      return ScriptCombo::KATA;
    650    case Script::LATIN:
    651      return ScriptCombo::LATN;
    652    default:
    653      return ScriptCombo::OTHR;
    654  }
    655 }
    656 
    // Transition table for the mixed-script check, used by illegalScriptCombo
    // as scriptComboTable[savedScript][findScriptIndex(script)].
    //  - Rows (13): the combination saved so far, BOPO..HNLT. UNSET and FAIL
    //    have no row.
    //  - Columns (9): the script of the current character, BOPO..OTHR.
    //  - Entry: the resulting combination; FAIL marks a disallowed mix.
    static const ScriptCombo scriptComboTable[13][9] = {
        /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
         * savedScript */
        /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
        /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
        /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
        /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
        /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
        /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
        /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
        /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
        /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
        /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
        /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
        /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
        /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
    673 
    674 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
    675  if (savedScript == ScriptCombo::UNSET) {
    676    savedScript = findScriptIndex(script);
    677    return false;
    678  }
    679 
    680  savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
    681 
    682  return savedScript == OTHR || savedScript == FAIL;
    683 }
    684 
    685 extern "C" MOZ_EXPORT bool mozilla_net_is_label_safe(const char32_t* aLabel,
    686                                                     size_t aLabelLen,
    687                                                     const char32_t* aTld,
    688                                                     size_t aTldLen) {
    689  return static_cast<nsIDNService*>(nsStandardURL::GetIDNService())
    690      ->IsLabelSafe(mozilla::Span<const char32_t>(aLabel, aLabelLen),
    691                    mozilla::Span<const char32_t>(aTld, aTldLen));
    692 }
    693 
    694 bool nsIDNService::isCJKSlashConfusable(char32_t aChar) {
    695  return mCJKSlashConfusables.Contains(aChar);
    696 }
    697 
    698 bool nsIDNService::isCJKIdeograph(char32_t aChar) {
    699  return mCJKIdeographs.Contains(aChar);
    700 }