nsIDNService.cpp (25893B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "MainThreadUtils.h" 7 #include "mozilla/ClearOnShutdown.h" 8 #include "mozilla/Preferences.h" 9 #include "nsIDNService.h" 10 #include "nsReadableUtils.h" 11 #include "nsCRT.h" 12 #include "nsServiceManagerUtils.h" 13 #include "nsString.h" 14 #include "nsStringFwd.h" 15 #include "nsUnicharUtils.h" 16 #include "nsUnicodeProperties.h" 17 #include "harfbuzz/hb.h" 18 #include "mozilla/Casting.h" 19 #include "mozilla/StaticPrefs_network.h" 20 #include "mozilla/intl/UnicodeProperties.h" 21 #include "mozilla/intl/UnicodeScriptCodes.h" 22 #include "nsNetUtil.h" 23 #include "nsStandardURL.h" 24 25 using namespace mozilla; 26 using namespace mozilla::intl; 27 using namespace mozilla::unicode; 28 using namespace mozilla::net; 29 using mozilla::Preferences; 30 31 //----------------------------------------------------------------------------- 32 33 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9') 34 35 template <int N> 36 static inline bool TLDEqualsLiteral(mozilla::Span<const char32_t> aTLD, 37 const char (&aStr)[N]) { 38 if (aTLD.Length() != N - 1) { 39 return false; 40 } 41 const char* a = aStr; 42 for (const char32_t c : aTLD) { 43 if (c != char32_t(*a)) { 44 return false; 45 } 46 ++a; 47 } 48 return true; 49 } 50 51 template <int N> 52 static inline bool TLDStartsWith(mozilla::Span<const char32_t> aTLD, 53 const char (&aStr)[N]) { 54 // Ensure the span is long enough to contain the prefix 55 if (aTLD.Length() < N - 1) { 56 return false; 57 } 58 59 for (size_t i = 0; i < N - 1; ++i) { 60 if (aTLD[i] != char32_t(aStr[i])) { 61 return false; 62 } 63 } 64 65 return true; 66 } 67 68 static inline bool isOnlySafeChars(mozilla::Span<const char32_t> aLabel, 69 const nsTArray<BlocklistRange>& aBlocklist) { 70 if (aBlocklist.IsEmpty()) { 71 return true; 72 } 73 for (const char32_t c : aLabel) { 74 if (c > 0xFFFF) { 75 // The blocklist only support BMP! 76 continue; 77 } 78 if (CharInBlocklist(char16_t(c), aBlocklist)) { 79 return false; 80 } 81 } 82 return true; 83 } 84 85 static bool isCyrillicDomain(mozilla::Span<const char32_t>& aTLD) { 86 return TLDEqualsLiteral(aTLD, "bg") || TLDEqualsLiteral(aTLD, "by") || 87 TLDEqualsLiteral(aTLD, "kz") || TLDEqualsLiteral(aTLD, "pyc") || 88 TLDEqualsLiteral(aTLD, "ru") || TLDEqualsLiteral(aTLD, "su") || 89 TLDEqualsLiteral(aTLD, "ua") || TLDEqualsLiteral(aTLD, "uz"); 90 } 91 92 //----------------------------------------------------------------------------- 93 // nsIDNService 94 //----------------------------------------------------------------------------- 95 96 /* Implementation file */ 97 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService) 98 99 nsresult nsIDNService::Init() { 100 MOZ_ASSERT(NS_IsMainThread()); 101 InitializeBlocklist(mIDNBlocklist); 102 103 InitCJKSlashConfusables(); 104 InitCJKIdeographs(); 105 InitDigitConfusables(); 106 InitCyrillicLatinConfusables(); 107 InitThaiLatinConfusables(); 108 return NS_OK; 109 } 110 111 void nsIDNService::InitCJKSlashConfusables() { 112 mCJKSlashConfusables.Insert(0x30CE); // ノ 113 mCJKSlashConfusables.Insert(0x30BD); // ソ 114 mCJKSlashConfusables.Insert(0x30BE); // ゾ 115 mCJKSlashConfusables.Insert(0x30F3); // ン 116 mCJKSlashConfusables.Insert(0x4E36); // 丶 117 mCJKSlashConfusables.Insert(0x4E40); // 乀 118 mCJKSlashConfusables.Insert(0x4E41); // 乁 119 mCJKSlashConfusables.Insert(0x4E3F); // 丿 120 } 121 122 void nsIDNService::InitCJKIdeographs() { 123 mCJKIdeographs.Insert(0x4E00); // 一 124 mCJKIdeographs.Insert(0x3127); // ㄧ 125 mCJKIdeographs.Insert(0x4E28); // 丨 126 mCJKIdeographs.Insert(0x4E5B); // 乛 127 mCJKIdeographs.Insert(0x4E03); // 七 128 mCJKIdeographs.Insert(0x4E05); // 丅 129 mCJKIdeographs.Insert(0x5341); // 十 130 mCJKIdeographs.Insert(0x3007); // 〇 131 mCJKIdeographs.Insert(0x3112); // ㄒ 132 mCJKIdeographs.Insert(0x311A); // ㄚ 133 mCJKIdeographs.Insert(0x311F); // ㄟ 134 mCJKIdeographs.Insert(0x3128); // ㄨ 135 mCJKIdeographs.Insert(0x3129); // ㄩ 136 mCJKIdeographs.Insert(0x3108); // ㄈ 137 mCJKIdeographs.Insert(0x31BA); // ㆺ 138 mCJKIdeographs.Insert(0x31B3); // ㆳ 139 mCJKIdeographs.Insert(0x5DE5); // 工 140 mCJKIdeographs.Insert(0x31B2); // ㆲ 141 mCJKIdeographs.Insert(0x8BA0); // 讠 142 mCJKIdeographs.Insert(0x4E01); // 丁 143 } 144 145 void nsIDNService::InitDigitConfusables() { 146 mDigitConfusables.Insert(0x03B8); // θ 147 mDigitConfusables.Insert(0x0968); // २ 148 mDigitConfusables.Insert(0x09E8); // ২ 149 mDigitConfusables.Insert(0x0A68); // ੨ 150 mDigitConfusables.Insert(0x0AE8); // ૨ 151 mDigitConfusables.Insert(0x0CE9); // ೩ 152 mDigitConfusables.Insert(0x0577); // շ 153 mDigitConfusables.Insert(0x0437); // з 154 mDigitConfusables.Insert(0x0499); // ҙ 155 mDigitConfusables.Insert(0x04E1); // ӡ 156 mDigitConfusables.Insert(0x0909); // उ 157 mDigitConfusables.Insert(0x0993); // ও 158 mDigitConfusables.Insert(0x0A24); // ਤ 159 mDigitConfusables.Insert(0x0A69); // ੩ 160 mDigitConfusables.Insert(0x0AE9); // ૩ 161 mDigitConfusables.Insert(0x0C69); // ౩ 162 mDigitConfusables.Insert(0x1012); // ဒ 163 mDigitConfusables.Insert(0x10D5); // ვ 164 mDigitConfusables.Insert(0x10DE); // პ 165 mDigitConfusables.Insert(0x0A5C); // ੜ 166 mDigitConfusables.Insert(0x10D9); // კ 167 mDigitConfusables.Insert(0x0A6B); // ੫ 168 mDigitConfusables.Insert(0x4E29); // 丩 169 mDigitConfusables.Insert(0x3110); // ㄐ 170 mDigitConfusables.Insert(0x0573); // ճ 171 mDigitConfusables.Insert(0x09EA); // ৪ 172 mDigitConfusables.Insert(0x0A6A); // ੪ 173 mDigitConfusables.Insert(0x0B6B); // ୫ 174 mDigitConfusables.Insert(0x0AED); // ૭ 175 mDigitConfusables.Insert(0x0B68); // ୨ 176 mDigitConfusables.Insert(0x0C68); // ౨ 177 } 178 179 void nsIDNService::InitCyrillicLatinConfusables() { 180 mCyrillicLatinConfusables.Insert(0x0430); // а CYRILLIC SMALL LETTER A 181 mCyrillicLatinConfusables.Insert(0x044B); // ы CYRILLIC SMALL LETTER YERU 182 mCyrillicLatinConfusables.Insert(0x0441); // с CYRILLIC SMALL LETTER ES 183 mCyrillicLatinConfusables.Insert(0x0501); // ԁ CYRILLIC SMALL LETTER KOMI DE 184 mCyrillicLatinConfusables.Insert(0x0435); // е CYRILLIC SMALL LETTER IE 185 mCyrillicLatinConfusables.Insert(0x050D); // ԍ CYRILLIC SMALL LETTER KOMI SJE 186 mCyrillicLatinConfusables.Insert(0x04BB); // һ CYRILLIC SMALL LETTER SHHA 187 mCyrillicLatinConfusables.Insert( 188 0x0456); // і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {Old 189 // Cyrillic i} 190 mCyrillicLatinConfusables.Insert(0x044E); // ю CYRILLIC SMALL LETTER YU 191 mCyrillicLatinConfusables.Insert(0x043A); // к CYRILLIC SMALL LETTER KA 192 mCyrillicLatinConfusables.Insert(0x0458); // ј CYRILLIC SMALL LETTER JE 193 mCyrillicLatinConfusables.Insert(0x04CF); // ӏ CYRILLIC SMALL LETTER PALOCHKA 194 mCyrillicLatinConfusables.Insert(0x043C); // м CYRILLIC SMALL LETTER EM 195 mCyrillicLatinConfusables.Insert(0x043E); // о CYRILLIC SMALL LETTER O 196 mCyrillicLatinConfusables.Insert(0x0440); // р CYRILLIC SMALL LETTER ER 197 mCyrillicLatinConfusables.Insert( 198 0x0517); // ԗ CYRILLIC SMALL LETTER RHA {voiceless r} 199 mCyrillicLatinConfusables.Insert(0x051B); // ԛ CYRILLIC SMALL LETTER QA 200 mCyrillicLatinConfusables.Insert(0x0455); // ѕ CYRILLIC SMALL LETTER DZE 201 mCyrillicLatinConfusables.Insert(0x051D); // ԝ CYRILLIC SMALL LETTER WE 202 mCyrillicLatinConfusables.Insert(0x0445); // х CYRILLIC SMALL LETTER HA 203 mCyrillicLatinConfusables.Insert(0x0443); // у CYRILLIC SMALL LETTER U 204 mCyrillicLatinConfusables.Insert( 205 0x044A); // ъ CYRILLIC SMALL LETTER HARD SIGN 206 mCyrillicLatinConfusables.Insert( 207 0x044C); // ь CYRILLIC SMALL LETTER SOFT SIGN 208 mCyrillicLatinConfusables.Insert( 209 0x04BD); // ҽ CYRILLIC SMALL LETTER ABKHASIAN CHE 210 mCyrillicLatinConfusables.Insert(0x043F); // п CYRILLIC SMALL LETTER PE 211 mCyrillicLatinConfusables.Insert(0x0433); // г CYRILLIC SMALL LETTER GHE 212 mCyrillicLatinConfusables.Insert(0x0475); // ѵ CYRILLIC SMALL LETTER IZHITSA 213 mCyrillicLatinConfusables.Insert(0x0461); // ѡ CYRILLIC SMALL LETTER OMEGA 214 } 215 216 void nsIDNService::InitThaiLatinConfusables() { 217 // Some of the Thai characters are only confusable on Linux. 218 #if defined(XP_LINUX) && !defined(ANDROID) 219 mThaiLatinConfusables.Insert(0x0E14); // ด 220 mThaiLatinConfusables.Insert(0x0E17); // ท 221 mThaiLatinConfusables.Insert(0x0E19); // น 222 mThaiLatinConfusables.Insert(0x0E1B); // ป 223 mThaiLatinConfusables.Insert(0x0E21); // ม 224 mThaiLatinConfusables.Insert(0x0E25); // ล 225 mThaiLatinConfusables.Insert(0x0E2B); // ห 226 #endif 227 228 mThaiLatinConfusables.Insert(0x0E1A); // บ 229 mThaiLatinConfusables.Insert(0x0E1E); // พ 230 mThaiLatinConfusables.Insert(0x0E1F); // ฟ 231 mThaiLatinConfusables.Insert(0x0E23); // ร 232 mThaiLatinConfusables.Insert(0x0E40); // เ 233 mThaiLatinConfusables.Insert(0x0E41); // แ 234 mThaiLatinConfusables.Insert(0x0E50); // ๐ 235 } 236 237 nsIDNService::nsIDNService() { MOZ_ASSERT(NS_IsMainThread()); } 238 239 nsIDNService::~nsIDNService() = default; 240 241 NS_IMETHODIMP nsIDNService::DomainToASCII(const nsACString& input, 242 nsACString& ace) { 243 return NS_DomainToASCII(input, ace); 244 } 245 246 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input, 247 nsACString& ace) { 248 return NS_DomainToASCIIAllowAnyGlyphfulASCII(input, ace); 249 } 250 251 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input, 252 nsACString& _retval) { 253 return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input, _retval); 254 } 255 256 NS_IMETHODIMP nsIDNService::DomainToDisplay(const nsACString& input, 257 nsACString& _retval) { 258 nsresult rv = NS_DomainToDisplay(input, _retval); 259 return rv; 260 } 261 262 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input, 263 nsACString& _retval) { 264 nsresult rv = NS_DomainToDisplayAllowAnyGlyphfulASCII(input, _retval); 265 return rv; 266 } 267 268 //----------------------------------------------------------------------------- 269 270 namespace mozilla::net { 271 272 enum ScriptCombo : int32_t { 273 UNSET = -1, 274 BOPO = 0, 275 CYRL = 1, 276 GREK = 2, 277 HANG = 3, 278 HANI = 4, 279 HIRA = 5, 280 KATA = 6, 281 LATN = 7, 282 OTHR = 8, 283 JPAN = 9, // Latin + Han + Hiragana + Katakana 284 CHNA = 10, // Latin + Han + Bopomofo 285 KORE = 11, // Latin + Han + Hangul 286 HNLT = 12, // Latin + Han (could be any of the above combinations) 287 FAIL = 13, 288 }; 289 290 // Ignore - set if the label contains a character that makes it 291 // obvious it's not a lookalike. 292 // Safe - set if the label contains no lookalike characters. 293 // Block - set if the label contains lookalike characters. 294 enum class LookalikeStatus { Ignore, Safe, Block }; 295 296 class MOZ_STACK_CLASS LookalikeStatusChecker { 297 public: 298 // Constructor for Script Confusable Checkers (Cyrillic, Thai, etc) 299 LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables, 300 mozilla::Span<const char32_t>& aTLD, Script aTLDScript, 301 bool aValidTLD) 302 : mConfusables(aConfusables), 303 mStatus(aValidTLD ? LookalikeStatus::Ignore : LookalikeStatus::Safe), 304 mTLDMatchesScript(doesTLDScriptMatch(aTLD, aTLDScript)), 305 mTLDScript(aTLDScript) {} 306 307 // Constructor that DigitLookalikeStatusChecker inherits 308 explicit LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables) 309 : mConfusables(aConfusables), mStatus(LookalikeStatus::Safe) {} 310 311 // For the Script Confusable Checkers 312 virtual void CheckCharacter(char32_t aChar, Script aScript) { 313 if (mStatus != LookalikeStatus::Ignore && !mTLDMatchesScript && 314 aScript == mTLDScript) { 315 mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block 316 : LookalikeStatus::Ignore; 317 } 318 } 319 320 virtual LookalikeStatus Status() { return mStatus; } 321 322 protected: 323 // A hash set containing confusable characters 324 nsTHashSet<char32_t>& mConfusables; 325 326 // The current lookalike status 327 LookalikeStatus mStatus; 328 329 bool doesTLDScriptMatch(mozilla::Span<const char32_t>& aTLD, Script aScript) { 330 mozilla::Span<const char32_t>::const_iterator current = aTLD.cbegin(); 331 mozilla::Span<const char32_t>::const_iterator end = aTLD.cend(); 332 333 while (current != end) { 334 char32_t ch = *current++; 335 if (UnicodeProperties::GetScriptCode(ch) == aScript) { 336 return true; 337 } 338 } 339 340 return false; 341 } 342 343 private: 344 // Indicates whether the TLD matches the given script 345 bool mTLDMatchesScript{false}; 346 347 // The script associated with the TLD to be matched 348 Script mTLDScript{Script::INVALID}; 349 }; 350 351 // Overrides the CheckCharacter method to validate digits 352 class DigitLookalikeStatusChecker : public LookalikeStatusChecker { 353 public: 354 explicit DigitLookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables) 355 : LookalikeStatusChecker(aConfusables) {} 356 357 // Note: aScript is not used in this override. 358 void CheckCharacter(char32_t aChar, Script aScript) override { 359 if (mStatus == LookalikeStatus::Ignore) { 360 return; 361 } 362 363 // If the character is not a numeric digit, check whether it is confusable 364 // or not. 365 if (!ISDIGIT(aChar)) { 366 mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block 367 : LookalikeStatus::Ignore; 368 } 369 } 370 }; 371 372 } // namespace mozilla::net 373 374 bool nsIDNService::IsLabelSafe(mozilla::Span<const char32_t> aLabel, 375 mozilla::Span<const char32_t> aTLD) { 376 if (StaticPrefs::network_IDN_show_punycode()) { 377 return false; 378 } 379 380 if (!isOnlySafeChars(aLabel, mIDNBlocklist)) { 381 return false; 382 } 383 384 // Bug 1917119 - Avoid bypassing the doesTLDScriptMatch check 385 // aTLD should be a decoded label, but in the case of invalid labels such as 386 // `xn--xn--d--fg4n` we might end up with something that starts with `xn--`. 387 // Treat those as unsafe just in case. 388 if (TLDStartsWith(aTLD, "xn--")) { 389 return false; 390 } 391 392 mozilla::Span<const char32_t>::const_iterator current = aLabel.cbegin(); 393 mozilla::Span<const char32_t>::const_iterator end = aLabel.cend(); 394 395 Script lastScript = Script::INVALID; 396 char32_t previousChar = 0; 397 char32_t baseChar = 0; // last non-diacritic seen (base char for marks) 398 char32_t savedNumberingSystem = 0; 399 400 // Ignore digit confusables if there is a non-digit and non-digit confusable 401 // character. If aLabel only consists of digits and digit confusables or 402 // digit confusables, return false. 403 DigitLookalikeStatusChecker digitStatusChecker(mDigitConfusables); 404 // Check if all the cyrillic letters in the label are confusables 405 LookalikeStatusChecker cyrillicStatusChecker(mCyrillicLatinConfusables, aTLD, 406 Script::CYRILLIC, 407 isCyrillicDomain(aTLD)); 408 // Check if all the Thai letters in the label are confusables 409 LookalikeStatusChecker thaiStatusChecker( 410 mThaiLatinConfusables, aTLD, Script::THAI, TLDEqualsLiteral(aTLD, "th")); 411 412 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481 413 #if 0 414 HanVariantType savedHanVariant = HVT_NotHan; 415 #endif 416 417 ScriptCombo savedScript = ScriptCombo::UNSET; 418 419 while (current != end) { 420 char32_t ch = *current++; 421 422 IdentifierType idType = GetIdentifierType(ch); 423 if (idType == IDTYPE_RESTRICTED) { 424 return false; 425 } 426 MOZ_ASSERT(idType == IDTYPE_ALLOWED); 427 428 // Check for mixed script 429 Script script = UnicodeProperties::GetScriptCode(ch); 430 if (script != Script::COMMON && script != Script::INHERITED && 431 script != lastScript) { 432 if (illegalScriptCombo(script, savedScript)) { 433 return false; 434 } 435 } 436 437 #ifdef XP_MACOSX 438 // U+0620, U+0f8c, U+0f8d, U+0f8e, U+0f8f and are blocked due to a font 439 // issue on macOS 440 if (ch == 0x620 || ch == 0xf8c || ch == 0xf8d || ch == 0xf8e || 441 ch == 0xf8f) { 442 return false; 443 } 444 #endif 445 446 // U+30FC should be preceded by a Hiragana/Katakana. 447 if (ch == 0x30fc && lastScript != Script::HIRAGANA && 448 lastScript != Script::KATAKANA) { 449 return false; 450 } 451 452 Script nextScript = Script::INVALID; 453 if (current != end) { 454 nextScript = UnicodeProperties::GetScriptCode(*current); 455 } 456 457 // U+3078 to U+307A (へ, べ, ぺ) in Hiragana mixed with Katakana should be 458 // unsafe 459 if (ch >= 0x3078 && ch <= 0x307A && 460 (lastScript == Script::KATAKANA || nextScript == Script::KATAKANA)) { 461 return false; 462 } 463 // U+30D8 to U+30DA (ヘ, ベ, ペ) in Katakana mixed with Hiragana should be 464 // unsafe 465 if (ch >= 0x30D8 && ch <= 0x30DA && 466 (lastScript == Script::HIRAGANA || nextScript == Script::HIRAGANA)) { 467 return false; 468 } 469 // U+30FD and U+30FE are allowed only after Katakana 470 if ((ch == 0x30FD || ch == 0x30FE) && lastScript != Script::KATAKANA) { 471 return false; 472 } 473 474 // Slash confusables not enclosed by {Han,Hiragana,Katakana} should be 475 // unsafe but by itself should be allowed. 476 if (isCJKSlashConfusable(ch) && aLabel.Length() > 1 && 477 lastScript != Script::HAN && lastScript != Script::HIRAGANA && 478 lastScript != Script::KATAKANA && nextScript != Script::HAN && 479 nextScript != Script::HIRAGANA && nextScript != Script::KATAKANA) { 480 return false; 481 } 482 483 if (ch == 0x30FB && 484 (lastScript == Script::LATIN || nextScript == Script::LATIN)) { 485 return false; 486 } 487 488 // Combining Diacritic marks (U+0300-U+0339) after a script other than 489 // Latin-Greek-Cyrillic is unsafe 490 if (ch >= 0x300 && ch <= 0x339 && lastScript != Script::LATIN && 491 lastScript != Script::GREEK && lastScript != Script::CYRILLIC) { 492 return false; 493 } 494 495 if (ch == 0x307 && 496 (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) { 497 return false; 498 } 499 500 // U+00B7 is only allowed on Catalan domains between two l's. 501 if (ch == 0xB7 && (!TLDEqualsLiteral(aTLD, "cat") || previousChar != 'l' || 502 current == end || *current != 'l')) { 503 return false; 504 } 505 506 // Disallow Icelandic confusables for domains outside Icelandic and Faroese 507 // ccTLD (.is, .fo) 508 if ((ch == 0xFE || ch == 0xF0) && !TLDEqualsLiteral(aTLD, "is") && 509 !TLDEqualsLiteral(aTLD, "fo")) { 510 return false; 511 } 512 513 // Disallow U+0259 for domains outside Azerbaijani ccTLD (.az) 514 if (ch == 0x259 && !TLDEqualsLiteral(aTLD, "az")) { 515 return false; 516 } 517 518 // Block single/double-quote-like characters. 519 if (ch == 0x2BB || ch == 0x2BC) { 520 return false; 521 } 522 523 // Update the status based on whether the current character is a confusable 524 // or not and determine if it should be blocked or ignored. 525 // Note: script is not used for digitStatusChecker 526 digitStatusChecker.CheckCharacter(ch, script); 527 cyrillicStatusChecker.CheckCharacter(ch, script); 528 thaiStatusChecker.CheckCharacter(ch, script); 529 530 // Block these CJK ideographs if they are adjacent to non-CJK characters. 531 // These characters can be used to spoof Latin characters/punctuation marks. 532 if (isCJKIdeograph(ch)) { 533 // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han, 534 // and non-Numeric character on the left. previousChar is 0 when ch is the 535 // first character. 536 if (lastScript != Script::BOPOMOFO && lastScript != Script::HIRAGANA && 537 lastScript != Script::KATAKANA && lastScript != Script::HAN && 538 previousChar && !ISDIGIT(previousChar)) { 539 return false; 540 } 541 // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han, 542 // and non-Numeric character on the right. 543 if (nextScript != Script::BOPOMOFO && nextScript != Script::HIRAGANA && 544 nextScript != Script::KATAKANA && nextScript != Script::HAN && 545 current != aLabel.end() && !ISDIGIT(*current)) { 546 return false; 547 } 548 } 549 550 // Check for mixed numbering systems 551 auto genCat = GetGeneralCategory(ch); 552 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { 553 uint32_t zeroCharacter = 554 ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch); 555 if (savedNumberingSystem == 0) { 556 // If we encounter a decimal number, save the zero character from that 557 // numbering system. 558 savedNumberingSystem = zeroCharacter; 559 } else if (zeroCharacter != savedNumberingSystem) { 560 return false; 561 } 562 } 563 564 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { 565 // Check for consecutive non-spacing marks. 566 if (previousChar != 0 && previousChar == ch) { 567 return false; 568 } 569 // Check for marks whose expected script doesn't match the base script. 570 if (lastScript != Script::INVALID) { 571 UnicodeProperties::ScriptExtensionVector scripts; 572 auto extResult = UnicodeProperties::GetExtensions(ch, scripts); 573 MOZ_ASSERT(extResult.isOk()); 574 if (extResult.isErr()) { 575 return false; 576 } 577 578 int nScripts = AssertedCast<int>(scripts.length()); 579 580 // nScripts will always be >= 1, because even for undefined characters 581 // it will return Script::INVALID. 582 // If the mark just has script=COMMON or INHERITED, we can't check any 583 // more carefully, but if it has specific scriptExtension codes, then 584 // assume those are the only valid scripts to use it with. 585 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON && 586 Script(scripts[0]) != Script::INHERITED)) { 587 while (--nScripts >= 0) { 588 if (Script(scripts[nScripts]) == lastScript) { 589 break; 590 } 591 } 592 if (nScripts == -1) { 593 return false; 594 } 595 } 596 } 597 // Check for diacritics on dotless-i, which would be indistinguishable 598 // from normal accented letter i. 599 if (baseChar == 0x0131 && 600 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) { 601 return false; 602 } 603 } else { 604 baseChar = ch; 605 } 606 607 if (script != Script::COMMON && script != Script::INHERITED) { 608 lastScript = script; 609 } 610 611 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481 612 #if 0 613 614 // Check for both simplified-only and traditional-only Chinese characters 615 HanVariantType hanVariant = GetHanVariant(ch); 616 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) { 617 if (savedHanVariant == HVT_NotHan) { 618 savedHanVariant = hanVariant; 619 } else if (hanVariant != savedHanVariant) { 620 return false; 621 } 622 } 623 #endif 624 625 previousChar = ch; 626 } 627 return digitStatusChecker.Status() != LookalikeStatus::Block && 628 (!StaticPrefs::network_idn_punycode_cyrillic_confusables() || 629 cyrillicStatusChecker.Status() != LookalikeStatus::Block) && 630 thaiStatusChecker.Status() != LookalikeStatus::Block; 631 } 632 633 // Scripts that we care about in illegalScriptCombo 634 static inline ScriptCombo findScriptIndex(Script aScript) { 635 switch (aScript) { 636 case Script::BOPOMOFO: 637 return ScriptCombo::BOPO; 638 case Script::CYRILLIC: 639 return ScriptCombo::CYRL; 640 case Script::GREEK: 641 return ScriptCombo::GREK; 642 case Script::HANGUL: 643 return ScriptCombo::HANG; 644 case Script::HAN: 645 return ScriptCombo::HANI; 646 case Script::HIRAGANA: 647 return ScriptCombo::HIRA; 648 case Script::KATAKANA: 649 return ScriptCombo::KATA; 650 case Script::LATIN: 651 return ScriptCombo::LATN; 652 default: 653 return ScriptCombo::OTHR; 654 } 655 } 656 657 static const ScriptCombo scriptComboTable[13][9] = { 658 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR 659 * savedScript */ 660 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL}, 661 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL}, 662 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL}, 663 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL}, 664 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL}, 665 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL}, 666 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL}, 667 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR}, 668 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL}, 669 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL}, 670 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL}, 671 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL}, 672 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}}; 673 674 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) { 675 if (savedScript == ScriptCombo::UNSET) { 676 savedScript = findScriptIndex(script); 677 return false; 678 } 679 680 savedScript = scriptComboTable[savedScript][findScriptIndex(script)]; 681 682 return savedScript == OTHR || savedScript == FAIL; 683 } 684 685 extern "C" MOZ_EXPORT bool mozilla_net_is_label_safe(const char32_t* aLabel, 686 size_t aLabelLen, 687 const char32_t* aTld, 688 size_t aTldLen) { 689 return static_cast<nsIDNService*>(nsStandardURL::GetIDNService()) 690 ->IsLabelSafe(mozilla::Span<const char32_t>(aLabel, aLabelLen), 691 mozilla::Span<const char32_t>(aTld, aTldLen)); 692 } 693 694 bool nsIDNService::isCJKSlashConfusable(char32_t aChar) { 695 return mCJKSlashConfusables.Contains(aChar); 696 } 697 698 bool nsIDNService::isCJKIdeograph(char32_t aChar) { 699 return mCJKIdeographs.Contains(aChar); 700 }