test_idn_spoof.js (48749B)
// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// https://source.chromium.org/chromium/chromium/src/+/main:LICENSE

// Tests nsIIDNService
// Imported from https://source.chromium.org/chromium/chromium/src/+/main:components/url_formatter/spoof_checks/idn_spoof_checker_unittest.cc;drc=e544837967287f956ba69af3b228b202e8e7cf1a

"use strict";

// The nsIIDNService instance this file exercises, obtained through its XPCOM
// contract ID.
const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(
  Ci.nsIIDNService
);

// NOTE(review): AppConstants is not referenced in the visible part of the
// file; presumably the test driver uses it to match platform-specific rows
// (some rows carry a "macosx" marker) — confirm against the driver.
const { AppConstants } = ChromeUtils.importESModule(
  "resource://gre/modules/AppConstants.sys.mjs"
);

// Expected classification of a row; used as the third element of every
// `testCases` entry.
// NOTE(review): presumably kSafe = shown as Unicode, kUnsafe = kept as
// punycode, kInvalid = rejected as malformed — the code that interprets these
// values is outside this chunk; confirm there.
const kSafe = 1;
const kUnsafe = 2;
const kInvalid = 3;

/**
 * Computes the optional fourth element for the Cyrillic-confusable rows of
 * `testCases`.
 *
 * Reads the boolean pref `network.idn.punycode_cyrillic_confusables` on every
 * call. The calls sit inside the `testCases` array literal, so the pref is
 * sampled while that literal is being built.
 *
 * @returns {undefined|string} `undefined` when the pref is true; otherwise the
 *   marker string "DISABLED" (the same literal that other rows hard-code).
 *   NOTE(review): how the driver treats a "DISABLED" row is not visible in
 *   this chunk — presumably the row's expectation is skipped or relaxed;
 *   confirm against the driver.
 */
function cyrillicConfusableExpectation() {
  if (Services.prefs.getBoolPref("network.idn.punycode_cyrillic_confusables")) {
    return undefined;
  }
  return "DISABLED";
}

// NOTE(review): the rest of this physical line is the opening of the
// `testCases` table. That array literal continues beyond the visible chunk, so
// its text is carried through verbatim below (source-viewer line numbers
// included) instead of being reconstructed from an incomplete view.
// 30 // prettier-ignore 31 let testCases = [ 32 // No IDN 33 ["www.google.com", "www.google.com", kSafe], 34 ["www.google.com.", "www.google.com.", kSafe], 35 [".", ".", kSafe], 36 ["", "", kSafe], 37 // Invalid IDN 38 ["xn--example-.com", "xn--example-.com", kInvalid], 39 // IDN 40 // Hanzi (Traditional Chinese) 41 ["xn--1lq90ic7f1rc.cn", "\u5317\u4eac\u5927\u5b78.cn", kSafe], 42 // Hanzi ('video' in Simplified Chinese) 43 ["xn--cy2a840a.com", "\u89c6\u9891.com", kSafe], 44 // Hanzi + '123' 45 ["www.xn--123-p18d.com", "www.\u4e00123.com", kSafe], 46 // Hanzi + Latin : U+56FD is simplified 47 ["www.xn--hello-9n1hm04c.com", "www.hello\u4e2d\u56fd.com", kSafe], 48 // Kanji + Kana (Japanese) 49 ["xn--l8jvb1ey91xtjb.jp", "\u671d\u65e5\u3042\u3055\u3072.jp", kSafe], 50 // Katakana including U+30FC 51 ["xn--tckm4i2e.jp", "\u30b3\u30de\u30fc\u30b9.jp", kSafe], 52 ["xn--3ck7a7g.jp", "\u30ce\u30f3\u30bd.jp", kSafe], 53 // Katakana + Latin (Japanese) 54 ["xn--e-efusa1mzf.jp", "e\u30b3\u30de\u30fc\u30b9.jp", kSafe], 55 ["xn--3bkxe.jp", "\u30c8\u309a.jp", kUnsafe], 56
["xn--6bk8d.jp", "\u30c8\u309d.jp", kSafe], 57 // Hangul (Korean) 58 ["www.xn--or3b17p6jjc.kr", "www.\uc804\uc790\uc815\ubd80.kr", kSafe], 59 // b<u-umlaut>cher (German) 60 ["xn--bcher-kva.de", "b\u00fccher.de", kSafe], 61 // a with diaeresis 62 ["www.xn--frgbolaget-q5a.se", "www.f\u00e4rgbolaget.se", kSafe], 63 // c-cedilla (French) 64 ["www.xn--alliancefranaise-npb.fr", "www.alliancefran\u00e7aise.fr", kSafe], 65 // caf'e with acute accent (French) 66 ["xn--caf-dma.fr", "caf\u00e9.fr", kSafe], 67 // c-cedillla and a with tilde (Portuguese) 68 ["xn--poema-9qae5a.com.br", "p\u00e3oema\u00e7\u00e3.com.br", kSafe], 69 // s with caron 70 ["xn--achy-f6a.com", "\u0161achy.com", kSafe], 71 ["xn--kxae4bafwg.gr", "\u03bf\u03c5\u03c4\u03bf\u03c0\u03af\u03b1.gr", kSafe], 72 // Eutopia + 123 (Greek) 73 ["xn---123-pldm0haj2bk.gr", "\u03bf\u03c5\u03c4\u03bf\u03c0\u03af\u03b1-123.gr", kSafe], 74 // Cyrillic (Russian) 75 ["xn--n1aeec9b.ru", "\u0442\u043e\u0440\u0442\u044b.ru", kSafe], 76 // Cyrillic + 123 (Russian) 77 ["xn---123-45dmmc5f.ru", "\u0442\u043e\u0440\u0442\u044b-123.ru", kSafe], 78 // 'president' in Russian. Is a wholescript confusable, but allowed. 
79 ["xn--d1abbgf6aiiy.xn--p1ai", "\u043f\u0440\u0435\u0437\u0438\u0434\u0435\u043d\u0442.\u0440\u0444", kSafe], 80 // Arabic 81 ["xn--mgba1fmg.eg", "\u0627\u0641\u0644\u0627\u0645.eg", kSafe], 82 // Hebrew 83 ["xn--4dbib.he", "\u05d5\u05d0\u05d4.he", kSafe], 84 // Hebrew + Common 85 ["xn---123-ptf2c5c6bt.il", "\u05e2\u05d1\u05e8\u05d9\u05ea-123.il", kSafe], 86 // Thai 87 ["xn--12c2cc4ag3b4ccu.th", "\u0e2a\u0e32\u0e22\u0e01\u0e32\u0e23\u0e1a\u0e34\u0e19.th", kSafe], 88 // Thai + Common 89 ["xn---123-9goxcp8c9db2r.th", "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22-123.th", kSafe], 90 // Devangari (Hindi) 91 ["www.xn--l1b6a9e1b7c.in", "www.\u0905\u0915\u094b\u0932\u093e.in", kSafe], 92 // Devanagari + Common 93 ["xn---123-kbjl2j0bl2k.in", "\u0939\u093f\u0928\u094d\u0926\u0940-123.in", kSafe], 94 95 // Block mixed numeric + numeric lookalike (12.com, using U+0577). 96 ["xn--1-xcc.com", "1\u0577.com", kUnsafe], 97 98 // Block mixed numeric lookalike + numeric (੨0.com, uses U+0A68). 99 ["xn--0-6ee.com", "\u0a680.com", kUnsafe], 100 // Block fully numeric lookalikes (৪੨.com using U+09EA and U+0A68). 101 ["xn--47b6w.com", "\u09ea\u0a68.com", kUnsafe], 102 // Block single script digit lookalikes (using three U+0A68 characters). 103 ["xn--qccaa.com", "\u0a68\u0a68\u0a68.com", kUnsafe], 104 105 // URL test with mostly numbers and one confusable character 106 // Georgian 'd' 4000.com 107 ["xn--4000-pfr.com", "\u10eb4000.com", kUnsafe, "DISABLED"], 108 109 // What used to be 5 Aspirational scripts in the earlier versions of UAX 31. 110 // UAX 31 does not define aspirational scripts any more. 111 // See http://www.unicode.org/reports/tr31/#Aspirational_Use_Scripts . 
112 // Unified Canadian Syllabary 113 ["xn--dfe0tte.ca", "\u1456\u14c2\u14ef.ca", kUnsafe], 114 // Tifinagh 115 ["xn--4ljxa2bb4a6bxb.ma", "\u2d5c\u2d49\u2d3c\u2d49\u2d4f\u2d30\u2d56.ma", kUnsafe], 116 // Tifinagh with a disallowed character(U+2D6F) 117 ["xn--hmjzaby5d5f.ma", "\u2d5c\u2d49\u2d3c\u2d6f\u2d49\u2d4f.ma", kInvalid], 118 119 // Yi 120 ["xn--4o7a6e1x64c.cn", "\ua188\ua320\ua071\ua0b7.cn", kUnsafe], 121 // Mongolian - 'ordu' (place, camp) 122 ["xn--56ec8bp.cn", "\u1823\u1837\u1833\u1824.cn", kUnsafe], 123 // Mongolian with a disallowed character 124 ["xn--95e5de3ds.cn", "\u1823\u1837\u1804\u1833\u1824.cn", kUnsafe], 125 // Miao/Pollad 126 ["xn--2u0fpf0a.cn", "\U00016f04\U00016f62\U00016f59.cn", kUnsafe], 127 128 // Script mixing tests 129 // The following script combinations are allowed. 130 // HIGHLY_RESTRICTIVE with Latin limited to ASCII-Latin. 131 // ASCII-Latin + Japn (Kana + Han) 132 // ASCII-Latin + Kore (Hangul + Han) 133 // ASCII-Latin + Han + Bopomofo 134 // "payp<alpha>l.com" 135 ["xn--paypl-g9d.com", "payp\u03b1l.com", kUnsafe], 136 // google.gr with Greek omicron and epsilon 137 ["xn--ggl-6xc1ca.gr", "g\u03bf\u03bfgl\u03b5.gr", kUnsafe], 138 // google.ru with Cyrillic o 139 ["xn--ggl-tdd6ba.ru", "g\u043e\u043egl\u0435.ru", kUnsafe], 140 // h<e with acute>llo<China in Han>.cn 141 ["xn--hllo-bpa7979ih5m.cn", "h\u00e9llo\u4e2d\u56fd.cn", kUnsafe, "DISABLED"], 142 // <Greek rho><Cyrillic a><Cyrillic u>.ru 143 ["xn--2xa6t2b.ru", "\u03c1\u0430\u0443.ru", kUnsafe], 144 // Georgian + Latin 145 ["xn--abcef-vuu.test", "abc\u10ebef.test", kUnsafe], 146 // Hangul + Latin 147 ["xn--han-eb9ll88m.kr", "\ud55c\uae00han.kr", kSafe], 148 // Hangul + Latin + Han with IDN ccTLD 149 ["xn--han-or0kq92gkm3c.xn--3e0b707e", "\ud55c\uae00han\u97d3.\ud55c\uad6d", kSafe], 150 // non-ASCII Latin + Hangul 151 ["xn--caf-dma9024xvpg.kr", "caf\u00e9\uce74\ud398.kr", kUnsafe, "DISABLED"], 152 // Hangul + Hiragana 153 ["xn--y9j3b9855e.kr", "\ud55c\u3072\u3089.kr", kUnsafe], 154 
// <Hiragana>.<Hangul> is allowed because script mixing check is per label. 155 ["xn--y9j3b.xn--3e0b707e", "\u3072\u3089.\ud55c\uad6d", kSafe], 156 // Traditional Han + Latin 157 ["xn--hanzi-u57ii69i.tw", "\u6f22\u5b57hanzi.tw", kSafe], 158 // Simplified Han + Latin 159 ["xn--hanzi-u57i952h.cn", "\u6c49\u5b57hanzi.cn", kSafe], 160 // Simplified Han + Traditonal Han 161 ["xn--hanzi-if9kt8n.cn", "\u6c49\u6f22hanzi.cn", kSafe], 162 // Han + Hiragana + Katakana + Latin 163 ["xn--kanji-ii4dpizfq59yuykqr4b.jp", "\u632f\u308a\u4eee\u540d\u30ab\u30bfkanji.jp", kSafe], 164 // Han + Bopomofo 165 ["xn--5ekcde0577e87tc.tw", "\u6ce8\u97f3\u3105\u3106\u3107\u3108.tw", kUnsafe], 166 // Han + Latin + Bopomofo 167 ["xn--bopo-ty4cghi8509kk7xd.tw", "\u6ce8\u97f3bopo\u3105\u3106\u3107\u3108.tw", kUnsafe], 168 // Latin + Bopomofo 169 ["xn--bopomofo-hj5gkalm.tw", "bopomofo\u3105\u3106\u3107\u3108.tw", kUnsafe], 170 // Bopomofo + Katakana 171 ["xn--lcka3d1bztghi.tw", "\u3105\u3106\u3107\u3108\u30ab\u30bf\u30ab\u30ca.tw", kUnsafe], 172 // Bopomofo + Hangul 173 ["xn--5ekcde4543qbec.tw", "\u3105\u3106\u3107\u3108\uc8fc\uc74c.tw", kUnsafe], 174 // Devanagari + Latin 175 ["xn--ab-3ofh8fqbj6h.in", "ab\u0939\u093f\u0928\u094d\u0926\u0940.in", kUnsafe], 176 // Thai + Latin 177 ["xn--ab-jsi9al4bxdb6n.th", "ab\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22.th", kUnsafe], 178 // Armenian + Latin 179 ["xn--bs-red.com", "b\u057ds.com", kUnsafe], 180 // Tibetan + Latin 181 ["xn--foo-vkm.com", "foo\u0f37.com", kUnsafe], 182 // Oriya + Latin 183 ["xn--fo-h3g.com", "fo\u0b66.com", kUnsafe], 184 // Gujarati + Latin 185 ["xn--fo-isg.com", "fo\u0ae6.com", kUnsafe], 186 // <vitamin in Katakana>b1.com 187 ["xn--b1-xi4a7cvc9f.com", "\u30d3\u30bf\u30df\u30f3b1.com", kSafe], 188 // Devanagari + Han 189 ["xn--t2bes3ds6749n.com", "\u0930\u094b\u0932\u0947\u76e7\u0938.com", kUnsafe], 190 // Devanagari + Bengali 191 ["xn--11b0x.in", "\u0915\u0995.in", kUnsafe], 192 // Canadian Syllabary + Latin 193 ["xn--ab-lym.com", 
"ab\u14bf.com", kUnsafe], 194 ["xn--ab1-p6q.com", "ab1\u14bf.com", kUnsafe], 195 ["xn--1ab-m6qd.com", "\u14bf1ab\u14bf.com", kUnsafe], 196 ["xn--ab-jymc.com", "\u14bfab\u14bf.com", kUnsafe], 197 // Tifinagh + Latin 198 ["xn--liy-bq1b.com", "li\u2d4fy.com", kUnsafe], 199 ["xn--rol-cq1b.com", "rol\u2d4f.com", kUnsafe], 200 ["xn--ily-8p1b.com", "\u2d4fily.com", kUnsafe], 201 ["xn--1ly-8p1b.com", "\u2d4f1ly.com", kUnsafe], 202 203 // Invisibility check 204 // Thai tone mark malek(U+0E48) repeated 205 ["xn--03c0b3ca.th", "\u0e23\u0e35\u0e48\u0e48.th", kUnsafe], 206 // Accute accent repeated 207 ["xn--a-xbba.com", "a\u0301\u0301.com", kInvalid], 208 // 'a' with acuted accent + another acute accent 209 ["xn--1ca20i.com", "\u00e1\u0301.com", kUnsafe, "DISABLED"], 210 // Combining mark at the beginning 211 ["xn--abc-fdc.jp", "\u0300abc.jp", kInvalid], 212 213 // The following three are detected by |dangerous_pattern| regex, but 214 // can be regarded as an extension of blocking repeated diacritic marks. 215 // i followed by U+0307 (combining dot above) 216 ["xn--pixel-8fd.com", "pi\u0307xel.com", kUnsafe], 217 // U+0131 (dotless i) followed by U+0307 218 ["xn--pxel-lza43z.com", "p\u0131\u0307xel.com", kUnsafe], 219 // j followed by U+0307 (combining dot above) 220 ["xn--jack-qwc.com", "j\u0307ack.com", kUnsafe], 221 // l followed by U+0307 222 ["xn--lace-qwc.com", "l\u0307ace.com", kUnsafe], 223 224 // Do not allow a combining mark after dotless i/j. 225 ["xn--pxel-lza29y.com", "p\u0131\u0300xel.com", kUnsafe], 226 ["xn--ack-gpb42h.com", "\u0237\u0301ack.com", kUnsafe], 227 228 // Mixed script confusable 229 // google with Armenian Small Letter Oh(U+0585) 230 ["xn--gogle-lkg.com", "g\u0585ogle.com", kUnsafe], 231 ["xn--range-kkg.com", "\u0585range.com", kUnsafe], 232 ["xn--cucko-pkg.com", "cucko\u0585.com", kUnsafe], 233 // Latin 'o' in Armenian. 
234 ["xn--o-ybcg0cu0cq.com", "o\u0580\u0574\u0578\u0582\u0566\u0568.com", kUnsafe], 235 // Hiragana HE(U+3078) mixed with Katakana 236 ["xn--49jxi3as0d0fpc.com", "\u30e2\u30d2\u30fc\u30c8\u3078\u30d6\u30f3.com", kUnsafe], 237 238 // U+30FC should be preceded by a Hiragana/Katakana. 239 // Katakana + U+30FC + Han 240 ["xn--lck0ip02qw5ya.jp", "\u30ab\u30fc\u91ce\u7403.jp", kSafe], 241 // Hiragana + U+30FC + Han 242 ["xn--u8j5tr47nw5ya.jp", "\u304b\u30fc\u91ce\u7403.jp", kSafe], 243 // U+30FC + Han 244 ["xn--weka801xo02a.com", "\u30fc\u52d5\u753b\u30fc.com", kUnsafe], 245 // Han + U+30FC + Han 246 ["xn--wekz60nb2ay85atj0b.jp", "\u65e5\u672c\u30fc\u91ce\u7403.jp", kUnsafe], 247 // U+30FC at the beginning 248 ["xn--wek060nb2a.jp", "\u30fc\u65e5\u672c.jp", kUnsafe], 249 // Latin + U+30FC + Latin 250 ["xn--abcdef-r64e.jp", "abc\u30fcdef.jp", kUnsafe], 251 252 // U+30FB (・) is not allowed next to Latin, but allowed otherwise. 253 // U+30FB + Han 254 ["xn--vekt920a.jp", "\u30fb\u91ce.jp", kSafe], 255 // Han + U+30FB + Han 256 ["xn--vek160nb2ay85atj0b.jp", "\u65e5\u672c\u30fb\u91ce\u7403.jp", kSafe], 257 // Latin + U+30FB + Latin 258 ["xn--abcdef-k64e.jp", "abc\u30fbdef.jp", kUnsafe], 259 // U+30FB + Latin 260 ["xn--abc-os4b.jp", "\u30fbabc.jp", kUnsafe], 261 262 // U+30FD (ヽ) is allowed only after Katakana. 263 // Katakana + U+30FD 264 ["xn--lck2i.jp", "\u30ab\u30fd.jp", kSafe], 265 // Hiragana + U+30FD 266 ["xn--u8j7t.jp", "\u304b\u30fd.jp", kUnsafe], 267 // Han + U+30FD 268 ["xn--xek368f.jp", "\u4e00\u30fd.jp", kUnsafe], 269 ["xn--a-mju.jp", "a\u30fd.jp", kUnsafe], 270 ["xn--a1-bo4a.jp", "a1\u30fd.jp", kUnsafe], 271 272 // U+30FE (ヾ) is allowed only after Katakana. 
273 // Katakana + U+30FE 274 ["xn--lck4i.jp", "\u30ab\u30fe.jp", kSafe], 275 // Hiragana + U+30FE 276 ["xn--u8j9t.jp", "\u304b\u30fe.jp", kUnsafe], 277 // Han + U+30FE 278 ["xn--yek168f.jp", "\u4e00\u30fe.jp", kUnsafe], 279 ["xn--a-oju.jp", "a\u30fe.jp", kUnsafe], 280 ["xn--a1-eo4a.jp", "a1\u30fe.jp", kUnsafe], 281 282 // Cyrillic labels made of Latin-look-alike Cyrillic letters. 283 // 1) ѕсоре.com with ѕсоре in Cyrillic. 284 ["xn--e1argc3h.com", "\u0455\u0441\u043e\u0440\u0435.com", kUnsafe, cyrillicConfusableExpectation()], 285 // 2) ѕсоре123.com with ѕсоре in Cyrillic. 286 ["xn--123-qdd8bmf3n.com", "\u0455\u0441\u043e\u0440\u0435123.com", kUnsafe, cyrillicConfusableExpectation()], 287 // 3) ѕсоре-рау.com with ѕсоре and рау in Cyrillic. 288 ["xn----8sbn9akccw8m.com", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.com", kUnsafe, cyrillicConfusableExpectation()], 289 // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between 290 // them. 291 ["xn--1-8sbn9akccw8m.com", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.com", kUnsafe, cyrillicConfusableExpectation()], 292 293 // The same as above three, but in IDN TLD (рф). 294 // 1) ѕсоре.рф with ѕсоре in Cyrillic. 295 ["xn--e1argc3h.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435.\u0440\u0444", kSafe], 296 // 2) ѕсоре123.рф with ѕсоре in Cyrillic. 297 ["xn--123-qdd8bmf3n.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435123.\u0440\u0444", kSafe], 298 // 3) ѕсоре-рау.рф with ѕсоре and рау in Cyrillic. 299 ["xn----8sbn9akccw8m.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.\u0440\u0444", kSafe], 300 // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between 301 // them. 302 ["xn--1-8sbn9akccw8m.xn--p1ai", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.\u0440\u0444", kSafe], 303 304 // Same as above three, but in .ru TLD. 305 // 1) ѕсоре.ru with ѕсоре in Cyrillic. 
306 ["xn--e1argc3h.ru", "\u0455\u0441\u043e\u0440\u0435.ru", kSafe], 307 // 2) ѕсоре123.ru with ѕсоре in Cyrillic. 308 ["xn--123-qdd8bmf3n.ru", "\u0455\u0441\u043e\u0440\u0435123.ru", kSafe], 309 // 3) ѕсоре-рау.ru with ѕсоре and рау in Cyrillic. 310 ["xn----8sbn9akccw8m.ru", "\u0455\u0441\u043e\u0440\u0435-\u0440\u0430\u0443.ru", kSafe], 311 // 4) ѕсоре1рау.com with scope and pay in Cyrillic and a non-letter between 312 // them. 313 ["xn--1-8sbn9akccw8m.ru", "\u0455\u0441\u043e\u0440\u0435\u0031\u0440\u0430\u0443.ru", kSafe], 314 315 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. The label will remain 316 // punycode while the TLD will be decoded. 317 ["xn----8sbn9akccw8m.xn--3e0b707e", "xn----8sbn9akccw8m.\ud55c\uad6d", kSafe, cyrillicConfusableExpectation()], 318 319 // музей (museum in Russian) has characters without a Latin-look-alike. 320 ["xn--e1adhj9a.com", "\u043c\u0443\u0437\u0435\u0439.com", kSafe], 321 322 // ѕсоԗе.com is Cyrillic with Latin lookalikes. 323 ["xn--e1ari3f61c.com", "\u0455\u0441\u043e\u0517\u0435.com", kUnsafe, cyrillicConfusableExpectation()], 324 325 // ыоԍ.com is Cyrillic with Latin lookalikes. 326 ["xn--n1az74c.com", "\u044b\u043e\u050d.com", kUnsafe], 327 328 // сю.com is Cyrillic with Latin lookalikes. 329 ["xn--q1a0a.com", "\u0441\u044e.com", kUnsafe, cyrillicConfusableExpectation()], 330 331 // Regression test for lowercase letters in whole script confusable 332 // lookalike character lists. 333 ["xn--80a8a6a.com", "\u0430\u044c\u0441.com", kUnsafe, cyrillicConfusableExpectation()], 334 335 // googlе.한국 where е is Cyrillic. This tests the generic case when one 336 // label is not allowed but other labels in the domain name are still 337 // decoded. Here, googlе is left in punycode but the TLD is decoded. 
338 ["xn--googl-3we.xn--3e0b707e", "xn--googl-3we.\ud55c\uad6d", kSafe], 339 340 // Combining Diacritic marks after a script other than Latin-Greek-Cyrillic 341 ["xn--rsa2568fvxya.com", "\ud55c\u0307\uae00.com", kUnsafe], // 한́글.com 342 ["xn--rsa0336bjom.com", "\u6f22\u0307\u5b57.com", kUnsafe], // 漢̇字.com 343 // नागरी́.com 344 ["xn--lsa922apb7a6do.com", "\u0928\u093e\u0917\u0930\u0940\u0301.com", kUnsafe], 345 346 // Similarity checks against the list of top domains. "digklmo68.com" and 347 // 'digklmo68.co.uk" are listed for unittest in the top domain list. 348 // đigklmo68.com: 349 ["xn--igklmo68-kcb.com", "\u0111igklmo68.com", kUnsafe, "DISABLED"], 350 // www.đigklmo68.com: 351 ["www.xn--igklmo68-kcb.com", "www.\u0111igklmo68.com", kUnsafe, "DISABLED"], 352 // foo.bar.đigklmo68.com: 353 ["foo.bar.xn--igklmo68-kcb.com", "foo.bar.\u0111igklmo68.com", kUnsafe, "DISABLED"], 354 // đigklmo68.co.uk: 355 ["xn--igklmo68-kcb.co.uk", "\u0111igklmo68.co.uk", kUnsafe, "DISABLED"], 356 // mail.đigklmo68.co.uk: 357 ["mail.xn--igklmo68-kcb.co.uk", "mail.\u0111igklmo68.co.uk", kUnsafe, "DISABLED"], 358 // di̇gklmo68.com: 359 ["xn--digklmo68-6jf.com", "di\u0307gklmo68.com", kUnsafe], 360 // dig̱klmo68.com: 361 ["xn--digklmo68-7vf.com", "dig\u0331klmo68.com", kUnsafe, "DISABLED"], 362 // digĸlmo68.com: 363 ["xn--diglmo68-omb.com", "dig\u0138lmo68.com", kUnsafe], 364 // digkłmo68.com: 365 ["xn--digkmo68-9ob.com", "digk\u0142mo68.com", kUnsafe, "DISABLED"], 366 // digklṃo68.com: 367 ["xn--digklo68-l89c.com", "digkl\u1e43o68.com", kUnsafe, "DISABLED"], 368 // digklmø68.com: 369 ["xn--digklm68-b5a.com", "digklm\u00f868.com", kUnsafe, "DISABLED"], 370 // digklmoб8.com: 371 ["xn--digklmo8-h7g.com", "digklmo\u04318.com", kUnsafe], 372 // digklmo6৪.com: 373 ["xn--digklmo6-7yr.com", "digklmo6\u09ea.com", kUnsafe], 374 375 // 'islkpx123.com' is in the test domain list. 376 // 'іѕӏкрх123' can look like 'islkpx123' in some fonts. 
377 ["xn--123-bed4a4a6hh40i.com", "\u0456\u0455\u04cf\u043a\u0440\u0445123.com", kUnsafe, cyrillicConfusableExpectation()], 378 379 // 'o2.com', '28.com', '39.com', '43.com', '89.com', 'oo.com' and 'qq.com' 380 // are all explicitly added to the test domain list to aid testing of 381 // Latin-lookalikes that are numerics in other character sets and similar 382 // edge cases. 383 // 384 // Bengali: 385 ["xn--07be.com", "\u09e6\u09e8.com", kUnsafe, "DISABLED"], 386 ["xn--27be.com", "\u09e8\u09ea.com", kUnsafe], 387 ["xn--77ba.com", "\u09ed\u09ed.com", kUnsafe, "DISABLED"], 388 // Gurmukhi: 389 ["xn--qcce.com", "\u0a68\u0a6a.com", kUnsafe], 390 ["xn--occe.com", "\u0a66\u0a68.com", kUnsafe], 391 ["xn--rccd.com", "\u0a6b\u0a69.com", kUnsafe], 392 ["xn--pcca.com", "\u0a67\u0a67.com", kUnsafe], 393 // Telugu: 394 ["xn--drcb.com", "\u0c69\u0c68.com", kUnsafe], 395 // Devanagari: 396 ["xn--d4be.com", "\u0966\u0968.com", kUnsafe, "DISABLED"], 397 // Kannada: 398 ["xn--yucg.com", "\u0ce6\u0ce9.com", kUnsafe, "DISABLED"], 399 ["xn--yuco.com", "\u0ce6\u0ced.com", kUnsafe, "DISABLED"], 400 // Oriya: 401 ["xn--1jcf.com", "\u0b6b\u0b68.com", kUnsafe], 402 ["xn--zjca.com", "\u0b66\u0b66.com", kUnsafe], 403 // Gujarati: 404 ["xn--cgce.com", "\u0ae6\u0ae8.com", kUnsafe, "DISABLED"], 405 ["xn--fgci.com", "\u0ae9\u0aed.com", kUnsafe], 406 ["xn--dgca.com", "\u0ae7\u0ae7.com", kUnsafe, "DISABLED"], 407 408 // wmhtb.com 409 ["xn--l1acpvx.com", "\u0448\u043c\u043d\u0442\u044c.com", kUnsafe, "DISABLED"], 410 // щмнть.com 411 ["xn--l1acpzs.com", "\u0449\u043c\u043d\u0442\u044c.com", kUnsafe, "DISABLED"], 412 // шмнтв.com 413 ["xn--b1atdu1a.com", "\u0448\u043c\u043d\u0442\u0432.com", kUnsafe, "DISABLED"], 414 // шмԋтв.com 415 ["xn--b1atsw09g.com", "\u0448\u043c\u050b\u0442\u0432.com", kUnsafe], 416 // шмԧтв.com 417 ["xn--b1atsw03i.com", "\u0448\u043c\u0527\u0442\u0432.com", kUnsafe], 418 // шмԋԏв.com 419 ["xn--b1at9a12dua.com", "\u0448\u043c\u050b\u050f\u0432.com", kUnsafe], 420 // ഠട345.com 
421 ["xn--345-jtke.com", "\u0d20\u0d1f345.com", kUnsafe, "DISABLED"], 422 423 // Test additional confusable LGC characters (most of them without 424 // decomposition into base + diacritc mark). The corresponding ASCII 425 // domain names are in the test top domain list. 426 // ϼκαωχ.com 427 ["xn--mxar4bh6w.com", "\u03fc\u03ba\u03b1\u03c9\u03c7.com", kUnsafe], 428 // þħĸŧƅ.com 429 ["xn--vda6f3b2kpf.com", "\u00fe\u0127\u0138\u0167\u0185.com", kUnsafe], 430 // þhktb.com 431 ["xn--hktb-9ra.com", "\u00fehktb.com", kUnsafe], 432 // pħktb.com 433 ["xn--pktb-5xa.com", "p\u0127ktb.com", kUnsafe, "DISABLED"], 434 // phĸtb.com 435 ["xn--phtb-m0a.com", "ph\u0138tb.com", kUnsafe], 436 // phkŧb.com 437 ["xn--phkb-d7a.com", "phk\u0167b.com", kUnsafe, "DISABLED"], 438 // phktƅ.com 439 ["xn--phkt-ocb.com", "phkt\u0185.com", kUnsafe], 440 // ҏнкть.com 441 ["xn--j1afq4bxw.com", "\u048f\u043d\u043a\u0442\u044c.com", kUnsafe], 442 // ҏћкть.com 443 ["xn--j1aq4a7cvo.com", "\u048f\u045b\u043a\u0442\u044c.com", kUnsafe], 444 // ҏңкть.com 445 ["xn--j1aq4azund.com", "\u048f\u04a3\u043a\u0442\u044c.com", kUnsafe], 446 // ҏҥкть.com 447 ["xn--j1aq4azuxd.com", "\u048f\u04a5\u043a\u0442\u044c.com", kUnsafe], 448 // ҏӈкть.com 449 ["xn--j1aq4azuyj.com", "\u048f\u04c8\u043a\u0442\u044c.com", kUnsafe], 450 // ҏԧкть.com 451 ["xn--j1aq4azu9z.com", "\u048f\u0527\u043a\u0442\u044c.com", kUnsafe], 452 // ҏԩкть.com 453 ["xn--j1aq4azuq0a.com", "\u048f\u0529\u043a\u0442\u044c.com", kUnsafe], 454 // ҏнқть.com 455 ["xn--m1ak4azu6b.com", "\u048f\u043d\u049b\u0442\u044c.com", kUnsafe], 456 // ҏнҝть.com 457 ["xn--m1ak4azunc.com", "\u048f\u043d\u049d\u0442\u044c.com", kUnsafe], 458 // ҏнҟть.com 459 ["xn--m1ak4azuxc.com", "\u048f\u043d\u049f\u0442\u044c.com", kUnsafe], 460 // ҏнҡть.com 461 ["xn--m1ak4azu7c.com", "\u048f\u043d\u04a1\u0442\u044c.com", kUnsafe], 462 // ҏнӄть.com 463 ["xn--m1ak4azu8i.com", "\u048f\u043d\u04c4\u0442\u044c.com", kUnsafe], 464 // ҏнԟть.com 465 ["xn--m1ak4azuzy.com", 
"\u048f\u043d\u051f\u0442\u044c.com", kUnsafe], 466 // ҏнԟҭь.com 467 ["xn--m1a4a4nnery.com", "\u048f\u043d\u051f\u04ad\u044c.com", kUnsafe], 468 // ҏнԟҭҍ.com 469 ["xn--m1a4ne5jry.com", "\u048f\u043d\u051f\u04ad\u048d.com", kUnsafe], 470 // ҏнԟҭв.com 471 ["xn--b1av9v8dry.com", "\u048f\u043d\u051f\u04ad\u0432.com", kUnsafe], 472 // ҏӊԟҭв.com 473 ["xn--b1a9p8c1e8r.com", "\u048f\u04ca\u051f\u04ad\u0432.com", kUnsafe], 474 // wmŋr.com 475 ["xn--wmr-jxa.com", "wm\u014br.com", kUnsafe, "DISABLED"], 476 // шмпґ.com 477 ["xn--l1agz80a.com", "\u0448\u043c\u043f\u0491.com", kUnsafe, "DISABLED"], 478 // щмпґ.com 479 ["xn--l1ag2a0y.com", "\u0449\u043c\u043f\u0491.com", kUnsafe, "DISABLED"], 480 // щӎпґ.com 481 ["xn--o1at1tsi.com", "\u0449\u04ce\u043f\u0491.com", kUnsafe], 482 // ґғ.com 483 ["xn--03ae.com", "\u0491\u0493.com", kUnsafe, "DISABLED"], 484 // ґӻ.com 485 ["xn--03a6s.com", "\u0491\u04fb.com", kUnsafe], 486 // ҫұҳҽ.com 487 ["xn--r4amg4b.com", "\u04ab\u04b1\u04b3\u04bd.com", kUnsafe, "DISABLED"], 488 // ҫұӽҽ.com 489 ["xn--r4am0b8r.com", "\u04ab\u04b1\u04fd\u04bd.com", kUnsafe], 490 // ҫұӿҽ.com 491 ["xn--r4am0b3s.com", "\u04ab\u04b1\u04ff\u04bd.com", kUnsafe], 492 // ҫұӿҿ.com 493 ["xn--r4am6b4p.com", "\u04ab\u04b1\u04ff\u04bf.com", kUnsafe], 494 // ҫұӿє.com 495 ["xn--91a7osa62a.com", "\u04ab\u04b1\u04ff\u0454.com", kUnsafe], 496 // ӏԃԍ.com 497 ["xn--s5a8h4a.com", "\u04cf\u0503\u050d.com", kUnsafe], 498 499 // U+04CF(ӏ) is mapped to multiple characters, lowercase L(l) and 500 // lowercase I(i). Lowercase L is also regarded as similar to digit 1. 501 // The test domain list has {ig, ld, 1gd}.com for Cyrillic. 
502 // ӏԍ.com 503 ["xn--s5a8j.com", "\u04cf\u050d.com", kUnsafe], 504 // ӏԃ.com 505 ["xn--s5a8h.com", "\u04cf\u0503.com", kUnsafe], 506 // ӏԍԃ.com 507 ["xn--s5a8h3a.com", "\u04cf\u050d\u0503.com", kUnsafe], 508 509 // 1շ34567890.com 510 ["xn--134567890-gnk.com", "1\u057734567890.com", kUnsafe], 511 // ꓲ2345б7890.com 512 ["xn--23457890-e7g93622b.com", "\ua4f22345\u04317890.com", kUnsafe], 513 // 1ᒿ345б7890.com 514 ["xn--13457890-e7g0943b.com", "1\u14bf345\u04317890.com", kUnsafe], 515 // 12з4567890.com 516 ["xn--124567890-10h.com", "12\u04374567890.com", kUnsafe], 517 // 12ҙ4567890.com 518 ["xn--124567890-1ti.com", "12\u04994567890.com", kUnsafe], 519 // 12ӡ4567890.com 520 ["xn--124567890-mfj.com", "12\u04e14567890.com", kUnsafe], 521 // 12उ4567890.com 522 ["xn--124567890-m3r.com", "12\u09094567890.com", kUnsafe], 523 // 12ও4567890.com 524 ["xn--124567890-17s.com", "12\u09934567890.com", kUnsafe], 525 // 12ਤ4567890.com 526 ["xn--124567890-hfu.com", "12\u0a244567890.com", kUnsafe], 527 // 12ဒ4567890.com 528 ["xn--124567890-6s6a.com", "12\u10124567890.com", kUnsafe], 529 // 12ვ4567890.com 530 ["xn--124567890-we8a.com", "12\u10D54567890.com", kUnsafe], 531 // 12პ4567890.com 532 ["xn--124567890-hh8a.com", "12\u10DE4567890.com", kUnsafe], 533 // 123ㄐ567890.com 534 ["xn--123567890-dr5h.com", "123ㄐ567890.com", kUnsafe], 535 // 123Ꮞ567890.com 536 ["xn--123567890-dm4b.com", "123\u13ce567890.com", kUnsafe], 537 // 12345б7890.com 538 ["xn--123457890-fzh.com", "12345\u04317890.com", kUnsafe, "DISABLED"], 539 // 12345ճ7890.com 540 ["xn--123457890-fmk.com", "12345ճ7890.com", kUnsafe], 541 // 1234567ȣ90.com 542 ["xn--123456790-6od.com", "1234567\u022390.com", kUnsafe], 543 // 12345678୨0.com 544 ["xn--123456780-71w.com", "12345678\u0b680.com", kUnsafe], 545 // 123456789ଠ.com 546 ["xn--123456789-ohw.com", "123456789\u0b20.com", kUnsafe, "DISABLED"], 547 // 123456789ꓳ.com 548 ["xn--123456789-tx75a.com", "123456789\ua4f3.com", kUnsafe], 549 550 // aeœ.com 551 ["xn--ae-fsa.com", 
"ae\u0153.com", kUnsafe, "DISABLED"], 552 // æce.com 553 ["xn--ce-0ia.com", "\u00e6ce.com", kUnsafe, "DISABLED"], 554 // æœ.com 555 ["xn--6ca2t.com", "\u00e6\u0153.com", kUnsafe, "DISABLED"], 556 // ӕԥ.com 557 ["xn--y5a4n.com", "\u04d5\u0525.com", kUnsafe, "DISABLED"], 558 559 // ငၔဌ၂ဝ.com (entirely made of Myanmar characters) 560 ["xn--ridq5c9hnd.com", "\u1004\u1054\u100c\u1042\u101d.com", kUnsafe], 561 562 // ฟรฟร.com (made of two Thai characters. similar to wsws.com in 563 // some fonts) 564 ["xn--w3calb.com", "\u0e1f\u0e23\u0e1f\u0e23.com", kUnsafe], 565 // พรบ.com 566 ["xn--r3chp.com", "\u0e1e\u0e23\u0e1a.com", kUnsafe], 567 // ฟรบ.com 568 ["xn--r3cjm.com", "\u0e1f\u0e23\u0e1a.com", kUnsafe], 569 570 // Lao characters that look like w, s, o, and u. 571 // ພຣບ.com 572 ["xn--f7chp.com", "\u0e9e\u0ea3\u0e9a.com", kUnsafe, "DISABLED"], 573 // ຟຣບ.com 574 ["xn--f7cjm.com", "\u0e9f\u0ea3\u0e9a.com", kUnsafe, "DISABLED"], 575 // ຟຮບ.com 576 ["xn--f7cj9b.com", "\u0e9f\u0eae\u0e9a.com", kUnsafe, "DISABLED"], 577 // ຟຮ໐ບ.com 578 ["xn--f7cj9b5h.com", "\u0e9f\u0eae\u0ed0\u0e9a.com", kUnsafe, "DISABLED"], 579 580 // Lao character that looks like n. 581 // ก11.com 582 ["xn--11-lqi.com", "\u0e0111.com", kUnsafe, "DISABLED"], 583 584 // At one point the skeleton of 'w' was 'vv', ensure that 585 // that it's treated as 'w'. 586 ["xn--wder-qqa.com", "w\u00f3der.com", kUnsafe, "DISABLED"], 587 588 // Mixed digits: the first two will also fail mixed script test 589 // Latin + ASCII digit + Deva digit 590 ["xn--asc1deva-j0q.co.in", "asc1deva\u0967.co.in", kUnsafe], 591 // Latin + Deva digit + Beng digit 592 ["xn--devabeng-f0qu3f.co.in", "deva\u0967beng\u09e7.co.in", kUnsafe], 593 // ASCII digit + Deva digit 594 ["xn--79-v5f.co.in", "7\u09ea9.co.in", kUnsafe], 595 // Deva digit + Beng digit 596 ["xn--e4b0x.co.in", "\u0967\u09e7.co.in", kUnsafe], 597 // U+4E00 (CJK Ideograph One) is not a digit, but it's not allowed next to 598 // non-Kana scripts including numbers. 
599 ["xn--d12-s18d.cn", "d12\u4e00.cn", kUnsafe, "DISABLED"], 600 // One that's really long that will force a buffer realloc 601 ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", kSafe], 602 603 // Not allowed; characters outside [:Identifier_Status=Allowed:] 604 // Limited Use Scripts: UTS 31 Table 7. 605 // Vai 606 ["xn--sn8a.com", "\ua50b.com", kUnsafe], 607 // 'CARD' look-alike in Cherokee 608 ["xn--58db0a9q.com", "\u13df\u13aa\u13a1\u13a0.com", kUnsafe], 609 // Scripts excluded from Identifiers: UTS 31 Table 4 610 // Coptic 611 ["xn--5ya.com", "\u03e7.com", kUnsafe], 612 // Old Italic 613 ["xn--097cc.com", "\U00010300\U00010301.com", kUnsafe], 614 615 // U+115F (Hangul Filler) 616 ["xn--osd3820f24c.kr", "\uac00\ub098\u115f.kr", kInvalid], 617 ["www.xn--google-ho0coa.com", "www.\u2039google\u203a.com", kUnsafe], 618 // Latin small capital w: hardᴡare.com 619 ["xn--hardare-l41c.com", "hard\u1d21are.com", kUnsafe], 620 // Minus Sign(U+2212) 621 ["xn--t9g238xc2a.jp", "\u65e5\u2212\u672c.jp", kUnsafe], 622 // Latin Small Letter Script G: ɡɡ.com 623 ["xn--0naa.com", "\u0261\u0261.com", kUnsafe], 624 // Hangul Jamo(U+11xx) 625 ["xn--0pdc3b.com", "\u1102\u1103\u1110.com", kUnsafe], 626 // degree sign: 36°c.com 627 ["xn--36c-tfa.com", "36\u00b0c.com", kUnsafe], 628 // Pound sign 629 ["xn--5free-fga.com", "5free\u00a3.com", kUnsafe], 630 // Hebrew points (U+05B0, U+05B6) 631 ["xn--7cbl2kc2a.com", "\u05e1\u05b6\u05e7\u05b0\u05e1.com", kUnsafe], 632 // Danda(U+0964) 633 ["xn--81bp1b6ch8s.com", "\u0924\u093f\u091c\u0964\u0930\u0940.com", kUnsafe], 634 // Small letter script G(U+0261) 635 ["xn--oogle-qmc.com", "\u0261oogle.com", kUnsafe], 636 // Small Katakana Extension(U+31F1) 637 ["xn--wlk.com", "\u31f1.com", kUnsafe], 638 // Heart symbol: ♥ 639 ["xn--ab-u0x.com", "ab\u2665.com", kUnsafe], 640 // Emoji 641 ["xn--vi8hiv.xyz", "\U0001f355\U0001f4a9.xyz", 
kUnsafe], 642 // Registered trade mark 643 ["xn--egistered-fna.com", "\u00aeegistered.com", kUnsafe], 644 // Latin Letter Retroflex Click 645 ["xn--registered-25c.com", "registered\u01c3.com", kUnsafe], 646 // ASCII '!' not allowed in IDN 647 ["xn--!-257eu42c.kr", "\uc548\ub155!.kr", kUnsafe], 648 // 'GOOGLE' in IPA extension: ɢᴏᴏɢʟᴇ 649 ["xn--1naa7pn51hcbaa.com", "\u0262\u1d0f\u1d0f\u0262\u029f\u1d07.com", kUnsafe], 650 // Padlock icon spoof. 651 ["xn--google-hj64e.com", "\U0001f512google.com", kUnsafe], 652 653 // Custom block list 654 // Combining Long Solidus Overlay 655 ["google.xn--comabc-k8d", "google.com\u0338abc", kUnsafe], 656 // Hyphenation Point instead of Katakana Middle dot 657 ["xn--svgy16dha.jp", "\u30a1\u2027\u30a3.jp", kUnsafe], 658 // Gershayim with other Hebrew characters is allowed. 659 ["xn--5db6bh9b.il", "\u05e9\u05d1\u05f4\u05e6.il", kSafe, "DISABLED"], 660 // Hebrew Gershayim with Latin is invalid according to Python's idna 661 // package. 662 ["xn--ab-yod.com", "a\u05f4b.com", kInvalid], 663 // Hebrew Gershayim with Arabic is disallowed. 664 ["xn--5eb7h.eg", "\u0628\u05f4.eg", kUnsafe], 665 // #if BUILDFLAG(IS_APPLE) 666 // These characters are blocked due to a font issue on Mac. 667 // Tibetan transliteration characters. 668 ["xn--com-lum.test.pl", "com\u0f8c.test.pl", kUnsafe], 669 // Arabic letter KASHMIRI YEH 670 ["xn--fgb.com", "\u0620.com", kUnsafe, "macosx"], 671 // #endif 672 673 // Hyphens (http://unicode.org/cldr/utility/confusables.jsp?a=-) 674 // Hyphen-Minus (the only hyphen allowed) 675 // abc-def 676 ["abc-def.com", "abc-def.com", kSafe], 677 // Modifier Letter Minus Sign 678 ["xn--abcdef-5od.com", "abc\u02d7def.com", kUnsafe], 679 // Hyphen 680 ["xn--abcdef-dg0c.com", "abc\u2010def.com", kUnsafe], 681 // Non-Breaking Hyphen 682 // This is actually an invalid IDNA domain (U+2011 normalizes to U+2010), 683 // but it is included to ensure that we do not inadvertently allow this 684 // character to be displayed as Unicode. 
685 ["xn--abcdef-kg0c.com", "abc\u2011def.com", kInvalid], 686 // Figure Dash. 687 // Python's idna package refuses to decode the minus signs and dashes. ICU 688 // decodes them but treats them as unsafe in spoof checks, so these test 689 // cases are marked as unsafe instead of invalid. 690 ["xn--abcdef-rg0c.com", "abc\u2012def.com", kUnsafe], 691 // En Dash 692 ["xn--abcdef-yg0c.com", "abc\u2013def.com", kUnsafe], 693 // Hyphen Bullet 694 ["xn--abcdef-kq0c.com", "abc\u2043def.com", kUnsafe], 695 // Minus Sign 696 ["xn--abcdef-5d3c.com", "abc\u2212def.com", kUnsafe], 697 // Heavy Minus Sign 698 ["xn--abcdef-kg1d.com", "abc\u2796def.com", kUnsafe], 699 // Em Dash 700 // Small Em Dash (U+FE58) is normalized to Em Dash. 701 ["xn--abcdef-5g0c.com", "abc\u2014def.com", kUnsafe], 702 // Coptic Small Letter Dialect-P Ni. Looks like dash. 703 // Coptic Capital Letter Dialect-P Ni is normalized to small letter. 704 ["xn--abcdef-yy8d.com", "abc\u2cbbdef.com", kUnsafe], 705 706 // Block NV8 (Not valid in IDN 2008) characters. 707 // U+058A (֊) 708 ["xn--ab-vfd.com", "a\u058ab.com", kUnsafe], 709 ["xn--y9ac3j.com", "\u0561\u058a\u0562.com", kUnsafe], 710 // U+2019 (’) 711 ["xn--ab-n2t.com", "a\u2019b.com", kUnsafe], 712 // U+2027 (‧) 713 ["xn--ab-u3t.com", "a\u2027b.com", kUnsafe], 714 // U+30A0 (゠) 715 ["xn--ab-bg4a.com", "a\u30a0b.com", kUnsafe], 716 ["xn--9bk3828aea.com", "\uac00\u30a0\uac01.com", kUnsafe], 717 ["xn--9bk279fba.com", "\u4e00\u30a0\u4e00.com", kUnsafe], 718 ["xn--n8jl2x.com", "\u304a\u30a0\u3044.com", kUnsafe], 719 ["xn--fbke7f.com", "\u3082\u30a0\u3084.com", kUnsafe], 720 721 // Block single/double-quote-like characters. 722 // U+02BB (ʻ) 723 ["xn--ab-8nb.com", "a\u02bbb.com", kUnsafe], 724 // U+02BC (ʼ) 725 ["xn--ab-cob.com", "a\u02bcb.com", kUnsafe], 726 // U+144A: Not allowed to mix with scripts other than Canadian Syllabics. 
727 ["xn--ab-jom.com", "a\u144ab.com", kUnsafe], 728 ["xn--xcec9s.com", "\u1401\u144a\u1402.com", kUnsafe], 729 730 // Custom dangerous patterns 731 // Two Katakana-Hiragana combining mark in a row 732 ["google.xn--com-oh4ba.evil.jp", "google.com\u309a\u309a.evil.jp", kUnsafe], 733 // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}. 734 ["google.xn--comevil-v04f.jp", "google.com\u30ceevil.jp", kUnsafe], 735 // TODO(jshin): Review the danger of allowing the following two. 736 // Hiragana 'No' by itself is allowed. 737 ["xn--ldk.jp", "\u30ce.jp", kSafe], 738 // Hebrew Gershayim used by itself is allowed. 739 ["xn--5eb.il", "\u05f4.il", kSafe, "DISABLED"], 740 741 // Block RTL nonspacing marks (NSM) after unrelated scripts. 742 ["xn--foog-ycg.com", "foog\u0650.com", kUnsafe], // Latin + Arabic N]M 743 ["xn--foog-jdg.com", "foog\u0654.com", kUnsafe], // Latin + Arabic N]M 744 ["xn--foog-jhg.com", "foog\u0670.com", kUnsafe], // Latin + Arbic N]M 745 ["xn--foog-opf.com", "foog\u05b4.com", kUnsafe], // Latin + Hebrew N]M 746 ["xn--shb5495f.com", "\uac00\u0650.com", kUnsafe], // Hang + Arabic N]M 747 748 // 4 Deviation characters between IDNA 2003 and IDNA 2008 749 // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma 750 // and the latter two are mapped away. However, the punycode form should 751 // remain in punycode. 752 // U+00DF(sharp-s) 753 ["xn--fu-hia.de", "fu\u00df.de", kUnsafe, "DISABLED"], 754 // U+03C2(final-sigma) 755 ["xn--mxac2c.gr", "\u03b1\u03b2\u03c2.gr", kUnsafe, "DISABLED"], 756 // U+200C(ZWNJ) 757 ["xn--h2by8byc123p.in", "\u0924\u094d\u200c\u0930\u093f.in", kUnsafe], 758 // U+200C(ZWJ) 759 ["xn--11b6iy14e.in", "\u0915\u094d\u200d.in", kUnsafe], 760 761 // Math Monospace Small A. When entered in Unicode, it's canonicalized to 762 // 'a'. The punycode form should remain in punycode. 
763 ["xn--bc-9x80a.xyz", "\U0001d68abc.xyz", kInvalid], 764 // Math Sans Bold Capital Alpha 765 ["xn--bc-rg90a.xyz", "\U0001d756bc.xyz", kInvalid], 766 // U+3000 is canonicalized to a space(U+0020), but the punycode form 767 // should remain in punycode. 768 ["xn--p6j412gn7f.cn", "\u4e2d\u56fd\u3000", kInvalid], 769 // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form 770 // should remain in punycode. 771 ["xn--r6j012gn7f.cn", "\u4e2d\u56fd\u3002", kInvalid], 772 // Invalid punycode 773 // Has a codepoint beyond U+10FFFF. 774 ["xn--krank-kg706554a", "", kInvalid], 775 // '?' in punycode. 776 ["xn--hello?world.com", "", kInvalid], 777 778 // Not allowed in UTS46/IDNA 2008 779 // Georgian Capital Letter(U+10BD) 780 ["xn--1nd.com", "\u10bd.com", kInvalid], 781 // 3rd and 4th characters are '-'. 782 ["xn-----8kci4dhsd", "\u0440\u0443--\u0430\u0432\u0442\u043e", kInvalid, "DISABLED"], 783 // Leading combining mark 784 ["xn--72b.com", "\u093e.com", kInvalid], 785 // BiDi check per IDNA 2008/UTS 46 786 // Cannot starts with AN(Arabic-Indic Number) 787 ["xn--8hbae.eg", "\u0662\u0660\u0660.eg", kInvalid], 788 // Cannot start with a RTL character and ends with a LTR 789 ["xn--x-ymcov.eg", "\u062c\u0627\u0631x.eg", kInvalid], 790 // Can start with a RTL character and ends with EN(European Number) 791 ["xn--2-ymcov.eg", "\u062c\u0627\u06312.eg", kSafe], 792 // Can start with a RTL and end with AN 793 ["xn--mgbjq0r.eg", "\u062c\u0627\u0631\u0662.eg", kSafe], 794 795 // Extremely rare Latin letters 796 // Latin Ext B - Pinyin: ǔnion.com 797 ["xn--nion-unb.com", "\u01d4nion.com", kUnsafe, "DISABLED"], 798 // Latin Ext C: ⱴase.com 799 ["xn--ase-7z0b.com", "\u2c74ase.com", kUnsafe], 800 // Latin Ext D: ꝴode.com 801 ["xn--ode-ut3l.com", "\ua774ode.com", kUnsafe], 802 // Latin Ext Additional: ḷily.com 803 ["xn--ily-n3y.com", "\u1e37ily.com", kUnsafe, "DISABLED"], 804 // Latin Ext E: ꬺove.com 805 ["xn--ove-8y6l.com", "\uab3aove.com", kUnsafe], 806 // Greek Ext: 
ᾳβγ.com 807 ["xn--nxac616s.com", "\u1fb3\u03b2\u03b3.com", kInvalid], 808 // Cyrillic Ext A (label cannot begin with an illegal combining character). 809 ["xn--lrj.com", "\u2def.com", kInvalid], 810 // Cyrillic Ext B: ꙡ.com 811 ["xn--kx8a.com", "\ua661.com", kUnsafe], 812 // Cyrillic Ext C: ᲂ.com (Narrow o) 813 ["xn--43f.com", "\u1c82.com", kInvalid], 814 815 // The skeleton of Extended Arabic-Indic Digit Zero (۰) is a dot. Check that 816 // this is handled correctly (crbug/877045). 817 ["xn--dmb", "\u06f0", kSafe], 818 819 // Test that top domains whose skeletons are the same as the domain name are 820 // handled properly. In this case, tést.net should match test.net top 821 // domain and not be converted to unicode. 822 ["xn--tst-bma.net", "t\u00e9st.net", kUnsafe, "DISABLED"], 823 // Variations of the above, for testing crbug.com/925199. 824 // some.tést.net should match test.net. 825 ["some.xn--tst-bma.net", "some.t\u00e9st.net", kUnsafe, "DISABLED"], 826 // The following should not match test.net, so should be converted to 827 // unicode. 828 // ést.net (a suffix of tést.net). 829 ["xn--st-9ia.net", "\u00e9st.net", kSafe], 830 // some.ést.net 831 ["some.xn--st-9ia.net", "some.\u00e9st.net", kSafe], 832 // atést.net (tést.net is a suffix of atést.net) 833 ["xn--atst-cpa.net", "at\u00e9st.net", kSafe], 834 // some.atést.net 835 ["some.xn--atst-cpa.net", "some.at\u00e9st.net", kSafe], 836 837 // Modifier-letter-voicing should be blocked (wwwˬtest.com). 838 ["xn--wwwtest-2be.com", "www\u02ectest.com", kUnsafe], 839 840 // oĸ.com: Not a top domain, should be blocked because of Kra. 841 ["xn--o-tka.com", "o\u0138.com", kUnsafe], 842 843 // U+4E00 and U+3127 should be blocked when next to non-CJK. 844 ["xn--ipaddress-w75n.com", "ip\u4e00address.com", kUnsafe], 845 ["xn--ipaddress-wx5h.com", "ip\u3127address.com", kUnsafe], 846 // U+4E00 and U+3127 at the beginning and end of a string. 
847 ["xn--google-gg5e.com", "google\u3127.com", kUnsafe], 848 ["xn--google-9f5e.com", "\u3127google.com", kUnsafe], 849 ["xn--google-gn7i.com", "google\u4e00.com", kUnsafe], 850 ["xn--google-9m7i.com", "\u4e00google.com", kUnsafe], 851 // These are allowed because U+4E00 and U+3447 are not immediately next to 852 // non-CJK. 853 ["xn--gamer-fg1hz05u.com", "\u4e00\u751fgamer.com", kSafe], 854 ["xn--gamer-kg1hy05u.com", "gamer\u751f\u4e00.com", kSafe], 855 ["xn--gamer-k68dt714b.com", "\u3447\u751fgamer.com", kSafe], 856 ["xn--gamer-p68ds714b.com", "gamer\u751f\u3447.com", kSafe], 857 ["xn--4gqz91g.com", "\u4e00\u732b.com", kSafe], 858 ["xn--z2kt70q.com", "\u3447\u732b.com", kSafe], 859 // U+4E00 with another ideograph. 860 ["xn--4gqc.com", "\u4e00\u4e01.com", kSafe], 861 862 // CJK ideographs looking like slashes should be blocked when next to 863 // non-CJK. 864 ["example.xn--comtest-k63k", "example.com\u4e36test", kUnsafe], 865 ["example.xn--comtest-u83k", "example.com\u4e40test", kUnsafe], 866 ["example.xn--comtest-283k", "example.com\u4e41test", kUnsafe], 867 ["example.xn--comtest-m83k", "example.com\u4e3ftest", kUnsafe], 868 // This is allowed because the ideographs are not immediately next to 869 // non-CJK. 870 ["xn--oiqsace.com", "\u4e36\u4e40\u4e41\u4e3f.com", kSafe], 871 872 // Kana voiced sound marks are not allowed. 873 ["xn--google-1m4e.com", "google\u3099.com", kUnsafe], 874 ["xn--google-8m4e.com", "google\u309A.com", kUnsafe], 875 876 // Small letter theta looks like a zero. 
877 ["xn--123456789-yzg.com", "123456789\u03b8.com", kUnsafe], 878 879 ["xn--est-118d.net", "\u4e03est.net", kUnsafe], 880 ["xn--est-918d.net", "\u4e05est.net", kUnsafe], 881 ["xn--est-e28d.net", "\u4e06est.net", kUnsafe, "DISABLED"], 882 ["xn--est-t18d.net", "\u4e01est.net", kUnsafe], 883 ["xn--3-cq6a.com", "\u4e293.com", kUnsafe], 884 ["xn--cxe-n68d.com", "c\u4e2bxe.com", kUnsafe, "DISABLED"], 885 ["xn--cye-b98d.com", "cy\u4e42e.com", kUnsafe, "DISABLED"], 886 887 // U+05D7 can look like Latin n in many fonts. 888 ["xn--ceba.com", "\u05d7\u05d7.com", kUnsafe, "DISABLED"], 889 890 // U+00FE (þ) and U+00F0 (ð) are only allowed under the .is TLD. 891 ["xn--acdef-wva.com", "a\u00fecdef.com", kUnsafe], 892 ["xn--mnpqr-jta.com", "mn\u00f0pqr.com", kUnsafe], 893 ["xn--acdef-wva.is", "a\u00fecdef.is", kSafe], 894 ["xn--mnpqr-jta.is", "mn\u00f0pqr.is", kSafe], 895 896 // U+0259 (ə) is only allowed under the .az TLD. 897 ["xn--xample-vyc.com", "\u0259xample.com", kUnsafe], 898 ["xn--xample-vyc.az", "\u0259xample.az", kSafe], 899 900 // U+00B7 is only allowed on Catalan domains between two l's. 
901 ["xn--googlecom-5pa.com", "google\u00b7com.com", kUnsafe], 902 ["xn--ll-0ea.com", "l\u00b7l.com", kUnsafe], 903 ["xn--ll-0ea.cat", "l\u00b7l.cat", kSafe], 904 ["xn--al-0ea.cat", "a\u00b7l.cat", kUnsafe], 905 ["xn--la-0ea.cat", "l\u00b7a.cat", kUnsafe], 906 ["xn--l-fda.cat", "\u00b7l.cat", kUnsafe], 907 ["xn--l-gda.cat", "l\u00b7.cat", kUnsafe], 908 909 ["xn--googlecom-gk6n.com", "google\u4e28com.com", kUnsafe], 910 ["xn--googlecom-0y6n.com", "google\u4e5bcom.com", kUnsafe], 911 ["xn--googlecom-v85n.com", "google\u4e03com.com", kUnsafe], 912 ["xn--googlecom-g95n.com", "google\u4e05com.com", kUnsafe], 913 ["xn--googlecom-go6n.com", "google\u4e36com.com", kUnsafe], 914 ["xn--googlecom-b76o.com", "google\u5341com.com", kUnsafe], 915 ["xn--googlecom-ql3h.com", "google\u3007com.com", kUnsafe], 916 ["xn--googlecom-0r5h.com", "google\u3112com.com", kUnsafe], 917 ["xn--googlecom-bu5h.com", "google\u311acom.com", kUnsafe], 918 ["xn--googlecom-qv5h.com", "google\u311fcom.com", kUnsafe], 919 ["xn--googlecom-0x5h.com", "google\u3127com.com", kUnsafe], 920 ["xn--googlecom-by5h.com", "google\u3128com.com", kUnsafe], 921 ["xn--googlecom-ly5h.com", "google\u3129com.com", kUnsafe], 922 ["xn--googlecom-5o5h.com", "google\u3108com.com", kUnsafe], 923 ["xn--googlecom-075n.com", "google\u4e00com.com", kUnsafe], 924 ["xn--googlecom-046h.com", "google\u31bacom.com", kUnsafe], 925 ["xn--googlecom-026h.com", "google\u31b3com.com", kUnsafe], 926 ["xn--googlecom-lg9q.com", "google\u5de5com.com", kUnsafe], 927 ["xn--googlecom-g040a.com", "google\u8ba0com.com", kUnsafe], 928 ["xn--googlecom-b85n.com", "google\u4e01com.com", kUnsafe], 929 930 // Whole-script-confusables. Cyrillic is sufficiently handled in cases above 931 // so it's not included here. 
932 // Armenian: 933 ["xn--mbbkpm.com", "\u0578\u057d\u0582\u0585.com", kUnsafe, "DISABLED"], 934 ["xn--mbbkpm.am", "\u0578\u057d\u0582\u0585.am", kSafe], 935 ["xn--mbbkpm.xn--y9a3aq", "\u0578\u057d\u0582\u0585.\u0570\u0561\u0575", kSafe], 936 // Ethiopic: 937 ["xn--6xd66aa62c.com", "\u1220\u12d0\u12d0\u1350.com", kUnsafe, "DISABLED"], 938 ["xn--6xd66aa62c.et", "\u1220\u12d0\u12d0\u1350.et", kSafe], 939 ["xn--6xd66aa62c.xn--m0d3gwjla96a", "\u1220\u12d0\u12d0\u1350.\u12a2\u1275\u12ee\u1335\u12eb", kSafe], 940 // Greek: 941 ["xn--mxapd.com", "\u03b9\u03ba\u03b1.com", kUnsafe, "DISABLED"], 942 ["xn--mxapd.gr", "\u03b9\u03ba\u03b1.gr", kSafe], 943 ["xn--mxapd.xn--qxam", "\u03b9\u03ba\u03b1.\u03b5\u03bb", kSafe], 944 // Georgian: 945 ["xn--lod4df.com", "\u10d0\u10f0\u10ee.com", kUnsafe, "DISABLED"], 946 ["xn--lod4df.ge", "\u10d0\u10f0\u10ee.ge", kSafe], 947 ["xn--lod4df.xn--node", "\u10d0\u10f0\u10ee.\u10d2\u10d4", kSafe], 948 // Hebrew: 949 ["xn--7dbh4a.com", "\u05d7\u05e1\u05d3.com", kUnsafe, "DISABLED"], 950 ["xn--7dbh4a.il", "\u05d7\u05e1\u05d3.il", kSafe], 951 ["xn--9dbq2a.xn--7dbh4a", "\u05e7\u05d5\u05dd.\u05d7\u05e1\u05d3", kSafe], 952 // Myanmar: 953 ["xn--oidbbf41a.com", "\u1004\u1040\u1002\u1001\u1002.com", kUnsafe, "DISABLED"], 954 ["xn--oidbbf41a.mm", "\u1004\u1040\u1002\u1001\u1002.mm", kSafe], 955 ["xn--oidbbf41a.xn--7idjb0f4ck", "\u1004\u1040\u1002\u1001\u1002.\u1019\u103c\u1014\u103a\u1019\u102c", kSafe], 956 // Myanmar Shan digits: 957 ["xn--rmdcmef.com", "\u1090\u1091\u1095\u1096\u1097.com", kUnsafe], 958 ["xn--rmdcmef.mm", "\u1090\u1091\u1095\u1096\u1097.mm", kUnsafe], 959 ["xn--rmdcmef.xn--7idjb0f4ck", "\u1090\u1091\u1095\u1096\u1097.\u1019\u103c\u1014\u103a\u1019\u102c", kSafe, "DISABLED"], 960 // Thai: 961 // #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) 962 ["xn--o3cedqz2c.com", "\u0e17\u0e19\u0e1a\u0e1e\u0e23\u0e2b.com", kUnsafe, "linux"], 963 ["xn--o3cedqz2c.th", "\u0e17\u0e19\u0e1a\u0e1e\u0e23\u0e2b.th", kSafe], 964 
["xn--o3cedqz2c.xn--o3cw4h", "\u0e17\u0e19\u0e1a\u0e1e\u0e23\u0e2b.\u0e44\u0e17\u0e22", kSafe], 965 // #else 966 ["xn--r3ch7hsc.com", "\u0e1e\u0e1a\u0e40\u0e50.com", kUnsafe], 967 ["xn--r3ch7hsc.th", "\u0e1e\u0e1a\u0e40\u0e50.th", kSafe], 968 ["xn--r3ch7hsc.xn--o3cw4h", "\u0e1e\u0e1a\u0e40\u0e50.\u0e44\u0e17\u0e22", kSafe], 969 // #endif 970 971 // Indic scripts: 972 // Bengali: 973 ["xn--07baub.com", "\u09e6\u09ed\u09e6\u09ed.com", kUnsafe, "DISABLED"], 974 // Devanagari: 975 ["xn--62ba6j.com", "\u093d\u0966\u093d.com", kUnsafe], 976 // Gujarati: 977 ["xn--becd.com", "\u0aa1\u0a9f.com", kUnsafe, "DISABLED"], 978 // Gurmukhi: 979 ["xn--occacb.com", "\u0a66\u0a67\u0a66\u0a67.com", kUnsafe], 980 // Kannada: 981 ["xn--stca6jf.com", "\u0cbd\u0ce6\u0cbd\u0ce7.com", kUnsafe], 982 // Malayalam: 983 ["xn--lwccv.com", "\u0d1f\u0d20\u0d27.com", kUnsafe, "DISABLED"], 984 // Oriya: 985 ["xn--zhca6ub.com", "\u0b6e\u0b20\u0b6e\u0b20.com", kUnsafe], 986 // Tamil: 987 ["xn--mlca6ab.com", "\u0b9f\u0baa\u0b9f\u0baa.com", kUnsafe, "DISABLED"], 988 // Telugu: 989 ["xn--brcaabbb.com", "\u0c67\u0c66\u0c67\u0c66\u0c67\u0c66.com", kUnsafe], 990 991 // IDN domain matching an IDN top-domain (f\u00f3\u00f3.com) 992 ["xn--fo-5ja.com", "f\u00f3o.com", kUnsafe, "DISABLED"], 993 994 // crbug.com/769547: Subdomains of top domains should be allowed. 995 ["xn--xample-9ua.test.net", "\u00e9xample.test.net", kSafe], 996 // Skeleton of the eTLD+1 matches a top domain, but the eTLD+1 itself is 997 // not a top domain. Should not be decoded to unicode. 
998 ["xn--xample-9ua.test.xn--nt-bja", "\u00e9xample.test.n\u00e9t", kUnsafe, "DISABLED"], 999 1000 // Digit lookalike check of 16კ.com with character “კ” (U+10D9) 1001 // Test case for https://crbug.com/1156531 1002 ["xn--16-1ik.com", "16\u10d9.com", kUnsafe], 1003 1004 // Skeleton generator check of officeკ65.com with character “კ” (U+10D9) 1005 // Test case for https://crbug.com/1156531 1006 ["xn--office65-l04a.com", "office\u10d965.com", kUnsafe], 1007 1008 // Digit lookalike check of 16ੜ.com with character “ੜ” (U+0A5C) 1009 // Test case for https://crbug.com/1156531 (missed skeleton map) 1010 ["xn--16-ogg.com", "16\u0a5c.com", kUnsafe], 1011 1012 // Skeleton generator check of officeੜ65.com with character “ੜ” (U+0A5C) 1013 // Test case for https://crbug.com/1156531 (missed skeleton map) 1014 ["xn--office65-hts.com", "office\u0a5c65.com", kUnsafe], 1015 1016 // New test cases go ↑↑ above. 1017 1018 // /!\ WARNING: You MUST use tools/security/idn_test_case_generator.py to 1019 // generate new test cases, as specified by the comment at the top of this 1020 // test list. Why must you use that python script? 1021 // 1. It is easy to get things wrong. There were several hand-crafted 1022 // incorrect test cases committed that was later fixed. 1023 // 2. This test _also_ is a test of Chromium's IDN encoder/decoder, so using 1024 // Chromium's IDN encoder/decoder to generate test files loses an 1025 // advantage of having Python's IDN encode/decode the tests. 
1026 ]; 1027 1028 function checkEquals(a, b, message, expectedFail) { 1029 if (!expectedFail) { 1030 Assert.equal(a, b, message); 1031 } else { 1032 Assert.notEqual(a, b, `EXPECTED-FAIL: ${message}`); 1033 } 1034 } 1035 1036 // prettier-ignore 1037 let additionalTestcases = [ 1038 // Bug 1850388 1039 ["xn--80aaa2bl1n.com", "акамаі.com", kUnsafe, cyrillicConfusableExpectation()], 1040 ]; 1041 1042 add_task(async function test_chrome_spoofs() { 1043 testCases = testCases.concat(additionalTestcases); 1044 for (let test of testCases) { 1045 let result = "\uFFFD"; 1046 try { 1047 result = idnService.convertToDisplayIDN(test[0]); 1048 } catch (e) {} 1049 // If test[3] is set to a platform, the test is expected to pass only on that platform 1050 let expectedFail = 1051 test.length == 4 && 1052 (test[3] == "DISABLED" || (test[3] && test[3] != AppConstants.platform)); 1053 if (test[2] == kSafe) { 1054 checkEquals( 1055 result, 1056 test[1], 1057 `kSafe label ${test[0]} should convert to ${test[1]}`, 1058 expectedFail 1059 ); 1060 } else if (test[2] == kUnsafe) { 1061 checkEquals( 1062 result, 1063 test[0], 1064 `kUnsafe label ${test[0]} should not convert to ${test[1]}`, 1065 expectedFail 1066 ); 1067 } else if (test[2] == kInvalid) { 1068 checkEquals( 1069 result, 1070 "\uFFFD", 1071 `kInvalid label ${test[0]} should throw`, 1072 expectedFail 1073 ); 1074 } 1075 } 1076 }); 1077 1078 add_task(async function test_interpuncts_fqdn() { 1079 let isAscii = {}; 1080 let result = idnService.convertToDisplayIDN("xn--ll-0ea.cat.", isAscii); 1081 Assert.equal(result, "l\u00b7l.cat."); 1082 });