LocaleGenerated.cpp (49073B)
1 // Generated by make_intl_data.py. DO NOT EDIT. 2 // Version: CLDR-48 3 // URL: https://unicode.org/Public/cldr/48/cldr-common-48.zip 4 5 #include "mozilla/Assertions.h" 6 #include "mozilla/Span.h" 7 #include "mozilla/TextUtils.h" 8 9 #include <algorithm> 10 #include <cstdint> 11 #include <cstring> 12 #include <iterator> 13 #include <string> 14 15 #include "mozilla/intl/Locale.h" 16 17 using namespace mozilla::intl::LanguageTagLimits; 18 19 template <size_t Length, size_t TagLength, size_t SubtagLength> 20 static inline bool HasReplacement( 21 const char (&subtags)[Length][TagLength], 22 const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) { 23 MOZ_ASSERT(subtag.Length() == TagLength - 1, 24 "subtag must have the same length as the list of subtags"); 25 26 const char* ptr = subtag.Span().data(); 27 return std::binary_search(std::begin(subtags), std::end(subtags), ptr, 28 [](const char* a, const char* b) { 29 return memcmp(a, b, TagLength - 1) < 0; 30 }); 31 } 32 33 template <size_t Length, size_t TagLength, size_t SubtagLength> 34 static inline const char* SearchReplacement( 35 const char (&subtags)[Length][TagLength], const char* (&aliases)[Length], 36 const mozilla::intl::LanguageTagSubtag<SubtagLength>& subtag) { 37 MOZ_ASSERT(subtag.Length() == TagLength - 1, 38 "subtag must have the same length as the list of subtags"); 39 40 const char* ptr = subtag.Span().data(); 41 auto p = std::lower_bound(std::begin(subtags), std::end(subtags), ptr, 42 [](const char* a, const char* b) { 43 return memcmp(a, b, TagLength - 1) < 0; 44 }); 45 if (p != std::end(subtags) && memcmp(*p, ptr, TagLength - 1) == 0) { 46 return aliases[std::distance(std::begin(subtags), p)]; 47 } 48 return nullptr; 49 } 50 51 #ifdef DEBUG 52 static bool IsAsciiLowercaseAlphanumeric(char c) { 53 return mozilla::IsAsciiLowercaseAlpha(c) || mozilla::IsAsciiDigit(c); 54 } 55 56 static bool IsAsciiLowercaseAlphanumericOrDash(char c) { 57 return IsAsciiLowercaseAlphanumeric(c) || c == '-'; 58 } 59 60 static bool IsCanonicallyCasedLanguageTag(mozilla::Span<const char> span) { 61 return std::all_of(span.begin(), span.end(), 62 mozilla::IsAsciiLowercaseAlpha<char>); 63 } 64 65 static bool IsCanonicallyCasedScriptTag(mozilla::Span<const char> span) { 66 return mozilla::IsAsciiUppercaseAlpha(span[0]) && 67 std::all_of(span.begin() + 1, span.end(), 68 mozilla::IsAsciiLowercaseAlpha<char>); 69 } 70 71 static bool IsCanonicallyCasedRegionTag(mozilla::Span<const char> span) { 72 return std::all_of(span.begin(), span.end(), 73 mozilla::IsAsciiUppercaseAlpha<char>) || 74 std::all_of(span.begin(), span.end(), mozilla::IsAsciiDigit<char>); 75 } 76 77 static bool IsCanonicallyCasedVariantTag(mozilla::Span<const char> span) { 78 return std::all_of(span.begin(), span.end(), IsAsciiLowercaseAlphanumeric); 79 } 80 81 static bool IsCanonicallyCasedUnicodeKey(mozilla::Span<const char> key) { 82 return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); 83 } 84 85 static bool IsCanonicallyCasedUnicodeType(mozilla::Span<const char> type) { 86 return std::all_of(type.begin(), type.end(), 87 IsAsciiLowercaseAlphanumericOrDash); 88 } 89 90 static bool IsCanonicallyCasedTransformKey(mozilla::Span<const char> key) { 91 return std::all_of(key.begin(), key.end(), IsAsciiLowercaseAlphanumeric); 92 } 93 94 static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) { 95 return std::all_of(type.begin(), type.end(), 96 IsAsciiLowercaseAlphanumericOrDash); 97 } 98 #endif 99 100 // Mappings from language subtags to preferred values. 101 // Derived from CLDR Supplemental Data, version 48. 102 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 103 bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) { 104 MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); 105 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); 106 107 if (language.Length() == 2) { 108 static const char languages[8][3] = { 109 "bh", "in", "iw", "ji", "jw", "mo", "tl", "tw", 110 }; 111 static const char* aliases[8] = { 112 "bho", "id", "he", "yi", "jv", "ro", "fil", "ak", 113 }; 114 115 if (const char* replacement = SearchReplacement(languages, aliases, language)) { 116 language.Set(mozilla::MakeStringSpan(replacement)); 117 return true; 118 } 119 return false; 120 } 121 122 if (language.Length() == 3) { 123 static const char languages[420][4] = { 124 "aam", "aar", "abk", "adp", "afr", "agp", "ais", "ajp", "ajt", "aju", 125 "aka", "alb", "als", "amh", "ara", "arb", "arg", "arm", "asd", "asm", 126 "aue", "ava", "ave", "aym", "ayr", "ayx", "aze", "azj", "bak", "bam", 127 "baq", "baz", "bcc", "bcl", "bel", "ben", "bgm", "bhk", "bic", "bih", 128 "bis", "bjd", "bjq", "bkb", "blg", "bod", "bos", "bre", "btb", "bul", 129 "bur", "bxk", "bxr", "cat", "ccq", "ces", "cha", "che", "chi", "chu", 130 "chv", "cjr", "cka", "cld", "cls", "cmk", "cmn", "cor", "cos", "coy", 131 "cqu", "cre", "cwd", "cym", "cze", "daf", "dan", "dap", "dek", "deu", 132 "dgo", "dhd", "dik", "diq", "dit", "div", "djl", "dkl", "drh", "drr", 133 "dud", "duj", "dut", "dwl", "dzo", "ekk", "ell", "elp", "emk", "eng", 134 "epo", "esk", "est", "eus", "ewe", "fao", "fas", "fat", "fij", "fin", 135 "fra", "fre", "fry", "fuc", "ful", "gav", "gaz", "gbc", "gbo", "geo", 136 "ger", "gfx", "ggn", "ggo", "ggr", "gio", "gla", "gle", "glg", "gli", 137 "glv", "gno", "gom", "gre", "grn", "gti", "gug", "guj", "guv", "gya", 138 "hat", "hau", "hdn", "hea", "heb", "her", "him", "hin", "hmo", "hrr", 139 "hrv", "hun", "hye", "ibi", "ibo", "ice", "ido", "iii", "ike", "iku", 140 "ile", "ill", "ilw", "ina", "ind", "ipk", "isl", "ita", "izi", "jar", 141 "jav", "jeg", "jpn", "kal", "kan", "kas", "kat", "kau", "kaz", "kdv", 142 "kgc", "kgd", "kgh", "kgm", "khk", "khm", "kik", "kin", "kir", "kmr", 143 "knc", "kng", "koj", "kom", "kon", "kor", "kpp", "kpv", "krm", "ktr", 144 "kua", "kur", "kvs", "kwq", "kxe", "kxl", "kzh", "kzj", "kzt", "lak", 145 "lao", "lat", "lav", "lbk", "leg", "lii", "lim", "lin", "lit", "llo", 146 "lmm", "ltz", "lub", "lug", "lvs", "mac", "mah", "mal", "mao", "mar", 147 "may", "meg", "mgx", "mhr", "mkd", "mlg", "mlt", "mnt", "mof", "mol", 148 "mon", "mri", "msa", "mst", "mup", "mwd", "mwj", "mya", "myd", "myt", 149 "nad", "nau", "nav", "nbf", "nbl", "nbx", "ncp", "nde", "ndo", "nep", 150 "nld", "nln", "nlr", "nno", "nns", "nnx", "nob", "nom", "noo", "nor", 151 "npi", "nte", "nts", "nxu", "nya", "oci", "ojg", "oji", "ori", "orm", 152 "ory", "oss", "oun", "pan", "pat", "pbu", "pcr", "per", "pes", "pli", 153 "plt", "pmc", "pmk", "pmu", "pnb", "pol", "por", "ppa", "ppr", "prp", 154 "pry", "pus", "puz", "que", "quz", "rmr", "rmy", "roh", "ron", "rum", 155 "run", "rus", "sag", "san", "sap", "sca", "scc", "scr", "sgl", "sin", 156 "skk", "slk", "slo", "slv", "smd", "sme", "smo", "sna", "snb", "snd", 157 "som", "sot", "spa", "spy", "sqi", "src", "srd", "srp", "ssw", "sul", 158 "sum", "sun", "swa", "swe", "swh", "szd", "tah", "tam", "tat", "tdu", 159 "tel", "tgg", "tgk", "tgl", "tha", "thc", "thw", "thx", "tib", "tid", 160 "tie", "tir", "tkk", "tlw", "tmk", "tmp", "tne", "ton", "tpw", "tsf", 161 "tsn", "tso", "ttq", "tuk", "tur", "twi", "uig", "ukr", "umu", "unp", 162 "uok", "urd", "uzb", "uzn", "ven", "vie", "vol", "wel", "wgw", "wit", 163 "wiw", "wln", "wol", "xba", "xho", "xia", "xkh", "xpe", "xrq", "xsj", 164 "xsl", "xss", "ybd", "ydd", "yen", "yid", "yiy", "yma", "ymt", "yor", 165 "yos", "yuu", "zai", "zha", "zho", "zir", "zkb", "zsm", "zul", "zyb", 166 }; 167 static const char* aliases[420] = { 168 "aas", "aa", "ab", "dz", "af", "apf", "ami", "apc", "aeb", "jrb", 169 "ak", "sq", "sq", "am", "ar", "ar", "an", "hy", "snz", "as", 170 "ktz", "av", "ae", "ay", "ay", "nun", "az", "az", "ba", "bm", 171 "eu", "nvo", "bal", "bik", "be", "bn", "bcg", "fbl", "bir", "bho", 172 "bi", "drl", "bzc", "ebk", "iba", "bo", "bs", "br", "beb", "bg", 173 "my", "luy", "bua", "ca", "rki", "cs", "ch", "ce", "zh", "cu", 174 "cv", "mom", "cmr", "syr", "sa", "xch", "zh", "kw", "co", "pij", 175 "quh", "cr", "cr", "cy", "cs", "dnj", "da", "njz", "sqm", "de", 176 "doi", "mwr", "din", "zza", "dif", "dv", "dze", "aqd", "mn", "kzk", 177 "uth", "dwu", "nl", "dbt", "dz", "et", "el", "amq", "man", "en", 178 "eo", "ik", "et", "eu", "ee", "fo", "fa", "ak", "fj", "fi", 179 "fr", "fr", "fy", "ff", "ff", "dev", "om", "wny", "grb", "ka", 180 "de", "vaj", "gvr", "esg", "gtu", "aou", "gd", "ga", "gl", "kzk", 181 "gv", "gon", "kok", "el", "gn", "nyc", "gn", "gu", "duz", "gba", 182 "ht", "ha", "hai", "hmn", "he", "hz", "srx", "hi", "ho", "jal", 183 "hr", "hu", "hy", "opa", "ig", "is", "io", "ii", "iu", "iu", 184 "ie", "ilm", "gal", "ia", "id", "ik", "is", "it", "eza", "jgk", 185 "jv", "oyb", "ja", "kl", "kn", "ks", "ka", "kr", "kk", "zkd", 186 "tdf", "ncq", "kml", "plu", "mn", "km", "ki", "rw", "ky", "ku", 187 "kr", "kg", "kwv", "kv", "kg", "ko", "jkm", "kv", "bmf", "dtp", 188 "kj", "ku", "gdj", "yam", "tvd", "kru", "dgl", "dtp", "dtp", "ksp", 189 "lo", "la", "lv", "bnc", "enl", "raq", "li", "ln", "lt", "ngt", 190 "rmx", "lb", "lu", "lg", "lv", "mk", "mh", "ml", "mi", "mr", 191 "ms", "cir", "jbk", "chm", "mk", "mg", "mt", "wnn", "xnt", "ro", 192 "mn", "mi", "ms", "mry", "raj", "dmw", "vaj", "my", "aog", "mry", 193 "xny", "na", "nv", "nru", "nr", "gll", "kdz", "nd", "ng", "ne", 194 "nl", "azd", "nrk", "nn", "nbr", "ngv", "nb", "cbr", "dtd", "no", 195 "ne", "eko", "pij", "bpp", "ny", "oc", "oj", "oj", "or", "om", 196 "or", "os", "vaj", "pa", "kxr", "ps", "adx", "fa", "fa", "pi", 197 "mg", "huw", "crr", "phr", "lah", "pl", "pt", "bfy", "lcq", "gu", 198 "prt", "ps", "pub", "qu", "qu", "emx", "rom", "rm", "ro", "ro", 199 "rn", "ru", "sg", "sa", "aqt", "hle", "sr", "hr", "isk", "si", 200 "oyb", "sk", "sk", "sl", "kmb", "se", "sm", "sn", "iba", "sd", 201 "so", "st", "es", "kln", "sq", "sc", "sc", "sr", "ss", "sgd", 202 "ulw", "su", "sw", "sv", "sw", "umi", "ty", "ta", "tt", "dtp", 203 "te", "bjp", "tg", "fil", "th", "tpo", "ola", "oyb", "bo", "itd", 204 "ras", "ti", "twm", "weo", "tdg", "tyj", "kak", "to", "tpn", "taj", 205 "tn", "ts", "tmh", "tk", "tr", "ak", "ug", "uk", "del", "wro", 206 "ema", "ur", "uz", "uz", "ve", "vi", "vo", "cy", "wgb", "nol", 207 "nwo", "wa", "wo", "cax", "xh", "acn", "waw", "kpe", "dmw", "suj", 208 "den", "zko", "rki", "yi", "ynq", "yi", "yrm", "lrr", "mtm", "yo", 209 "zom", "yug", "zap", "za", "zh", "scv", "kjh", "ms", "zu", "za", 210 }; 211 212 if (const char* replacement = SearchReplacement(languages, aliases, language)) { 213 language.Set(mozilla::MakeStringSpan(replacement)); 214 return true; 215 } 216 return false; 217 } 218 219 return false; 220 } 221 222 // Language subtags with complex mappings. 223 // Derived from CLDR Supplemental Data, version 48. 224 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 225 bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) { 226 MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span())); 227 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span())); 228 229 if (language.Length() == 2) { 230 return language.EqualTo("sh"); 231 } 232 233 if (language.Length() == 3) { 234 static const char languages[6][4] = { 235 "cnr", "drw", "hbs", "prs", "swc", "tnf", 236 }; 237 238 return HasReplacement(languages, language); 239 } 240 241 return false; 242 } 243 244 // Mappings from script subtags to preferred values. 245 // Derived from CLDR Supplemental Data, version 48. 246 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 247 bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) { 248 MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span())); 249 MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span())); 250 251 { 252 if (script.EqualTo("Qaai")) { 253 script.Set(mozilla::MakeStringSpan("Zinh")); 254 return true; 255 } 256 return false; 257 } 258 } 259 260 // Mappings from region subtags to preferred values. 261 // Derived from CLDR Supplemental Data, version 48. 262 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 263 bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) { 264 MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); 265 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); 266 267 if (region.Length() == 2) { 268 static const char regions[23][3] = { 269 "BU", "CS", "CT", "DD", "DY", "FQ", "FX", "HV", "JT", "MI", 270 "NH", "NQ", "PU", "PZ", "QU", "RH", "TP", "UK", "VD", "WK", 271 "YD", "YU", "ZR", 272 }; 273 static const char* aliases[23] = { 274 "MM", "RS", "KI", "DE", "BJ", "AQ", "FR", "BF", "UM", "UM", 275 "VU", "AQ", "UM", "PA", "EU", "ZW", "TL", "GB", "VN", "UM", 276 "YE", "RS", "CD", 277 }; 278 279 if (const char* replacement = SearchReplacement(regions, aliases, region)) { 280 region.Set(mozilla::MakeStringSpan(replacement)); 281 return true; 282 } 283 return false; 284 } 285 286 { 287 static const char regions[300][4] = { 288 "004", "008", "010", "012", "016", "020", "024", "028", "031", "032", 289 "036", "040", "044", "048", "050", "051", "052", "056", "060", "062", 290 "064", "068", "070", "072", "074", "076", "084", "086", "090", "092", 291 "096", "100", "104", "108", "112", "116", "120", "124", "132", "136", 292 "140", "144", "148", "152", "156", "158", "162", "166", "170", "174", 293 "175", "178", "180", "184", "188", "191", "192", "196", "203", "204", 294 "208", "212", "214", "218", "222", "226", "230", "231", "232", "233", 295 "234", "238", "239", "242", "246", "248", "249", "250", "254", "258", 296 "260", "262", "266", "268", "270", "275", "276", "278", "280", "288", 297 "292", "296", "300", "304", "308", "312", "316", "320", "324", "328", 298 "332", "334", "336", "340", "344", "348", "352", "356", "360", "364", 299 "368", "372", "376", "380", "384", "388", "392", "398", "400", "404", 300 "408", "410", "414", "417", "418", "422", "426", "428", "430", "434", 301 "438", "440", "442", "446", "450", "454", "458", "462", "466", "470", 302 "474", "478", "480", "484", "492", "496", "498", "499", "500", "504", 303 "508", "512", "516", "520", "524", "528", "531", "533", "534", "535", 304 "540", "548", "554", "558", "562", "566", "570", "574", "578", "580", 305 "581", "583", "584", "585", "586", "591", "598", "600", "604", "608", 306 "612", "616", "620", "624", "626", "630", "634", "638", "642", "643", 307 "646", "652", "654", "659", "660", "662", "663", "666", "670", "674", 308 "678", "682", "686", "688", "690", "694", "702", "703", "704", "705", 309 "706", "710", "716", "720", "724", "728", "729", "732", "736", "740", 310 "744", "748", "752", "756", "760", "762", "764", "768", "772", "776", 311 "780", "784", "788", "792", "795", "796", "798", "800", "804", "807", 312 "818", "826", "830", "831", "832", "833", "834", "840", "850", "854", 313 "858", "860", "862", "876", "882", "886", "887", "891", "894", "958", 314 "959", "960", "962", "963", "964", "965", "966", "967", "968", "969", 315 "970", "971", "972", "973", "974", "975", "976", "977", "978", "979", 316 "980", "981", "982", "983", "984", "985", "986", "987", "988", "989", 317 "990", "991", "992", "993", "994", "995", "996", "997", "998", "999", 318 }; 319 static const char* aliases[300] = { 320 "AF", "AL", "AQ", "DZ", "AS", "AD", "AO", "AG", "AZ", "AR", 321 "AU", "AT", "BS", "BH", "BD", "AM", "BB", "BE", "BM", "034", 322 "BT", "BO", "BA", "BW", "BV", "BR", "BZ", "IO", "SB", "VG", 323 "BN", "BG", "MM", "BI", "BY", "KH", "CM", "CA", "CV", "KY", 324 "CF", "LK", "TD", "CL", "CN", "TW", "CX", "CC", "CO", "KM", 325 "YT", "CG", "CD", "CK", "CR", "HR", "CU", "CY", "CZ", "BJ", 326 "DK", "DM", "DO", "EC", "SV", "GQ", "ET", "ET", "ER", "EE", 327 "FO", "FK", "GS", "FJ", "FI", "AX", "FR", "FR", "GF", "PF", 328 "TF", "DJ", "GA", "GE", "GM", "PS", "DE", "DE", "DE", "GH", 329 "GI", "KI", "GR", "GL", "GD", "GP", "GU", "GT", "GN", "GY", 330 "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", 331 "IQ", "IE", "IL", "IT", "CI", "JM", "JP", "KZ", "JO", "KE", 332 "KP", "KR", "KW", "KG", "LA", "LB", "LS", "LV", "LR", "LY", 333 "LI", "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", 334 "MQ", "MR", "MU", "MX", "MC", "MN", "MD", "ME", "MS", "MA", 335 "MZ", "OM", "NA", "NR", "NP", "NL", "CW", "AW", "SX", "BQ", 336 "NC", "VU", "NZ", "NI", "NE", "NG", "NU", "NF", "NO", "MP", 337 "UM", "FM", "MH", "PW", "PK", "PA", "PG", "PY", "PE", "PH", 338 "PN", "PL", "PT", "GW", "TL", "PR", "QA", "RE", "RO", "RU", 339 "RW", "BL", "SH", "KN", "AI", "LC", "MF", "PM", "VC", "SM", 340 "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "VN", "SI", 341 "SO", "ZA", "ZW", "YE", "ES", "SS", "SD", "EH", "SD", "SR", 342 "SJ", "SZ", "SE", "CH", "SY", "TJ", "TH", "TG", "TK", "TO", 343 "TT", "AE", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "MK", 344 "EG", "GB", "JE", "GG", "JE", "IM", "TZ", "US", "VI", "BF", 345 "UY", "UZ", "VE", "WF", "WS", "YE", "YE", "RS", "ZM", "AA", 346 "QM", "QN", "QP", "QQ", "QR", "QS", "QT", "EU", "QV", "QW", 347 "QX", "QY", "QZ", "XA", "XB", "XC", "XD", "XE", "XF", "XG", 348 "XH", "XI", "XJ", "XK", "XL", "XM", "XN", "XO", "XP", "XQ", 349 "XR", "XS", "XT", "XU", "XV", "XW", "XX", "XY", "XZ", "ZZ", 350 }; 351 352 if (const char* replacement = SearchReplacement(regions, aliases, region)) { 353 region.Set(mozilla::MakeStringSpan(replacement)); 354 return true; 355 } 356 return false; 357 } 358 } 359 360 // Region subtags with complex mappings. 361 // Derived from CLDR Supplemental Data, version 48. 362 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 363 bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) { 364 MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); 365 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); 366 367 if (region.Length() == 2) { 368 return region.EqualTo("AN") || 369 region.EqualTo("NT") || 370 region.EqualTo("PC") || 371 region.EqualTo("SU"); 372 } 373 374 { 375 static const char regions[8][4] = { 376 "172", "200", "530", "532", "536", "582", "810", "890", 377 }; 378 379 return HasReplacement(regions, region); 380 } 381 } 382 383 // Language subtags with complex mappings. 384 // Derived from CLDR Supplemental Data, version 48. 385 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 386 void mozilla::intl::Locale::PerformComplexLanguageMappings() { 387 MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); 388 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); 389 390 if (Language().EqualTo("cnr")) { 391 SetLanguage("sr"); 392 if (Region().Missing()) { 393 SetRegion("ME"); 394 } 395 } 396 else if (Language().EqualTo("drw") || 397 Language().EqualTo("prs") || 398 Language().EqualTo("tnf")) { 399 SetLanguage("fa"); 400 if (Region().Missing()) { 401 SetRegion("AF"); 402 } 403 } 404 else if (Language().EqualTo("hbs") || 405 Language().EqualTo("sh")) { 406 SetLanguage("sr"); 407 if (Script().Missing()) { 408 SetScript("Latn"); 409 } 410 } 411 else if (Language().EqualTo("swc")) { 412 SetLanguage("sw"); 413 if (Region().Missing()) { 414 SetRegion("CD"); 415 } 416 } 417 } 418 419 // Region subtags with complex mappings. 420 // Derived from CLDR Supplemental Data, version 48. 421 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 422 void mozilla::intl::Locale::PerformComplexRegionMappings() { 423 MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span())); 424 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); 425 MOZ_ASSERT(IsStructurallyValidRegionTag(Region().Span())); 426 MOZ_ASSERT(IsCanonicallyCasedRegionTag(Region().Span())); 427 428 if (Region().EqualTo("172")) { 429 if (Language().EqualTo("axm") || 430 Language().EqualTo("hy") || 431 Language().EqualTo("hyw") || 432 (Language().EqualTo("ku") && Script().EqualTo("Cyrl")) || 433 Language().EqualTo("rmi") || 434 (Language().EqualTo("und") && Script().EqualTo("Armn"))) { 435 SetRegion("AM"); 436 } 437 else if (Language().EqualTo("az") || 438 Language().EqualTo("bdk") || 439 Language().EqualTo("kjj") || 440 Language().EqualTo("kry") || 441 Language().EqualTo("tkr") || 442 Language().EqualTo("tly") || 443 Language().EqualTo("ttt") || 444 (Language().EqualTo("und") && Script().EqualTo("Aghb")) || 445 Language().EqualTo("xag")) { 446 SetRegion("AZ"); 447 } 448 else if (Language().EqualTo("be")) { 449 SetRegion("BY"); 450 } 451 else if (Language().EqualTo("ab") || 452 Language().EqualTo("bbl") || 453 Language().EqualTo("bhn") || 454 Language().EqualTo("jge") || 455 Language().EqualTo("ka") || 456 (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || 457 (Language().EqualTo("lzz") && Script().EqualTo("Geor")) || 458 Language().EqualTo("oav") || 459 Language().EqualTo("os") || 460 Language().EqualTo("sva") || 461 (Language().EqualTo("und") && Script().EqualTo("Geor")) || 462 (Language().EqualTo("und") && Script().EqualTo("Yezi")) || 463 Language().EqualTo("uum") || 464 Language().EqualTo("xmf")) { 465 SetRegion("GE"); 466 } 467 else if (Language().EqualTo("dng") || 468 Language().EqualTo("ky")) { 469 SetRegion("KG"); 470 } 471 else if (Language().EqualTo("kk") || 472 (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { 473 SetRegion("KZ"); 474 } 475 else if (Language().EqualTo("gag")) { 476 SetRegion("MD"); 477 } 478 else if (Language().EqualTo("abh") || 479 Language().EqualTo("paq") || 480 Language().EqualTo("sgh") || 481 Language().EqualTo("tg") || 482 Language().EqualTo("yah") || 483 Language().EqualTo("yai")) { 484 SetRegion("TJ"); 485 } 486 else if (Language().EqualTo("chg") || 487 Language().EqualTo("tk")) { 488 SetRegion("TM"); 489 } 490 else if (Language().EqualTo("crh") || 491 Language().EqualTo("got") || 492 Language().EqualTo("jct") || 493 Language().EqualTo("ji") || 494 Language().EqualTo("rue") || 495 Language().EqualTo("uk") || 496 (Language().EqualTo("und") && Script().EqualTo("Goth")) || 497 Language().EqualTo("yi")) { 498 SetRegion("UA"); 499 } 500 else if (Language().EqualTo("auz") || 501 Language().EqualTo("kaa") || 502 Language().EqualTo("sog") || 503 (Language().EqualTo("und") && Script().EqualTo("Chrs")) || 504 (Language().EqualTo("und") && Script().EqualTo("Sogd")) || 505 (Language().EqualTo("und") && Script().EqualTo("Sogo")) || 506 Language().EqualTo("uz") || 507 Language().EqualTo("xco")) { 508 SetRegion("UZ"); 509 } 510 else { 511 SetRegion("RU"); 512 } 513 } 514 else if (Region().EqualTo("200")) { 515 if (Language().EqualTo("rmc") || 516 Language().EqualTo("sk")) { 517 SetRegion("SK"); 518 } 519 else { 520 SetRegion("CZ"); 521 } 522 } 523 else if (Region().EqualTo("530") || 524 Region().EqualTo("532") || 525 Region().EqualTo("AN")) { 526 if (Language().EqualTo("vic")) { 527 SetRegion("SX"); 528 } 529 else { 530 SetRegion("CW"); 531 } 532 } 533 else if (Region().EqualTo("536") || 534 Region().EqualTo("NT")) { 535 if (Language().EqualTo("acm") || 536 Language().EqualTo("aii") || 537 Language().EqualTo("akk") || 538 (Language().EqualTo("arc") && Script().EqualTo("Hatr")) || 539 Language().EqualTo("ayp") || 540 Language().EqualTo("bjm") || 541 Language().EqualTo("ckb") || 542 Language().EqualTo("kqd") || 543 (Language().EqualTo("ku") && Script().EqualTo("Arab")) || 544 Language().EqualTo("mid") || 545 Language().EqualTo("sdb") || 546 Language().EqualTo("sdf") || 547 Language().EqualTo("syr") || 548 (Language().EqualTo("und") && Script().EqualTo("Hatr")) || 549 (Language().EqualTo("und") && Script().EqualTo("Syrc")) || 550 (Language().EqualTo("und") && Script().EqualTo("Xsux"))) { 551 SetRegion("IQ"); 552 } 553 else { 554 SetRegion("SA"); 555 } 556 } 557 else if (Region().EqualTo("582") || 558 Region().EqualTo("PC")) { 559 if (Language().EqualTo("mh")) { 560 SetRegion("MH"); 561 } 562 else if (Language().EqualTo("cal") || 563 Language().EqualTo("tpv")) { 564 SetRegion("MP"); 565 } 566 else if (Language().EqualTo("pau") || 567 Language().EqualTo("sov") || 568 Language().EqualTo("tox")) { 569 SetRegion("PW"); 570 } 571 else { 572 SetRegion("FM"); 573 } 574 } 575 else if (Region().EqualTo("810") || 576 Region().EqualTo("SU")) { 577 if (Language().EqualTo("axm") || 578 Language().EqualTo("hy") || 579 Language().EqualTo("hyw") || 580 (Language().EqualTo("ku") && Script().EqualTo("Cyrl")) || 581 Language().EqualTo("rmi") || 582 (Language().EqualTo("und") && Script().EqualTo("Armn"))) { 583 SetRegion("AM"); 584 } 585 else if (Language().EqualTo("az") || 586 Language().EqualTo("bdk") || 587 Language().EqualTo("kjj") || 588 Language().EqualTo("kry") || 589 Language().EqualTo("tkr") || 590 Language().EqualTo("tly") || 591 Language().EqualTo("ttt") || 592 (Language().EqualTo("und") && Script().EqualTo("Aghb")) || 593 Language().EqualTo("xag")) { 594 SetRegion("AZ"); 595 } 596 else if (Language().EqualTo("be")) { 597 SetRegion("BY"); 598 } 599 else if (Language().EqualTo("et") || 600 Language().EqualTo("ie") || 601 Language().EqualTo("vro")) { 602 SetRegion("EE"); 603 } 604 else if (Language().EqualTo("ab") || 605 Language().EqualTo("bbl") || 606 Language().EqualTo("bhn") || 607 Language().EqualTo("jge") || 608 Language().EqualTo("ka") || 609 (Language().EqualTo("ku") && Script().EqualTo("Yezi")) || 610 (Language().EqualTo("lzz") && Script().EqualTo("Geor")) || 611 Language().EqualTo("oav") || 612 Language().EqualTo("os") || 613 Language().EqualTo("sva") || 614 (Language().EqualTo("und") && Script().EqualTo("Geor")) || 615 (Language().EqualTo("und") && Script().EqualTo("Yezi")) || 616 Language().EqualTo("uum") || 617 Language().EqualTo("xmf")) { 618 SetRegion("GE"); 619 } 620 else if (Language().EqualTo("dng") || 621 Language().EqualTo("ky")) { 622 SetRegion("KG"); 623 } 624 else if (Language().EqualTo("kk") || 625 (Language().EqualTo("ug") && Script().EqualTo("Cyrl"))) { 626 SetRegion("KZ"); 627 } 628 else if (Language().EqualTo("kdr") || 629 Language().EqualTo("lt") || 630 Language().EqualTo("olt") || 631 Language().EqualTo("sgs")) { 632 SetRegion("LT"); 633 } 634 else if (Language().EqualTo("liv") || 635 Language().EqualTo("ltg") || 636 Language().EqualTo("lv")) { 637 SetRegion("LV"); 638 } 639 else if (Language().EqualTo("gag")) { 640 SetRegion("MD"); 641 } 642 else if (Language().EqualTo("abh") || 643 Language().EqualTo("paq") || 644 Language().EqualTo("sgh") || 645 Language().EqualTo("tg") || 646 Language().EqualTo("yah") || 647 Language().EqualTo("yai")) { 648 SetRegion("TJ"); 649 } 650 else if (Language().EqualTo("chg") || 651 Language().EqualTo("tk")) { 652 SetRegion("TM"); 653 } 654 else if (Language().EqualTo("crh") || 655 Language().EqualTo("got") || 656 Language().EqualTo("jct") || 657 Language().EqualTo("ji") || 658 Language().EqualTo("rue") || 659 Language().EqualTo("uk") || 660 (Language().EqualTo("und") && Script().EqualTo("Goth")) || 661 Language().EqualTo("yi")) { 662 SetRegion("UA"); 663 } 664 else if (Language().EqualTo("auz") || 665 Language().EqualTo("kaa") || 666 Language().EqualTo("sog") || 667 (Language().EqualTo("und") && Script().EqualTo("Chrs")) || 668 (Language().EqualTo("und") && Script().EqualTo("Sogd")) || 669 (Language().EqualTo("und") && Script().EqualTo("Sogo")) || 670 Language().EqualTo("uz") || 671 Language().EqualTo("xco")) { 672 SetRegion("UZ"); 673 } 674 else { 675 SetRegion("RU"); 676 } 677 } 678 else if (Region().EqualTo("890")) { 679 if (Language().EqualTo("bs")) { 680 SetRegion("BA"); 681 } 682 else if (Language().EqualTo("ckm") || 683 Language().EqualTo("dlm") || 684 Language().EqualTo("hr") || 685 Language().EqualTo("ist") || 686 Language().EqualTo("ruo")) { 687 SetRegion("HR"); 688 } 689 else if (Language().EqualTo("mk")) { 690 SetRegion("MK"); 691 } 692 else if (Language().EqualTo("sl")) { 693 SetRegion("SI"); 694 } 695 else { 696 SetRegion("RS"); 697 } 698 } 699 } 700 701 static auto ToSpan(const mozilla::Span<const char>& aSpan) { 702 return aSpan; 703 } 704 705 template <size_t N> 706 static auto ToSpan(const mozilla::intl::LanguageTagSubtag<N>& aSubtag) { 707 return aSubtag.Span(); 708 } 709 710 template <typename T, typename U = T> 711 static bool IsLessThan(const T& a, const U& b) { 712 return ToSpan(a) < ToSpan(b); 713 } 714 715 // Mappings from variant subtags to preferred values. 716 // Derived from CLDR Supplemental Data, version 48. 717 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 718 bool mozilla::intl::Locale::PerformVariantMappings() { 719 // The variant subtags need to be sorted for binary search. 720 MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), 721 IsLessThan<decltype(mVariants)::ElementType>)); 722 723 auto removeVariantAt = [&](size_t index) { 724 mVariants.erase(mVariants.begin() + index); 725 }; 726 727 auto insertVariantSortedIfNotPresent = [&](mozilla::Span<const char> variant) { 728 auto* p = std::lower_bound( 729 mVariants.begin(), mVariants.end(), variant, 730 IsLessThan<decltype(mVariants)::ElementType, decltype(variant)>); 731 732 // Don't insert the replacement when already present. 733 if (p != mVariants.end() && p->Span() == variant) { 734 return true; 735 } 736 737 // Insert the preferred variant in sort order. 738 auto preferred = mozilla::intl::VariantSubtag{variant}; 739 return !!mVariants.insert(p, preferred); 740 }; 741 742 for (size_t i = 0; i < mVariants.length();) { 743 const auto& variant = mVariants[i]; 744 MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant.Span())); 745 746 if (variant.Span() == mozilla::MakeStringSpan("arevela") || 747 variant.Span() == mozilla::MakeStringSpan("arevmda") || 748 variant.Span() == mozilla::MakeStringSpan("bokmal") || 749 variant.Span() == mozilla::MakeStringSpan("hakka") || 750 variant.Span() == mozilla::MakeStringSpan("lojban") || 751 variant.Span() == mozilla::MakeStringSpan("nynorsk") || 752 variant.Span() == mozilla::MakeStringSpan("saaho") || 753 variant.Span() == mozilla::MakeStringSpan("xiang")) { 754 removeVariantAt(i); 755 } 756 else if (variant.Span() == mozilla::MakeStringSpan("aaland")) { 757 removeVariantAt(i); 758 SetRegion("AX"); 759 } 760 else if (variant.Span() == mozilla::MakeStringSpan("heploc")) { 761 removeVariantAt(i); 762 if (!insertVariantSortedIfNotPresent(mozilla::MakeStringSpan("alalc97"))) { 763 return false; 764 } 765 } 766 else if (variant.Span() == mozilla::MakeStringSpan("polytoni")) { 767 removeVariantAt(i); 768 if (!insertVariantSortedIfNotPresent(mozilla::MakeStringSpan("polyton"))) { 769 return false; 770 } 771 } 772 else { 773 i++; 774 } 775 } 776 return true; 777 } 778 779 // Canonicalize legacy locale identifiers. 780 // Derived from CLDR Supplemental Data, version 48. 781 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 782 bool mozilla::intl::Locale::UpdateLegacyMappings() { 783 // We're mapping legacy tags to non-legacy form here. 784 // Other tags remain unchanged. 785 // 786 // Legacy tags are either sign language tags ("sgn") or have one or multiple 787 // variant subtags. Therefore we can quickly exclude most tags by checking 788 // these two subtags. 789 790 MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span())); 791 792 if (!Language().EqualTo("sgn") && mVariants.length() == 0) { 793 return true; 794 } 795 796 #ifdef DEBUG 797 for (const auto& variant : Variants()) { 798 MOZ_ASSERT(IsStructurallyValidVariantTag(variant)); 799 MOZ_ASSERT(IsCanonicallyCasedVariantTag(variant)); 800 } 801 #endif 802 803 // The variant subtags need to be sorted for binary search. 804 MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(), 805 IsLessThan<decltype(mVariants)::ElementType>)); 806 807 auto findVariant = [this](mozilla::Span<const char> variant) { 808 auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, 809 IsLessThan<decltype(mVariants)::ElementType, 810 decltype(variant)>); 811 812 if (p != mVariants.end() && p->Span() == variant) { 813 return p; 814 } 815 return static_cast<decltype(p)>(nullptr); 816 }; 817 818 auto insertVariantSortedIfNotPresent = [&](mozilla::Span<const char> variant) { 819 auto* p = std::lower_bound(mVariants.begin(), mVariants.end(), variant, 820 IsLessThan<decltype(mVariants)::ElementType, 821 decltype(variant)>); 822 823 // Don't insert the replacement when already present. 824 if (p != mVariants.end() && p->Span() == variant) { 825 return true; 826 } 827 828 // Insert the preferred variant in sort order. 829 auto preferred = mozilla::intl::VariantSubtag{variant}; 830 return !!mVariants.insert(p, preferred); 831 }; 832 833 auto removeVariant = [&](auto* p) { 834 size_t index = std::distance(mVariants.begin(), p); 835 mVariants.erase(mVariants.begin() + index); 836 }; 837 838 auto removeVariants = [&](auto* p, auto* q) { 839 size_t pIndex = std::distance(mVariants.begin(), p); 840 size_t qIndex = std::distance(mVariants.begin(), q); 841 MOZ_ASSERT(pIndex < qIndex, "variant subtags are sorted"); 842 843 mVariants.erase(mVariants.begin() + qIndex); 844 mVariants.erase(mVariants.begin() + pIndex); 845 }; 846 847 if (mVariants.length() >= 2) { 848 if (auto* hepburn = findVariant(mozilla::MakeStringSpan("hepburn"))) { 849 if (auto* heploc = findVariant(mozilla::MakeStringSpan("heploc"))) { 850 removeVariants(hepburn, heploc); 851 852 if (!insertVariantSortedIfNotPresent(mozilla::MakeStringSpan("alalc97"))) { 853 return false; 854 } 855 } 856 } 857 } 858 859 if (Language().EqualTo("sgn")) { 860 if (Region().Present() && SignLanguageMapping(mLanguage, Region())) { 861 mRegion.Set(mozilla::MakeStringSpan("")); 862 } 863 } 864 else if (Language().EqualTo("aa") || 865 Language().EqualTo("aar")) { 866 if (auto* saaho = findVariant(mozilla::MakeStringSpan("saaho"))) { 867 removeVariant(saaho); 868 SetLanguage("ssy"); 869 } 870 } 871 else if (Language().EqualTo("arm") || 872 Language().EqualTo("hy") || 873 Language().EqualTo("hye")) { 874 if (auto* arevmda = findVariant(mozilla::MakeStringSpan("arevmda"))) { 875 removeVariant(arevmda); 876 SetLanguage("hyw"); 877 } 878 } 879 else if (Language().EqualTo("art")) { 880 if (auto* lojban = findVariant(mozilla::MakeStringSpan("lojban"))) { 881 removeVariant(lojban); 882 SetLanguage("jbo"); 883 } 884 } 885 else if (Language().EqualTo("cel")) { 886 if (auto* gaulish = findVariant(mozilla::MakeStringSpan("gaulish"))) { 887 removeVariant(gaulish); 888 SetLanguage("xtg"); 889 } 890 } 891 else if (Language().EqualTo("chi") || 892 Language().EqualTo("cmn") || 893 Language().EqualTo("zh") || 894 Language().EqualTo("zho")) { 895 if (auto* guoyu = findVariant(mozilla::MakeStringSpan("guoyu"))) { 896 if (auto* hakka = findVariant(mozilla::MakeStringSpan("hakka"))) { 897 removeVariants(guoyu, hakka); 898 SetLanguage("hak"); 899 return true; 900 } 901 } 902 if (auto* guoyu = findVariant(mozilla::MakeStringSpan("guoyu"))) { 903 if (auto* xiang = findVariant(mozilla::MakeStringSpan("xiang"))) { 904 removeVariants(guoyu, xiang); 905 SetLanguage("hsn"); 906 return true; 907 } 908 } 909 if (auto* guoyu = findVariant(mozilla::MakeStringSpan("guoyu"))) { 910 removeVariant(guoyu); 911 SetLanguage("zh"); 912 } 913 else if (auto* hakka = findVariant(mozilla::MakeStringSpan("hakka"))) { 914 removeVariant(hakka); 915 SetLanguage("hak"); 916 } 917 else if (auto* xiang = findVariant(mozilla::MakeStringSpan("xiang"))) { 918 removeVariant(xiang); 919 SetLanguage("hsn"); 920 } 921 } 922 else if (Language().EqualTo("no") || 923 Language().EqualTo("nor")) { 924 if (auto* bokmal = findVariant(mozilla::MakeStringSpan("bokmal"))) { 925 removeVariant(bokmal); 926 SetLanguage("nb"); 927 } 928 else if (auto* nynorsk = findVariant(mozilla::MakeStringSpan("nynorsk"))) { 929 removeVariant(nynorsk); 930 SetLanguage("nn"); 931 } 932 } 933 934 return true; 935 } 936 937 // Mappings from legacy sign languages. 938 // Derived from CLDR Supplemental Data, version 48. 939 // https://unicode.org/Public/cldr/48/cldr-common-48.zip 940 bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language, 941 const RegionSubtag& region) { 942 MOZ_ASSERT(language.EqualTo("sgn")); 943 MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span())); 944 MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span())); 945 946 if (region.Length() == 2) { 947 static const char regions[22][3] = { 948 "BR", "CO", "DD", "DE", "DK", "ES", "FR", "FX", "GB", "GR", 949 "IE", "IT", "JP", "MX", "NI", "NL", "NO", "PT", "SE", "UK", 950 "US", "ZA", 951 }; 952 static const char* aliases[22] = { 953 "bzs", "csn", "gsg", "gsg", "dsl", "ssp", "fsl", "fsl", "bfi", "gss", 954 "isg", "ise", "jsl", "mfs", "ncs", "dse", "nsi", "psr", "swl", "bfi", 955 "ase", "sfs", 956 }; 957 958 if (const char* replacement = SearchReplacement(regions, aliases, region)) { 959 language.Set(mozilla::MakeStringSpan(replacement)); 960 return true; 961 } 962 return false; 963 } 964 965 { 966 static const char regions[22][4] = { 967 "076", "170", "208", "249", "250", "276", "278", "280", "300", "372", 968 "380", "392", "484", "528", "558", "578", "620", "710", "724", "752", 969 "826", "840", 970 }; 971 static const char* aliases[22] = { 972 "bzs", "csn", "dsl", "fsl", "fsl", "gsg", "gsg", "gsg", "gss", "isg", 973 "ise", "jsl", "mfs", "dse", "ncs", "nsi", "psr", "sfs", "ssp", "swl", 974 "bfi", "ase", 975 }; 976 977 if (const char* replacement = SearchReplacement(regions, aliases, region)) { 978 language.Set(mozilla::MakeStringSpan(replacement)); 979 return true; 980 } 981 return false; 982 } 983 } 984 985 template <size_t Length> 986 static inline bool IsUnicodeKey(mozilla::Span<const char> key, const char (&str)[Length]) { 987 static_assert(Length == UnicodeKeyLength + 1, 988 "Unicode extension key is two characters long"); 989 return memcmp(key.data(), str, Length - 1) == 0; 990 } 991 992 template <size_t Length> 993 static inline bool IsUnicodeType(mozilla::Span<const char> type, const char (&str)[Length]) { 994 static_assert(Length > UnicodeKeyLength + 1, 995 "Unicode extension type contains more than two characters"); 996 return type.size() == (Length - 1) && 997 memcmp(type.data(), str, Length - 1) == 0; 998 } 999 1000 static int32_t CompareUnicodeType(const char* a, mozilla::Span<const char> b) { 1001 MOZ_ASSERT(!std::char_traits<char>::find(b.data(), b.size(), '\0'), 1002 "unexpected null-character in string"); 1003 1004 using UnsignedChar = unsigned char; 1005 for (size_t i = 0; i < b.size(); i++) { 1006 // |a| is zero-terminated and |b| doesn't contain a null-terminator. So if 1007 // we've reached the end of |a|, the below if-statement will always be true. 1008 // That ensures we don't read past the end of |a|. 1009 if (int32_t r = UnsignedChar(a[i]) - UnsignedChar(b[i])) { 1010 return r; 1011 } 1012 } 1013 1014 // Return zero if both strings are equal or a positive number if |b| is a 1015 // prefix of |a|. 1016 return int32_t(UnsignedChar(a[b.size()])); 1017 } 1018 1019 template <size_t Length> 1020 static inline const char* SearchUnicodeReplacement( 1021 const char* (&types)[Length], const char* (&aliases)[Length], 1022 mozilla::Span<const char> type) { 1023 1024 auto p = std::lower_bound(std::begin(types), std::end(types), type, 1025 [](const auto& a, const auto& b) { 1026 return CompareUnicodeType(a, b) < 0; 1027 }); 1028 if (p != std::end(types) && CompareUnicodeType(*p, type) == 0) { 1029 return aliases[std::distance(std::begin(types), p)]; 1030 } 1031 return nullptr; 1032 } 1033 1034 /** 1035 * Mapping from deprecated BCP 47 Unicode extension types to their preferred 1036 * values. 1037 * 1038 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files 1039 * Spec: https://www.unicode.org/reports/tr35/#t_Extension 1040 */ 1041 const char* mozilla::intl::Locale::ReplaceUnicodeExtensionType( 1042 mozilla::Span<const char> key, mozilla::Span<const char> type) { 1043 MOZ_ASSERT(key.size() == UnicodeKeyLength); 1044 MOZ_ASSERT(IsCanonicallyCasedUnicodeKey(key)); 1045 1046 MOZ_ASSERT(type.size() > UnicodeKeyLength); 1047 MOZ_ASSERT(IsCanonicallyCasedUnicodeType(type)); 1048 1049 if (IsUnicodeKey(key, "ca")) { 1050 if (IsUnicodeType(type, "ethiopic-amete-alem")) { 1051 return "ethioaa"; 1052 } 1053 if (IsUnicodeType(type, "islamicc")) { 1054 return "islamic-civil"; 1055 } 1056 } 1057 else if (IsUnicodeKey(key, "kb") || 1058 IsUnicodeKey(key, "kc") || 1059 IsUnicodeKey(key, "kh") || 1060 IsUnicodeKey(key, "kk") || 1061 IsUnicodeKey(key, "kn")) { 1062 if (IsUnicodeType(type, "yes")) { 1063 return "true"; 1064 } 1065 } 1066 else if (IsUnicodeKey(key, "ks")) { 1067 if (IsUnicodeType(type, "primary")) { 1068 return "level1"; 1069 } 1070 if (IsUnicodeType(type, "tertiary")) { 1071 return "level3"; 1072 } 1073 } 1074 else if (IsUnicodeKey(key, "ms")) { 1075 if (IsUnicodeType(type, "imperial")) { 1076 return "uksystem"; 1077 } 1078 } 1079 else if (IsUnicodeKey(key, "rg") || 1080 IsUnicodeKey(key, "sd")) { 1081 static const char* types[147] = { 1082 "cn11" , "cn12" , "cn13" , "cn14" , "cn15" , "cn21" , "cn22" , 1083 "cn23" , "cn31" , "cn32" , "cn33" , "cn34" , "cn35" , "cn36" , 1084 "cn37" , "cn41" , "cn42" , "cn43" , "cn44" , "cn45" , "cn46" , 1085 "cn50" , "cn51" , "cn52" , "cn53" , "cn54" , "cn61" , "cn62" , 1086 "cn63" , "cn64" , "cn65" , "cn71" , "cn91" , "cn92" , "cz10a" , 1087 "cz10b" , "cz10c" , "cz10d" , "cz10e" , "cz10f" , "cz611" , "cz612" , 1088 "cz613" , "cz614" , "cz615" , "cz621" , "cz622" , "cz623" , "cz624" , 1089 "cz626" , "cz627" , "czjc" , "czjm" , "czka" , "czkr" , "czli" , 1090 "czmo" , "czol" , "czpa" , "czpl" , "czpr" , "czst" , "czus" , 1091 "czvy" , "czzl" , "fi01" , "fra" , "frb" , "frbl" , "frc" , 1092 "frcp" , "frd" , "fre" , "frf" , "frg" , "frgf" , "frgp" , 1093 "frgua" , "frh" , "fri" , "frj" , "frk" , "frl" , "frlre" , 1094 "frm" , "frmay" , "frmf" , "frmq" , "frn" , "frnc" , "fro" , 1095 "frp" , "frpf" , "frpm" , "frq" , "frr" , "frre" , "frs" , 1096 "frt" , "frtf" , "fru" , "frv" , "frwf" , "fryt" , "laxn" , 1097 "lud" , "lug" , "lul" , "mrnkc" , "nlaw" , "nlcw" , "nlsx" , 1098 "no23" , "nzn" , "nzs" , "omba" , "omsh" , "plds" , "plkp" , 1099 "pllb" , "plld" , "pllu" , "plma" , "plmz" , "plop" , "plpd" , 1100 "plpk" , "plpm" , "plsk" , "plsl" , "plwn" , "plwp" , "plzp" , 1101 "shta" , "tteto" , "ttrcm" , "ttwto" , "twkhq" , "twtnq" , "twtpq" , 1102 "twtxq" , "usas" , "usgu" , "usmp" , "uspr" , "usum" , "usvi" , 1103 }; 1104 static const char* aliases[147] = { 1105 "cnbj" , "cntj" , "cnhe" , "cnsx" , "cnmn" , "cnln" , "cnjl" , 1106 "cnhl" , "cnsh" , "cnjs" , "cnzj" , "cnah" , "cnfj" , "cnjx" , 1107 "cnsd" , "cnha" , "cnhb" , "cnhn" , "cngd" , "cngx" , "cnhi" , 1108 "cncq" , "cnsc" , "cngz" , "cnyn" , "cnxz" , "cnsn" , "cngs" , 1109 "cnqh" , "cnnx" , "cnxj" , "twzzzz", "hkzzzz", "mozzzz", "cz110" , 1110 "cz111" , "cz112" , "cz113" , "cz114" , "cz115" , "cz663" , "cz632" , 1111 "cz633" , "cz634" , "cz635" , "cz641" , "cz642" , "cz643" , "cz644" , 1112 "cz646" , "cz647" , "cz31" , "cz64" , "cz41" , "cz52" , "cz51" , 1113 "cz80" , "cz71" , "cz53" , "cz32" , "cz10" , "cz20" , "cz42" , 1114 "cz63" , "cz72" , "axzzzz", "frges" , "frnaq" , "blzzzz", "frara" , 1115 "cpzzzz", "frbfc" , "frbre" , "frcvl" , "frges" , "gfzzzz", "gpzzzz", 1116 "gpzzzz", "frcor" , "frbfc" , "fridf" , "frocc" , "frnaq" , "rezzzz", 1117 "frges" , "ytzzzz", "mfzzzz", "mqzzzz", "frocc" , "nczzzz", "frhdf" , 1118 "frnor" , "pfzzzz", "pmzzzz", "frnor" , "frpdl" , "rezzzz", "frhdf" , 1119 "frnaq" , "tfzzzz", "frpac" , "frara" , "wfzzzz", "ytzzzz", "laxs" , 1120 "lucl" , "luec" , "luca" , "mr13" , "awzzzz", "cwzzzz", "sxzzzz", 1121 "no50" , "nzauk" , "nzcan" , "ombj" , "omsj" , "pl02" , "pl04" , 1122 "pl08" , "pl10" , "pl06" , "pl12" , "pl14" , "pl16" , "pl20" , 1123 "pl18" , "pl22" , "pl26" , "pl24" , "pl28" , "pl30" , "pl32" , 1124 "tazzzz", "tttob" , "ttmrc" , "tttob" , "twkhh" , "twtnn" , "twnwt" , 1125 "twtxg" , "aszzzz", "guzzzz", "mpzzzz", "przzzz", "umzzzz", "vizzzz", 1126 }; 1127 return SearchUnicodeReplacement(types, aliases, type); 1128 } 1129 else if (IsUnicodeKey(key, "tz")) { 1130 static const char* types[50] = { 1131 "aqams" , "aukns" , "caffs" , "camtr" , "canpg" , "capnt" , 1132 "cathu" , "cayzf" , "cet" , "cnckg" , "cnhrb" , "cnkhg" , 1133 "cst6cdt" , "cuba" , "eet" , "egypt" , "eire" , "est" , 1134 "est5edt" , "factory" , "gaza" , "gmt0" , "hongkong", "hst" , 1135 "iceland" , "iran" , "israel" , "jamaica" , "japan" , "libya" , 1136 "met" , "mncoq" , "mst" , "mst7mdt" , "mxstis" , "navajo" , 1137 "poland" , "portugal", "prc" , "pst8pdt" , "roc" , "rok" , 1138 "turkey" , "uaozh" , "uauzh" , "uct" , "umjon" , "usnavajo", 1139 "wet" , "zulu" , 1140 }; 1141 static const char* aliases[50] = { 1142 "aqmcm" , "auhba" , "cawnp" , "cator" , "cator" , "caiql" , 1143 "cator" , "caedm" , "bebru" , "cnsha" , "cnsha" , "cnurc" , 1144 "uschi" , "cuhav" , "grath" , "egcai" , "iedub" , "papty" , 1145 "usnyc" , "unk" , "gazastrp", "gmt" , "hkhkg" , "ushnl" , 1146 "isrey" , "irthr" , "jeruslm" , "jmkin" , "jptyo" , "lytip" , 1147 "bebru" , "mnuln" , "usphx" , "usden" , "mxtij" , "usden" , 1148 "plwaw" , "ptlis" , "cnsha" , "uslax" , "twtpe" , "krsel" , 1149 "trist" , "uaiev" , "uaiev" , "utc" , "ushnl" , "usden" , 1150 "ptlis" , "utc" , 1151 }; 1152 return SearchUnicodeReplacement(types, aliases, type); 1153 } 1154 return nullptr; 1155 } 1156 1157 template <size_t Length> 1158 static inline bool IsTransformKey(mozilla::Span<const char> key, const char (&str)[Length]) { 1159 static_assert(Length == TransformKeyLength + 1, 1160 "Transform extension key is two characters long"); 1161 return memcmp(key.data(), str, Length - 1) == 0; 1162 } 1163 1164 template <size_t Length> 1165 static inline bool IsTransformType(mozilla::Span<const char> type, const char (&str)[Length]) { 1166 static_assert(Length > TransformKeyLength + 1, 1167 "Transform extension type contains more than two characters"); 1168 return type.size() == (Length - 1) && 1169 memcmp(type.data(), str, Length - 1) == 0; 1170 } 1171 1172 /** 1173 * Mapping from deprecated BCP 47 Transform extension types to their preferred 1174 * values. 1175 * 1176 * Spec: https://www.unicode.org/reports/tr35/#Unicode_Locale_Extension_Data_Files 1177 * Spec: https://www.unicode.org/reports/tr35/#t_Extension 1178 */ 1179 const char* mozilla::intl::Locale::ReplaceTransformExtensionType( 1180 mozilla::Span<const char> key, mozilla::Span<const char> type) { 1181 MOZ_ASSERT(key.size() == TransformKeyLength); 1182 MOZ_ASSERT(IsCanonicallyCasedTransformKey(key)); 1183 1184 MOZ_ASSERT(type.size() > TransformKeyLength); 1185 MOZ_ASSERT(IsCanonicallyCasedTransformType(type)); 1186 1187 if (IsTransformKey(key, "d0")) { 1188 if (IsTransformType(type, "name")) { 1189 return "charname"; 1190 } 1191 } 1192 else if (IsTransformKey(key, "m0")) { 1193 if (IsTransformType(type, "beta-metsehaf")) { 1194 return "betamets"; 1195 } 1196 if (IsTransformType(type, "ies-jes")) { 1197 return "iesjes"; 1198 } 1199 if (IsTransformType(type, "names")) { 1200 return "prprname"; 1201 } 1202 if (IsTransformType(type, "tekie-alibekit")) { 1203 return "tekieali"; 1204 } 1205 } 1206 return nullptr; 1207 }