nsTextRunTransformations.cpp (39157B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "nsTextRunTransformations.h" 8 9 #include <utility> 10 11 #include "GreekCasing.h" 12 #include "IrishCasing.h" 13 #include "MathMLTextRunFactory.h" 14 #include "mozilla/ComputedStyleInlines.h" 15 #include "mozilla/MemoryReporting.h" 16 #include "mozilla/StaticPrefs_layout.h" 17 #include "mozilla/StaticPrefs_mathml.h" 18 #include "mozilla/TextEditor.h" 19 #include "mozilla/gfx/2D.h" 20 #include "nsGkAtoms.h" 21 #include "nsLineBreaker.h" 22 #include "nsSpecialCasingData.h" 23 #include "nsStyleConsts.h" 24 #include "nsStyleUtil.h" 25 #include "nsTextFrameUtils.h" 26 #include "nsUnicharUtils.h" 27 #include "nsUnicodeProperties.h" 28 29 using namespace mozilla; 30 using namespace mozilla::gfx; 31 32 // Unicode characters needing special casing treatment in tr/az languages 33 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130 34 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131 35 36 // Greek sigma needs custom handling for the lowercase transform; for details 37 // see bug 740120. 38 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 39 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 40 #define GREEK_SMALL_LETTER_SIGMA 0x03C3 41 42 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create( 43 const gfxTextRunFactory::Parameters* aParams, 44 nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup, 45 const char16_t* aString, uint32_t aLength, 46 const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2, 47 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) { 48 NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT), 49 "didn't expect text to be marked as 8-bit here"); 50 51 void* storage = 52 AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); 53 if (!storage) { 54 return nullptr; 55 } 56 57 RefPtr<nsTransformedTextRun> result = new (storage) 58 nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength, 59 aFlags, aFlags2, std::move(aStyles), aOwnsFactory); 60 return result.forget(); 61 } 62 63 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength, 64 bool* aCapitalization) { 65 if (mCapitalize.IsEmpty()) { 66 // XXX(Bug 1631371) Check if this should use a fallible operation as it 67 // pretended earlier. 68 mCapitalize.AppendElements(GetLength()); 69 memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool)); 70 } 71 memcpy(mCapitalize.Elements() + aStart, aCapitalization, 72 aLength * sizeof(bool)); 73 mNeedsRebuild = true; 74 } 75 76 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange, 77 const uint8_t* aBreakBefore) { 78 bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore); 79 if (changed) { 80 mNeedsRebuild = true; 81 } 82 return changed; 83 } 84 85 void nsTransformedTextRun::SetEmergencyWrapPositions() { 86 // This parallels part of what gfxShapedText::SetupClusterBoundaries() does 87 // for normal textruns. 88 bool prevWasHyphen = false; 89 for (uint32_t pos : IntegerRange(mString.Length())) { 90 const char16_t ch = mString[pos]; 91 if (prevWasHyphen) { 92 if (nsContentUtils::IsAlphanumeric(ch)) { 93 mCharacterGlyphs[pos].SetCanBreakBefore( 94 CompressedGlyph::FLAG_BREAK_TYPE_EMERGENCY_WRAP); 95 } 96 prevWasHyphen = false; 97 } 98 if (nsContentUtils::IsHyphen(ch) && pos && 99 nsContentUtils::IsAlphanumeric(mString[pos - 1])) { 100 prevWasHyphen = true; 101 } 102 } 103 } 104 105 size_t nsTransformedTextRun::SizeOfExcludingThis( 106 mozilla::MallocSizeOf aMallocSizeOf) { 107 size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); 108 total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf); 109 total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf); 110 if (mOwnsFactory) { 111 total += aMallocSizeOf(mFactory); 112 } 113 return total; 114 } 115 116 size_t nsTransformedTextRun::SizeOfIncludingThis( 117 mozilla::MallocSizeOf aMallocSizeOf) { 118 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); 119 } 120 121 already_AddRefed<nsTransformedTextRun> 122 nsTransformingTextRunFactory::MakeTextRun( 123 const char16_t* aString, uint32_t aLength, 124 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, 125 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2, 126 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) { 127 return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString, 128 aLength, aFlags, aFlags2, 129 std::move(aStyles), aOwnsFactory); 130 } 131 132 already_AddRefed<nsTransformedTextRun> 133 nsTransformingTextRunFactory::MakeTextRun( 134 const uint8_t* aString, uint32_t aLength, 135 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, 136 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2, 137 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) { 138 // We'll only have a Unicode code path to minimize the amount of code needed 139 // for these rarely used features 140 NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), 141 aLength); 142 return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, 143 aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2, 144 std::move(aStyles), aOwnsFactory); 145 } 146 147 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, 148 const bool* aCharsToMerge, 149 const bool* aDeletedChars) { 150 MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!"); 151 uint32_t offset = 0; 152 AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs; 153 const gfxTextRun::CompressedGlyph continuationGlyph = 154 gfxTextRun::CompressedGlyph::MakeComplex(false, false); 155 const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs(); 156 gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs(); 157 for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc)); 158 !iter.AtEnd(); iter.NextRun()) { 159 const gfxTextRun::GlyphRun* run = iter.GlyphRun(); 160 aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false, 161 run->mOrientation, run->mIsCJK); 162 163 bool anyMissing = false; 164 uint32_t mergeRunStart = iter.StringStart(); 165 // Initialize to a copy of the first source glyph in the merge run. 166 gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; 167 uint32_t stringEnd = iter.StringEnd(); 168 for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) { 169 const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; 170 if (g.IsSimpleGlyph()) { 171 if (!anyMissing) { 172 gfxTextRun::DetailedGlyph details; 173 details.mGlyphID = g.GetSimpleGlyph(); 174 details.mAdvance = g.GetSimpleAdvance(); 175 glyphs.AppendElement(details); 176 } 177 } else { 178 if (g.IsMissing()) { 179 anyMissing = true; 180 glyphs.Clear(); 181 } 182 if (g.GetGlyphCount() > 0) { 183 glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); 184 } 185 } 186 187 if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) { 188 // next char is supposed to merge with current, so loop without 189 // writing current merged glyph to the destination 190 continue; 191 } 192 193 // If the start of the merge run is actually a character that should 194 // have been merged with the previous character (this can happen 195 // if there's a font change in the middle of a case-mapped character, 196 // that decomposed into a sequence of base+diacritics, for example), 197 // just discard the entire merge run. See comment at start of this 198 // function. 199 NS_WARNING_ASSERTION( 200 !aCharsToMerge[mergeRunStart], 201 "unable to merge across a glyph run boundary, glyph(s) discarded"); 202 if (!aCharsToMerge[mergeRunStart]) { 203 // Determine if we can just copy the existing simple glyph record. 204 if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) { 205 destGlyphs[offset] = mergedGlyph; 206 } else { 207 // Otherwise set up complex glyph record and store detailed glyphs. 208 mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), 209 mergedGlyph.IsLigatureGroupStart()); 210 destGlyphs[offset] = mergedGlyph; 211 aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements()); 212 if (anyMissing) { 213 destGlyphs[offset].SetMissing(); 214 } 215 } 216 offset++; 217 218 while (offset < aDest->GetLength() && aDeletedChars[offset]) { 219 destGlyphs[offset++] = continuationGlyph; 220 } 221 } 222 223 glyphs.Clear(); 224 anyMissing = false; 225 mergeRunStart = k + 1; 226 if (mergeRunStart < stringEnd) { 227 mergedGlyph = srcGlyphs[mergeRunStart]; 228 } 229 } 230 NS_ASSERTION(glyphs.Length() == 0, 231 "Leftover glyphs, don't request merging of the last character " 232 "with its next!"); 233 } 234 NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); 235 } 236 237 gfxTextRunFactory::Parameters GetParametersForInner( 238 nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags, 239 DrawTarget* aRefDrawTarget) { 240 gfxTextRunFactory::Parameters params = { 241 aRefDrawTarget, nullptr, nullptr, 242 nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()}; 243 *aFlags = aTextRun->GetFlags(); 244 return params; 245 } 246 247 // Some languages have special casing conventions that differ from the 248 // default Unicode mappings. 249 // The enum values here are named for well-known exemplar languages that 250 // exhibit the behavior in question; multiple lang tags may map to the 251 // same setting here, if the behavior is shared by other languages. 252 enum LanguageSpecificCasingBehavior { 253 eLSCB_None, // default non-lang-specific behavior 254 eLSCB_Dutch, // treat "ij" digraph as a unit for capitalization 255 eLSCB_Greek, // strip accent when uppercasing Greek vowels 256 eLSCB_Irish, // keep prefix letters as lowercase when uppercasing Irish 257 eLSCB_Turkish, // preserve dotted/dotless-i distinction in uppercase 258 eLSCB_Lithuanian // retain dot on lowercase i/j when an accent is present 259 }; 260 261 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) { 262 if (!aLang) { 263 return eLSCB_None; 264 } 265 if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"tr") || 266 nsStyleUtil::MatchesLanguagePrefix(aLang, u"az") || 267 nsStyleUtil::MatchesLanguagePrefix(aLang, u"ba") || 268 nsStyleUtil::MatchesLanguagePrefix(aLang, u"crh") || 269 nsStyleUtil::MatchesLanguagePrefix(aLang, u"tt")) { 270 return eLSCB_Turkish; 271 } 272 if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"nl")) { 273 return eLSCB_Dutch; 274 } 275 if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"el")) { 276 return eLSCB_Greek; 277 } 278 if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"ga")) { 279 return eLSCB_Irish; 280 } 281 if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"lt")) { 282 return eLSCB_Lithuanian; 283 } 284 return eLSCB_None; 285 } 286 287 bool nsCaseTransformTextRunFactory::TransformString( 288 const nsAString& aString, nsString& aConvertedString, 289 const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar, 290 bool aCaseTransformsOnly, const nsAtom* aLanguage, 291 nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray, 292 const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun, 293 nsTArray<uint8_t>* aCanBreakBeforeArray, 294 nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) { 295 bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray; 296 MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun, 297 "text run must be provided to use aux output arrays"); 298 299 uint32_t length = aString.Length(); 300 const char16_t* str = aString.BeginReading(); 301 // If an unconditional mask character was passed, we'll use it; if not, any 302 // masking called for by the textrun styles will use TextEditor's mask char. 303 const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask(); 304 305 bool mergeNeeded = false; 306 307 bool capitalizeDutchIJ = false; 308 bool prevIsLetter = false; 309 bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' 310 // when doing Irish lowercasing 311 bool seenSoftDotted = false; // true immediately after an I or J that is 312 // converted to lowercase in Lithuanian mode 313 uint32_t sigmaIndex = uint32_t(-1); 314 nsUGenCategory cat; 315 316 StyleTextTransform style = aGlobalTransform.valueOr(StyleTextTransform::NONE); 317 bool forceNonFullWidth = false; 318 const nsAtom* lang = aLanguage; 319 320 LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); 321 mozilla::GreekCasing::State greekState; 322 mozilla::IrishCasing::State irishState; 323 uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) 324 // in the output string 325 uint32_t irishMarkSrc = uint32_t(-1); // corresponding location in source 326 // string (may differ from output due 327 // to expansions like eszet -> 'SS') 328 uint32_t greekMark = uint32_t(-1); // location of uppercase ETA that may need 329 // tonos added (if it is disjunctive eta) 330 const char16_t kGreekUpperEta = 0x0397; 331 332 // If we're doing capitalization and don't have a textrun, this is the state 333 // to be passed to each call to nsLineBreaker::ShouldCapitalize. 334 bool capitalizeNext = true; 335 336 for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) { 337 uint32_t ch = str[i]; 338 339 RefPtr<nsTransformedCharStyle> charStyle; 340 if (aTextRun) { 341 charStyle = aTextRun->mStyles[aOffsetInTextRun]; 342 style = aGlobalTransform.valueOr(charStyle->mTextTransform); 343 forceNonFullWidth = charStyle->mForceNonFullWidth; 344 345 nsAtom* newLang = 346 charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr; 347 if (lang != newLang) { 348 lang = newLang; 349 languageSpecificCasing = GetCasingFor(lang); 350 greekState.Reset(); 351 irishState.Reset(); 352 irishMark = uint32_t(-1); 353 irishMarkSrc = uint32_t(-1); 354 greekMark = uint32_t(-1); 355 } 356 } 357 358 // These should be mutually exclusive: mMaskPassword is set if we are 359 // handling <input type=password>, where the TextEditor code controls 360 // masking and we use its PasswordMask() character, in which case 361 // aMaskChar (from -webkit-text-security) is not used. 362 MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword)); 363 364 bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar; 365 int extraChars = 0; 366 const unicode::MultiCharMapping* mcm; 367 bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? 368 369 if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) { 370 ch = SURROGATE_TO_UCS4(ch, str[i + 1]); 371 } 372 const uint32_t originalCh = ch; 373 374 // Skip case transform if we're masking current character. 375 if (!maskPassword) { 376 switch ((style & StyleTextTransform::CASE_TRANSFORMS)._0) { 377 case StyleTextTransform::NONE._0: 378 break; 379 case StyleTextTransform::LOWERCASE._0: 380 if (languageSpecificCasing == eLSCB_Turkish) { 381 if (ch == 'I') { 382 ch = LATIN_SMALL_LETTER_DOTLESS_I; 383 prevIsLetter = true; 384 sigmaIndex = uint32_t(-1); 385 break; 386 } 387 if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { 388 ch = 'i'; 389 prevIsLetter = true; 390 sigmaIndex = uint32_t(-1); 391 break; 392 } 393 } 394 395 if (languageSpecificCasing == eLSCB_Lithuanian) { 396 // clang-format off 397 /* From SpecialCasing.txt: 398 * # Introduce an explicit dot above when lowercasing capital I's and J's 399 * # whenever there are more accents above. 400 * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) 401 * 402 * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I 403 * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J 404 * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK 405 * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE 406 * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 407 * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 408 */ 409 // clang-format on 410 if (ch == 'I' || ch == 'J' || ch == 0x012E) { 411 ch = ToLowerCase(ch); 412 prevIsLetter = true; 413 seenSoftDotted = true; 414 sigmaIndex = uint32_t(-1); 415 break; 416 } 417 if (ch == 0x00CC) { 418 aConvertedString.Append('i'); 419 aConvertedString.Append(0x0307); 420 extraChars += 2; 421 ch = 0x0300; 422 prevIsLetter = true; 423 seenSoftDotted = false; 424 sigmaIndex = uint32_t(-1); 425 break; 426 } 427 if (ch == 0x00CD) { 428 aConvertedString.Append('i'); 429 aConvertedString.Append(0x0307); 430 extraChars += 2; 431 ch = 0x0301; 432 prevIsLetter = true; 433 seenSoftDotted = false; 434 sigmaIndex = uint32_t(-1); 435 break; 436 } 437 if (ch == 0x0128) { 438 aConvertedString.Append('i'); 439 aConvertedString.Append(0x0307); 440 extraChars += 2; 441 ch = 0x0303; 442 prevIsLetter = true; 443 seenSoftDotted = false; 444 sigmaIndex = uint32_t(-1); 445 break; 446 } 447 } 448 449 cat = unicode::GetGenCategory(ch); 450 451 if (languageSpecificCasing == eLSCB_Irish && 452 cat == nsUGenCategory::kLetter) { 453 // See bug 1018805 for Irish lowercasing requirements 454 if (!prevIsLetter && (ch == 'n' || ch == 't')) { 455 ntPrefix = true; 456 } else { 457 if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { 458 aConvertedString.Append('-'); 459 ++extraChars; 460 } 461 ntPrefix = false; 462 } 463 } else { 464 ntPrefix = false; 465 } 466 467 if (seenSoftDotted && cat == nsUGenCategory::kMark) { 468 // The seenSoftDotted flag will only be set in Lithuanian mode. 469 if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) { 470 aConvertedString.Append(0x0307); 471 ++extraChars; 472 } 473 } 474 seenSoftDotted = false; 475 476 // Special lowercasing behavior for Greek Sigma: note that this is 477 // listed as context-sensitive in Unicode's SpecialCasing.txt, but is 478 // *not* a language-specific mapping; it applies regardless of the 479 // language of the element. 480 // 481 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA 482 // (i.e. the non-final form) whenever there is a following letter, or 483 // when the CAPITAL SIGMA occurs in isolation (neither preceded nor 484 // followed by a LETTER); and to FINAL SIGMA when it is preceded by 485 // another letter but not followed by one. 486 // 487 // To implement the context-sensitive nature of this mapping, we keep 488 // track of whether the previous character was a letter. If not, 489 // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous 490 // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we 491 // record the position in the converted string; if we then encounter 492 // another letter, that FINAL SIGMA is replaced with a standard 493 // SMALL SIGMA. 494 495 // If sigmaIndex is not -1, it marks where we have provisionally 496 // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another 497 // letter, we need to change it to SMALL SIGMA. 498 if (sigmaIndex != uint32_t(-1)) { 499 if (cat == nsUGenCategory::kLetter) { 500 aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); 501 } 502 } 503 504 if (ch == GREEK_CAPITAL_LETTER_SIGMA) { 505 // If preceding char was a letter, map to FINAL instead of SMALL, 506 // and note where it occurred by setting sigmaIndex; we'll change 507 // it to standard SMALL SIGMA later if another letter follows 508 if (prevIsLetter) { 509 ch = GREEK_SMALL_LETTER_FINAL_SIGMA; 510 sigmaIndex = aConvertedString.Length(); 511 } else { 512 // CAPITAL SIGMA not preceded by a letter is unconditionally 513 // mapped to SMALL SIGMA 514 ch = GREEK_SMALL_LETTER_SIGMA; 515 sigmaIndex = uint32_t(-1); 516 } 517 prevIsLetter = true; 518 break; 519 } 520 521 // ignore diacritics for the purpose of contextual sigma mapping; 522 // otherwise, reset prevIsLetter appropriately and clear the 523 // sigmaIndex marker 524 if (cat != nsUGenCategory::kMark) { 525 prevIsLetter = (cat == nsUGenCategory::kLetter); 526 sigmaIndex = uint32_t(-1); 527 } 528 529 mcm = unicode::SpecialLower(ch); 530 if (mcm) { 531 int j = 0; 532 while (j < 2 && mcm->mMappedChars[j + 1]) { 533 aConvertedString.Append(mcm->mMappedChars[j]); 534 ++extraChars; 535 ++j; 536 } 537 ch = mcm->mMappedChars[j]; 538 break; 539 } 540 541 ch = ToLowerCase(ch); 542 break; 543 544 case StyleTextTransform::UPPERCASE._0: 545 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { 546 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; 547 break; 548 } 549 550 if (languageSpecificCasing == eLSCB_Greek) { 551 bool markEta; 552 bool updateEta; 553 ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta, 554 updateEta); 555 if (markEta) { 556 greekMark = aConvertedString.Length(); 557 } else if (updateEta) { 558 // Remove the TONOS from an uppercase ETA-TONOS that turned out 559 // not to be disjunctive-eta. 560 MOZ_ASSERT(aConvertedString.Length() > 0 && 561 greekMark < aConvertedString.Length(), 562 "bad greekMark!"); 563 aConvertedString.SetCharAt(kGreekUpperEta, greekMark); 564 greekMark = uint32_t(-1); 565 } 566 break; 567 } 568 569 if (languageSpecificCasing == eLSCB_Lithuanian) { 570 /* 571 * # Remove DOT ABOVE after "i" with upper or titlecase 572 * 573 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 574 */ 575 if (ch == 'i' || ch == 'j' || ch == 0x012F) { 576 seenSoftDotted = true; 577 ch = ToTitleCase(ch); 578 break; 579 } 580 if (seenSoftDotted) { 581 seenSoftDotted = false; 582 if (ch == 0x0307) { 583 ch = uint32_t(-1); 584 break; 585 } 586 } 587 } 588 589 if (languageSpecificCasing == eLSCB_Irish) { 590 bool mark; 591 uint8_t action; 592 ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); 593 if (mark) { 594 irishMark = aConvertedString.Length(); 595 irishMarkSrc = i; 596 break; 597 } else if (action) { 598 nsString& str = aConvertedString; // shorthand 599 switch (action) { 600 case 1: 601 // lowercase a single prefix letter 602 MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(), 603 "bad irishMark!"); 604 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); 605 irishMark = uint32_t(-1); 606 irishMarkSrc = uint32_t(-1); 607 break; 608 case 2: 609 // lowercase two prefix letters (immediately before current 610 // pos) 611 MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2, 612 "bad irishMark!"); 613 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); 614 str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); 615 irishMark = uint32_t(-1); 616 irishMarkSrc = uint32_t(-1); 617 break; 618 case 3: 619 // lowercase one prefix letter, and delete following hyphen 620 // (which must be the immediately-preceding char) 621 MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2, 622 "bad irishMark!"); 623 MOZ_ASSERT( 624 irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1), 625 "failed to set irishMarks"); 626 str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); 627 aDeletedCharsArray[irishMarkSrc + 1] = true; 628 // Remove the trailing entries (corresponding to the deleted 629 // hyphen) from the auxiliary arrays. 630 uint32_t len = aCharsToMergeArray.Length(); 631 MOZ_ASSERT(len >= 2); 632 aCharsToMergeArray.TruncateLength(len - 1); 633 if (auxiliaryOutputArrays) { 634 MOZ_ASSERT(aStyleArray->Length() == len); 635 MOZ_ASSERT(aCanBreakBeforeArray->Length() == len); 636 aStyleArray->TruncateLength(len - 1); 637 aCanBreakBeforeArray->TruncateLength(len - 1); 638 inhibitBreakBefore = true; 639 } 640 mergeNeeded = true; 641 irishMark = uint32_t(-1); 642 irishMarkSrc = uint32_t(-1); 643 break; 644 } 645 // ch has been set to the uppercase for current char; 646 // No need to check for SpecialUpper here as none of the 647 // characters that could trigger an Irish casing action have 648 // special mappings. 649 break; 650 } 651 // If we didn't have any special action to perform, fall through 652 // to check for special uppercase (ß) 653 } 654 655 // Updated mapping for German eszett, not currently reflected in the 656 // Unicode data files. This is behind a pref, as it may not work well 657 // with many (esp. older) fonts. 658 if (ch == 0x00DF && 659 StaticPrefs:: 660 layout_css_text_transform_uppercase_eszett_enabled()) { 661 ch = 0x1E9E; 662 break; 663 } 664 665 mcm = unicode::SpecialUpper(ch); 666 if (mcm) { 667 int j = 0; 668 while (j < 2 && mcm->mMappedChars[j + 1]) { 669 aConvertedString.Append(mcm->mMappedChars[j]); 670 ++extraChars; 671 ++j; 672 } 673 ch = mcm->mMappedChars[j]; 674 break; 675 } 676 677 ch = ToUpperCase(ch); 678 break; 679 680 case StyleTextTransform::CAPITALIZE._0: { 681 if (capitalizeDutchIJ && ch == 'j') { 682 ch = 'J'; 683 capitalizeDutchIJ = false; 684 break; 685 } 686 capitalizeDutchIJ = false; 687 // If we have a textrun, its mCapitalize array tells us which chars 688 // are to be capitalized. If not, we track the state locally, and 689 // assume there's no context to be considered. 690 bool doCapitalize = false; 691 if (aTextRun) { 692 if (aOffsetInTextRun < aTextRun->mCapitalize.Length()) { 693 doCapitalize = aTextRun->mCapitalize[aOffsetInTextRun]; 694 } 695 } else { 696 doCapitalize = nsLineBreaker::ShouldCapitalize(ch, capitalizeNext); 697 } 698 if (doCapitalize) { 699 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { 700 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; 701 break; 702 } 703 if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { 704 ch = 'I'; 705 capitalizeDutchIJ = true; 706 break; 707 } 708 if (languageSpecificCasing == eLSCB_Lithuanian) { 709 /* 710 * # Remove DOT ABOVE after "i" with upper or titlecase 711 * 712 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 713 */ 714 if (ch == 'i' || ch == 'j' || ch == 0x012F) { 715 seenSoftDotted = true; 716 ch = ToTitleCase(ch); 717 break; 718 } 719 if (seenSoftDotted) { 720 seenSoftDotted = false; 721 if (ch == 0x0307) { 722 ch = uint32_t(-1); 723 break; 724 } 725 } 726 } 727 728 mcm = unicode::SpecialTitle(ch); 729 if (mcm) { 730 int j = 0; 731 while (j < 2 && mcm->mMappedChars[j + 1]) { 732 aConvertedString.Append(mcm->mMappedChars[j]); 733 ++extraChars; 734 ++j; 735 } 736 ch = mcm->mMappedChars[j]; 737 break; 738 } 739 740 ch = ToTitleCase(ch); 741 } 742 break; 743 } 744 745 case StyleTextTransform::MATH_AUTO._0: 746 // text-transform: math-auto is used for automatic italicization of 747 // single-char <mi> elements. However, some legacy cases (italic style 748 // fallback and <mi> with leading/trailing whitespace) are still 749 // handled in MathMLTextRunFactory. 750 if (length == 1) { 751 uint32_t ch2 = 752 MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic); 753 if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) { 754 ch = ch2; 755 } else if (ch2 != ch) { 756 // Bug 930504. Some platforms do not have fonts for Mathematical 757 // Alphanumeric Symbols. Hence we only perform the transform if a 758 // character is actually available. 759 auto* fontGroup = aTextRun->GetFontGroup(); 760 fontGroup->EnsureFontList(); 761 FontMatchType matchType; 762 RefPtr<gfxFont> mathFont = fontGroup->FindFontForChar( 763 ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType); 764 if (mathFont) { 765 ch = ch2; 766 } 767 } 768 } 769 break; 770 default: 771 MOZ_ASSERT_UNREACHABLE("all cases should be handled"); 772 break; 773 } 774 775 if (!aCaseTransformsOnly) { 776 if (!forceNonFullWidth && (style & StyleTextTransform::FULL_WIDTH)) { 777 ch = unicode::GetFullWidth(ch); 778 } 779 780 if (style & StyleTextTransform::FULL_SIZE_KANA) { 781 // clang-format off 782 static const uint32_t kSmallKanas[] = { 783 // ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ 784 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087, 785 // ゎ ゕ ゖ 786 0x308E, 0x3095, 0x3096, 787 // ァ ィ ゥ ェ ォ ッ ャ ュ ョ 788 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7, 789 // ヮ ヵ ヶ ㇰ ㇱ ㇲ ㇳ ㇴ ㇵ 790 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5, 791 // ㇶ ㇷ ㇸ ㇹ ㇺ ㇻ ㇼ ㇽ ㇾ 792 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE, 793 // ㇿ 794 0x31FF, 795 // ァ ィ ゥ ェ ォ ャ ュ ョ ッ 796 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F, 797 // 𛄲 𛅐 𛅑 𛅒 𛅕 𛅤 𛅥 𛅦 798 0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166, 799 // 𛅧 800 0x1B167}; 801 static const uint16_t kFullSizeKanas[] = { 802 // あ い う え お つ や ゆ よ 803 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088, 804 // わ か け 805 0x308F, 0x304B, 0x3051, 806 // ア イ ウ エ オ ツ ヤ ユ ヨ 807 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8, 808 // ワ カ ケ ク シ ス ト ヌ ハ 809 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF, 810 // ヒ フ ヘ ホ ム ラ リ ル レ 811 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC, 812 // ロ 813 0x30ED, 814 // ア イ ウ エ オ ヤ ユ ヨ ツ 815 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82, 816 // こ ゐ ゑ を コ ヰ ヱ ヲ ン 817 0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3}; 818 // clang-format on 819 820 size_t index; 821 const uint16_t len = std::size(kSmallKanas); 822 if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) { 823 ch = kFullSizeKanas[index]; 824 } 825 } 826 } 827 828 if (forceNonFullWidth) { 829 ch = unicode::GetFullWidthInverse(ch); 830 } 831 } 832 833 if (ch == uint32_t(-1)) { 834 aDeletedCharsArray.AppendElement(true); 835 mergeNeeded = true; 836 } else { 837 aDeletedCharsArray.AppendElement(false); 838 aCharsToMergeArray.AppendElement(false); 839 if (auxiliaryOutputArrays) { 840 aStyleArray->AppendElement(charStyle); 841 aCanBreakBeforeArray->AppendElement( 842 inhibitBreakBefore 843 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE 844 : aTextRun->CanBreakBefore(aOffsetInTextRun)); 845 } 846 847 if (IS_IN_BMP(ch)) { 848 aConvertedString.Append(maskPassword ? mask : ch); 849 } else { 850 if (maskPassword) { 851 aConvertedString.Append(mask); 852 // TODO: We should show a password mask for a surrogate pair later. 853 aConvertedString.Append(mask); 854 } else { 855 aConvertedString.Append(H_SURROGATE(ch)); 856 aConvertedString.Append(L_SURROGATE(ch)); 857 } 858 ++extraChars; 859 } 860 if (!IS_IN_BMP(originalCh)) { 861 // Skip the trailing surrogate. 862 ++aOffsetInTextRun; 863 ++i; 864 aDeletedCharsArray.AppendElement(true); 865 } 866 867 while (extraChars-- > 0) { 868 mergeNeeded = true; 869 aCharsToMergeArray.AppendElement(true); 870 if (auxiliaryOutputArrays) { 871 aStyleArray->AppendElement(charStyle); 872 aCanBreakBeforeArray->AppendElement( 873 gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE); 874 } 875 } 876 } 877 } 878 879 // These output arrays, if present, must always have matching lengths: 880 if (auxiliaryOutputArrays) { 881 DebugOnly<uint32_t> len = aCharsToMergeArray.Length(); 882 MOZ_ASSERT(aStyleArray->Length() == len); 883 MOZ_ASSERT(aCanBreakBeforeArray->Length() == len); 884 } 885 886 return mergeNeeded; 887 } 888 889 void nsCaseTransformTextRunFactory::RebuildTextRun( 890 nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget, 891 gfxMissingFontRecorder* aMFR) { 892 nsAutoString convertedString; 893 AutoTArray<bool, 50> charsToMergeArray; 894 AutoTArray<bool, 50> deletedCharsArray; 895 AutoTArray<uint8_t, 50> canBreakBeforeArray; 896 AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray; 897 898 auto globalTransform = 899 mAllUppercase ? Some(StyleTextTransform::UPPERCASE) : Nothing(); 900 bool mergeNeeded = TransformString( 901 aTextRun->mString, convertedString, globalTransform, mMaskChar, 902 /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray, 903 deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray); 904 905 gfx::ShapedTextFlags flags; 906 gfxTextRunFactory::Parameters innerParams = 907 GetParametersForInner(aTextRun, &flags, aRefDrawTarget); 908 gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); 909 910 RefPtr<nsTransformedTextRun> transformedChild; 911 RefPtr<gfxTextRun> cachedChild; 912 gfxTextRun* child; 913 914 if (mInnerTransformingTextRunFactory) { 915 transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( 916 convertedString.BeginReading(), convertedString.Length(), &innerParams, 917 fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray), 918 false); 919 child = transformedChild.get(); 920 } else { 921 cachedChild = fontGroup->MakeTextRun( 922 convertedString.BeginReading(), convertedString.Length(), &innerParams, 923 flags, nsTextFrameUtils::Flags(), aMFR); 924 child = cachedChild.get(); 925 } 926 if (!child) { 927 return; 928 } 929 // Copy potential linebreaks into child so they're preserved 930 // (and also child will be shaped appropriately) 931 NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), 932 "Dropped characters or break-before values somewhere!"); 933 gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length())); 934 child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements()); 935 if (transformedChild) { 936 transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR); 937 } 938 939 aTextRun->ResetGlyphRuns(); 940 if (mergeNeeded) { 941 // Now merge multiple characters into one multi-glyph character as required 942 // and deal with skipping deleted accent chars 943 NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), 944 "source length mismatch"); 945 NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), 946 "destination length mismatch"); 947 MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), 948 deletedCharsArray.Elements()); 949 } else { 950 // No merging to do, so just copy; this produces a more optimized textrun. 951 // We can't steal the data because the child may be cached and stealing 952 // the data would break the cache. 953 aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0); 954 } 955 }