tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsTextRunTransformations.cpp (39157B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "nsTextRunTransformations.h"
      8 
      9 #include <utility>
     10 
     11 #include "GreekCasing.h"
     12 #include "IrishCasing.h"
     13 #include "MathMLTextRunFactory.h"
     14 #include "mozilla/ComputedStyleInlines.h"
     15 #include "mozilla/MemoryReporting.h"
     16 #include "mozilla/StaticPrefs_layout.h"
     17 #include "mozilla/StaticPrefs_mathml.h"
     18 #include "mozilla/TextEditor.h"
     19 #include "mozilla/gfx/2D.h"
     20 #include "nsGkAtoms.h"
     21 #include "nsLineBreaker.h"
     22 #include "nsSpecialCasingData.h"
     23 #include "nsStyleConsts.h"
     24 #include "nsStyleUtil.h"
     25 #include "nsTextFrameUtils.h"
     26 #include "nsUnicharUtils.h"
     27 #include "nsUnicodeProperties.h"
     28 
     29 using namespace mozilla;
     30 using namespace mozilla::gfx;
     31 
     32 // Unicode characters needing special casing treatment in tr/az languages
     33 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
     34 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
     35 
     36 // Greek sigma needs custom handling for the lowercase transform; for details
     37 // see bug 740120.
     38 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
     39 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
     40 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
     41 
     42 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
     43    const gfxTextRunFactory::Parameters* aParams,
     44    nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
     45    const char16_t* aString, uint32_t aLength,
     46    const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
     47    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
     48  NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
     49               "didn't expect text to be marked as 8-bit here");
     50 
     51  void* storage =
     52      AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
     53  if (!storage) {
     54    return nullptr;
     55  }
     56 
     57  RefPtr<nsTransformedTextRun> result = new (storage)
     58      nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
     59                           aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
     60  return result.forget();
     61 }
     62 
     63 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
     64                                             bool* aCapitalization) {
     65  if (mCapitalize.IsEmpty()) {
     66    // XXX(Bug 1631371) Check if this should use a fallible operation as it
     67    // pretended earlier.
     68    mCapitalize.AppendElements(GetLength());
     69    memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
     70  }
     71  memcpy(mCapitalize.Elements() + aStart, aCapitalization,
     72         aLength * sizeof(bool));
     73  mNeedsRebuild = true;
     74 }
     75 
     76 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
     77                                                  const uint8_t* aBreakBefore) {
     78  bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
     79  if (changed) {
     80    mNeedsRebuild = true;
     81  }
     82  return changed;
     83 }
     84 
     85 void nsTransformedTextRun::SetEmergencyWrapPositions() {
     86  // This parallels part of what gfxShapedText::SetupClusterBoundaries() does
     87  // for normal textruns.
     88  bool prevWasHyphen = false;
     89  for (uint32_t pos : IntegerRange(mString.Length())) {
     90    const char16_t ch = mString[pos];
     91    if (prevWasHyphen) {
     92      if (nsContentUtils::IsAlphanumeric(ch)) {
     93        mCharacterGlyphs[pos].SetCanBreakBefore(
     94            CompressedGlyph::FLAG_BREAK_TYPE_EMERGENCY_WRAP);
     95      }
     96      prevWasHyphen = false;
     97    }
     98    if (nsContentUtils::IsHyphen(ch) && pos &&
     99        nsContentUtils::IsAlphanumeric(mString[pos - 1])) {
    100      prevWasHyphen = true;
    101    }
    102  }
    103 }
    104 
    105 size_t nsTransformedTextRun::SizeOfExcludingThis(
    106    mozilla::MallocSizeOf aMallocSizeOf) {
    107  size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
    108  total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
    109  total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
    110  if (mOwnsFactory) {
    111    total += aMallocSizeOf(mFactory);
    112  }
    113  return total;
    114 }
    115 
    116 size_t nsTransformedTextRun::SizeOfIncludingThis(
    117    mozilla::MallocSizeOf aMallocSizeOf) {
    118  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
    119 }
    120 
    121 already_AddRefed<nsTransformedTextRun>
    122 nsTransformingTextRunFactory::MakeTextRun(
    123    const char16_t* aString, uint32_t aLength,
    124    const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
    125    gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
    126    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
    127  return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
    128                                      aLength, aFlags, aFlags2,
    129                                      std::move(aStyles), aOwnsFactory);
    130 }
    131 
    132 already_AddRefed<nsTransformedTextRun>
    133 nsTransformingTextRunFactory::MakeTextRun(
    134    const uint8_t* aString, uint32_t aLength,
    135    const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
    136    gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
    137    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
    138  // We'll only have a Unicode code path to minimize the amount of code needed
    139  // for these rarely used features
    140  NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
    141                                       aLength);
    142  return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
    143                     aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
    144                     std::move(aStyles), aOwnsFactory);
    145 }
    146 
    147 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
    148                              const bool* aCharsToMerge,
    149                              const bool* aDeletedChars) {
    150  MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
    151  uint32_t offset = 0;
    152  AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
    153  const gfxTextRun::CompressedGlyph continuationGlyph =
    154      gfxTextRun::CompressedGlyph::MakeComplex(false, false);
    155  const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
    156  gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
    157  for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
    158       !iter.AtEnd(); iter.NextRun()) {
    159    const gfxTextRun::GlyphRun* run = iter.GlyphRun();
    160    aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
    161                       run->mOrientation, run->mIsCJK);
    162 
    163    bool anyMissing = false;
    164    uint32_t mergeRunStart = iter.StringStart();
    165    // Initialize to a copy of the first source glyph in the merge run.
    166    gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
    167    uint32_t stringEnd = iter.StringEnd();
    168    for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) {
    169      const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
    170      if (g.IsSimpleGlyph()) {
    171        if (!anyMissing) {
    172          gfxTextRun::DetailedGlyph details;
    173          details.mGlyphID = g.GetSimpleGlyph();
    174          details.mAdvance = g.GetSimpleAdvance();
    175          glyphs.AppendElement(details);
    176        }
    177      } else {
    178        if (g.IsMissing()) {
    179          anyMissing = true;
    180          glyphs.Clear();
    181        }
    182        if (g.GetGlyphCount() > 0) {
    183          glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
    184        }
    185      }
    186 
    187      if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) {
    188        // next char is supposed to merge with current, so loop without
    189        // writing current merged glyph to the destination
    190        continue;
    191      }
    192 
    193      // If the start of the merge run is actually a character that should
    194      // have been merged with the previous character (this can happen
    195      // if there's a font change in the middle of a case-mapped character,
    196      // that decomposed into a sequence of base+diacritics, for example),
    197      // just discard the entire merge run. See comment at start of this
    198      // function.
    199      NS_WARNING_ASSERTION(
    200          !aCharsToMerge[mergeRunStart],
    201          "unable to merge across a glyph run boundary, glyph(s) discarded");
    202      if (!aCharsToMerge[mergeRunStart]) {
    203        // Determine if we can just copy the existing simple glyph record.
    204        if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) {
    205          destGlyphs[offset] = mergedGlyph;
    206        } else {
    207          // Otherwise set up complex glyph record and store detailed glyphs.
    208          mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
    209                                 mergedGlyph.IsLigatureGroupStart());
    210          destGlyphs[offset] = mergedGlyph;
    211          aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements());
    212          if (anyMissing) {
    213            destGlyphs[offset].SetMissing();
    214          }
    215        }
    216        offset++;
    217 
    218        while (offset < aDest->GetLength() && aDeletedChars[offset]) {
    219          destGlyphs[offset++] = continuationGlyph;
    220        }
    221      }
    222 
    223      glyphs.Clear();
    224      anyMissing = false;
    225      mergeRunStart = k + 1;
    226      if (mergeRunStart < stringEnd) {
    227        mergedGlyph = srcGlyphs[mergeRunStart];
    228      }
    229    }
    230    NS_ASSERTION(glyphs.Length() == 0,
    231                 "Leftover glyphs, don't request merging of the last character "
    232                 "with its next!");
    233  }
    234  NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
    235 }
    236 
    237 gfxTextRunFactory::Parameters GetParametersForInner(
    238    nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
    239    DrawTarget* aRefDrawTarget) {
    240  gfxTextRunFactory::Parameters params = {
    241      aRefDrawTarget, nullptr, nullptr,
    242      nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
    243  *aFlags = aTextRun->GetFlags();
    244  return params;
    245 }
    246 
    247 // Some languages have special casing conventions that differ from the
    248 // default Unicode mappings.
    249 // The enum values here are named for well-known exemplar languages that
    250 // exhibit the behavior in question; multiple lang tags may map to the
    251 // same setting here, if the behavior is shared by other languages.
    252 enum LanguageSpecificCasingBehavior {
    253  eLSCB_None,       // default non-lang-specific behavior
    254  eLSCB_Dutch,      // treat "ij" digraph as a unit for capitalization
    255  eLSCB_Greek,      // strip accent when uppercasing Greek vowels
    256  eLSCB_Irish,      // keep prefix letters as lowercase when uppercasing Irish
    257  eLSCB_Turkish,    // preserve dotted/dotless-i distinction in uppercase
    258  eLSCB_Lithuanian  // retain dot on lowercase i/j when an accent is present
    259 };
    260 
    261 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
    262  if (!aLang) {
    263    return eLSCB_None;
    264  }
    265  if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"tr") ||
    266      nsStyleUtil::MatchesLanguagePrefix(aLang, u"az") ||
    267      nsStyleUtil::MatchesLanguagePrefix(aLang, u"ba") ||
    268      nsStyleUtil::MatchesLanguagePrefix(aLang, u"crh") ||
    269      nsStyleUtil::MatchesLanguagePrefix(aLang, u"tt")) {
    270    return eLSCB_Turkish;
    271  }
    272  if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"nl")) {
    273    return eLSCB_Dutch;
    274  }
    275  if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"el")) {
    276    return eLSCB_Greek;
    277  }
    278  if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"ga")) {
    279    return eLSCB_Irish;
    280  }
    281  if (nsStyleUtil::MatchesLanguagePrefix(aLang, u"lt")) {
    282    return eLSCB_Lithuanian;
    283  }
    284  return eLSCB_None;
    285 }
    286 
    287 bool nsCaseTransformTextRunFactory::TransformString(
    288    const nsAString& aString, nsString& aConvertedString,
    289    const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar,
    290    bool aCaseTransformsOnly, const nsAtom* aLanguage,
    291    nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
    292    const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
    293    nsTArray<uint8_t>* aCanBreakBeforeArray,
    294    nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
    295  bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
    296  MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
    297             "text run must be provided to use aux output arrays");
    298 
    299  uint32_t length = aString.Length();
    300  const char16_t* str = aString.BeginReading();
    301  // If an unconditional mask character was passed, we'll use it; if not, any
    302  // masking called for by the textrun styles will use TextEditor's mask char.
    303  const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask();
    304 
    305  bool mergeNeeded = false;
    306 
    307  bool capitalizeDutchIJ = false;
    308  bool prevIsLetter = false;
    309  bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
    310                          // when doing Irish lowercasing
    311  bool seenSoftDotted = false;  // true immediately after an I or J that is
    312                                // converted to lowercase in Lithuanian mode
    313  uint32_t sigmaIndex = uint32_t(-1);
    314  nsUGenCategory cat;
    315 
    316  StyleTextTransform style = aGlobalTransform.valueOr(StyleTextTransform::NONE);
    317  bool forceNonFullWidth = false;
    318  const nsAtom* lang = aLanguage;
    319 
    320  LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
    321  mozilla::GreekCasing::State greekState;
    322  mozilla::IrishCasing::State irishState;
    323  uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
    324                                      // in the output string
    325  uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
    326                                         // string (may differ from output due
    327                                         // to expansions like eszet -> 'SS')
    328  uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
    329                                      // tonos added (if it is disjunctive eta)
    330  const char16_t kGreekUpperEta = 0x0397;
    331 
    332  // If we're doing capitalization and don't have a textrun, this is the state
    333  // to be passed to each call to nsLineBreaker::ShouldCapitalize.
    334  bool capitalizeNext = true;
    335 
    336  for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
    337    uint32_t ch = str[i];
    338 
    339    RefPtr<nsTransformedCharStyle> charStyle;
    340    if (aTextRun) {
    341      charStyle = aTextRun->mStyles[aOffsetInTextRun];
    342      style = aGlobalTransform.valueOr(charStyle->mTextTransform);
    343      forceNonFullWidth = charStyle->mForceNonFullWidth;
    344 
    345      nsAtom* newLang =
    346          charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
    347      if (lang != newLang) {
    348        lang = newLang;
    349        languageSpecificCasing = GetCasingFor(lang);
    350        greekState.Reset();
    351        irishState.Reset();
    352        irishMark = uint32_t(-1);
    353        irishMarkSrc = uint32_t(-1);
    354        greekMark = uint32_t(-1);
    355      }
    356    }
    357 
    358    // These should be mutually exclusive: mMaskPassword is set if we are
    359    // handling <input type=password>, where the TextEditor code controls
    360    // masking and we use its PasswordMask() character, in which case
    361    // aMaskChar (from -webkit-text-security) is not used.
    362    MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword));
    363 
    364    bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
    365    int extraChars = 0;
    366    const unicode::MultiCharMapping* mcm;
    367    bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
    368 
    369    if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
    370      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
    371    }
    372    const uint32_t originalCh = ch;
    373 
    374    // Skip case transform if we're masking current character.
    375    if (!maskPassword) {
    376      switch ((style & StyleTextTransform::CASE_TRANSFORMS)._0) {
    377        case StyleTextTransform::NONE._0:
    378          break;
    379        case StyleTextTransform::LOWERCASE._0:
    380          if (languageSpecificCasing == eLSCB_Turkish) {
    381            if (ch == 'I') {
    382              ch = LATIN_SMALL_LETTER_DOTLESS_I;
    383              prevIsLetter = true;
    384              sigmaIndex = uint32_t(-1);
    385              break;
    386            }
    387            if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
    388              ch = 'i';
    389              prevIsLetter = true;
    390              sigmaIndex = uint32_t(-1);
    391              break;
    392            }
    393          }
    394 
    395          if (languageSpecificCasing == eLSCB_Lithuanian) {
    396            // clang-format off
    397            /* From SpecialCasing.txt:
    398             * # Introduce an explicit dot above when lowercasing capital I's and J's
    399             * # whenever there are more accents above.
    400             * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
    401             *
    402             * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
    403             * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
    404             * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
    405             * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
    406             * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
    407             * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
    408             */
    409            // clang-format on
    410            if (ch == 'I' || ch == 'J' || ch == 0x012E) {
    411              ch = ToLowerCase(ch);
    412              prevIsLetter = true;
    413              seenSoftDotted = true;
    414              sigmaIndex = uint32_t(-1);
    415              break;
    416            }
    417            if (ch == 0x00CC) {
    418              aConvertedString.Append('i');
    419              aConvertedString.Append(0x0307);
    420              extraChars += 2;
    421              ch = 0x0300;
    422              prevIsLetter = true;
    423              seenSoftDotted = false;
    424              sigmaIndex = uint32_t(-1);
    425              break;
    426            }
    427            if (ch == 0x00CD) {
    428              aConvertedString.Append('i');
    429              aConvertedString.Append(0x0307);
    430              extraChars += 2;
    431              ch = 0x0301;
    432              prevIsLetter = true;
    433              seenSoftDotted = false;
    434              sigmaIndex = uint32_t(-1);
    435              break;
    436            }
    437            if (ch == 0x0128) {
    438              aConvertedString.Append('i');
    439              aConvertedString.Append(0x0307);
    440              extraChars += 2;
    441              ch = 0x0303;
    442              prevIsLetter = true;
    443              seenSoftDotted = false;
    444              sigmaIndex = uint32_t(-1);
    445              break;
    446            }
    447          }
    448 
    449          cat = unicode::GetGenCategory(ch);
    450 
    451          if (languageSpecificCasing == eLSCB_Irish &&
    452              cat == nsUGenCategory::kLetter) {
    453            // See bug 1018805 for Irish lowercasing requirements
    454            if (!prevIsLetter && (ch == 'n' || ch == 't')) {
    455              ntPrefix = true;
    456            } else {
    457              if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
    458                aConvertedString.Append('-');
    459                ++extraChars;
    460              }
    461              ntPrefix = false;
    462            }
    463          } else {
    464            ntPrefix = false;
    465          }
    466 
    467          if (seenSoftDotted && cat == nsUGenCategory::kMark) {
    468            // The seenSoftDotted flag will only be set in Lithuanian mode.
    469            if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
    470              aConvertedString.Append(0x0307);
    471              ++extraChars;
    472            }
    473          }
    474          seenSoftDotted = false;
    475 
    476          // Special lowercasing behavior for Greek Sigma: note that this is
    477          // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
    478          // *not* a language-specific mapping; it applies regardless of the
    479          // language of the element.
    480          //
    481          // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
    482          // (i.e. the non-final form) whenever there is a following letter, or
    483          // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
    484          // followed by a LETTER); and to FINAL SIGMA when it is preceded by
    485          // another letter but not followed by one.
    486          //
    487          // To implement the context-sensitive nature of this mapping, we keep
    488          // track of whether the previous character was a letter. If not,
    489          // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
    490          // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
    491          // record the position in the converted string; if we then encounter
    492          // another letter, that FINAL SIGMA is replaced with a standard
    493          // SMALL SIGMA.
    494 
    495          // If sigmaIndex is not -1, it marks where we have provisionally
    496          // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
    497          // letter, we need to change it to SMALL SIGMA.
    498          if (sigmaIndex != uint32_t(-1)) {
    499            if (cat == nsUGenCategory::kLetter) {
    500              aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
    501            }
    502          }
    503 
    504          if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
    505            // If preceding char was a letter, map to FINAL instead of SMALL,
    506            // and note where it occurred by setting sigmaIndex; we'll change
    507            // it to standard SMALL SIGMA later if another letter follows
    508            if (prevIsLetter) {
    509              ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
    510              sigmaIndex = aConvertedString.Length();
    511            } else {
    512              // CAPITAL SIGMA not preceded by a letter is unconditionally
    513              // mapped to SMALL SIGMA
    514              ch = GREEK_SMALL_LETTER_SIGMA;
    515              sigmaIndex = uint32_t(-1);
    516            }
    517            prevIsLetter = true;
    518            break;
    519          }
    520 
    521          // ignore diacritics for the purpose of contextual sigma mapping;
    522          // otherwise, reset prevIsLetter appropriately and clear the
    523          // sigmaIndex marker
    524          if (cat != nsUGenCategory::kMark) {
    525            prevIsLetter = (cat == nsUGenCategory::kLetter);
    526            sigmaIndex = uint32_t(-1);
    527          }
    528 
    529          mcm = unicode::SpecialLower(ch);
    530          if (mcm) {
    531            int j = 0;
    532            while (j < 2 && mcm->mMappedChars[j + 1]) {
    533              aConvertedString.Append(mcm->mMappedChars[j]);
    534              ++extraChars;
    535              ++j;
    536            }
    537            ch = mcm->mMappedChars[j];
    538            break;
    539          }
    540 
    541          ch = ToLowerCase(ch);
    542          break;
    543 
    544        case StyleTextTransform::UPPERCASE._0:
    545          if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
    546            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
    547            break;
    548          }
    549 
    550          if (languageSpecificCasing == eLSCB_Greek) {
    551            bool markEta;
    552            bool updateEta;
    553            ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
    554                                                 updateEta);
    555            if (markEta) {
    556              greekMark = aConvertedString.Length();
    557            } else if (updateEta) {
    558              // Remove the TONOS from an uppercase ETA-TONOS that turned out
    559              // not to be disjunctive-eta.
    560              MOZ_ASSERT(aConvertedString.Length() > 0 &&
    561                             greekMark < aConvertedString.Length(),
    562                         "bad greekMark!");
    563              aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
    564              greekMark = uint32_t(-1);
    565            }
    566            break;
    567          }
    568 
    569          if (languageSpecificCasing == eLSCB_Lithuanian) {
    570            /*
    571             * # Remove DOT ABOVE after "i" with upper or titlecase
    572             *
    573             * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
    574             */
    575            if (ch == 'i' || ch == 'j' || ch == 0x012F) {
    576              seenSoftDotted = true;
    577              ch = ToTitleCase(ch);
    578              break;
    579            }
    580            if (seenSoftDotted) {
    581              seenSoftDotted = false;
    582              if (ch == 0x0307) {
    583                ch = uint32_t(-1);
    584                break;
    585              }
    586            }
    587          }
    588 
    589          if (languageSpecificCasing == eLSCB_Irish) {
    590            bool mark;
    591            uint8_t action;
    592            ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
    593            if (mark) {
    594              irishMark = aConvertedString.Length();
    595              irishMarkSrc = i;
    596              break;
    597            } else if (action) {
    598              nsString& str = aConvertedString;  // shorthand
    599              switch (action) {
    600                case 1:
    601                  // lowercase a single prefix letter
    602                  MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(),
    603                             "bad irishMark!");
    604                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
    605                  irishMark = uint32_t(-1);
    606                  irishMarkSrc = uint32_t(-1);
    607                  break;
    608                case 2:
    609                  // lowercase two prefix letters (immediately before current
    610                  // pos)
    611                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
    612                             "bad irishMark!");
    613                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
    614                  str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
    615                  irishMark = uint32_t(-1);
    616                  irishMarkSrc = uint32_t(-1);
    617                  break;
    618                case 3:
    619                  // lowercase one prefix letter, and delete following hyphen
    620                  // (which must be the immediately-preceding char)
    621                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
    622                             "bad irishMark!");
    623                  MOZ_ASSERT(
    624                      irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
    625                      "failed to set irishMarks");
    626                  str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
    627                  aDeletedCharsArray[irishMarkSrc + 1] = true;
    628                  // Remove the trailing entries (corresponding to the deleted
    629                  // hyphen) from the auxiliary arrays.
    630                  uint32_t len = aCharsToMergeArray.Length();
    631                  MOZ_ASSERT(len >= 2);
    632                  aCharsToMergeArray.TruncateLength(len - 1);
    633                  if (auxiliaryOutputArrays) {
    634                    MOZ_ASSERT(aStyleArray->Length() == len);
    635                    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
    636                    aStyleArray->TruncateLength(len - 1);
    637                    aCanBreakBeforeArray->TruncateLength(len - 1);
    638                    inhibitBreakBefore = true;
    639                  }
    640                  mergeNeeded = true;
    641                  irishMark = uint32_t(-1);
    642                  irishMarkSrc = uint32_t(-1);
    643                  break;
    644              }
    645              // ch has been set to the uppercase for current char;
    646              // No need to check for SpecialUpper here as none of the
    647              // characters that could trigger an Irish casing action have
    648              // special mappings.
    649              break;
    650            }
    651            // If we didn't have any special action to perform, fall through
    652            // to check for special uppercase (ß)
    653          }
    654 
    655          // Updated mapping for German eszett, not currently reflected in the
    656          // Unicode data files. This is behind a pref, as it may not work well
    657          // with many (esp. older) fonts.
    658          if (ch == 0x00DF &&
    659              StaticPrefs::
    660                  layout_css_text_transform_uppercase_eszett_enabled()) {
    661            ch = 0x1E9E;
    662            break;
    663          }
    664 
    665          mcm = unicode::SpecialUpper(ch);
    666          if (mcm) {
    667            int j = 0;
    668            while (j < 2 && mcm->mMappedChars[j + 1]) {
    669              aConvertedString.Append(mcm->mMappedChars[j]);
    670              ++extraChars;
    671              ++j;
    672            }
    673            ch = mcm->mMappedChars[j];
    674            break;
    675          }
    676 
    677          ch = ToUpperCase(ch);
    678          break;
    679 
    680        case StyleTextTransform::CAPITALIZE._0: {
    681          if (capitalizeDutchIJ && ch == 'j') {
    682            ch = 'J';
    683            capitalizeDutchIJ = false;
    684            break;
    685          }
    686          capitalizeDutchIJ = false;
    687          // If we have a textrun, its mCapitalize array tells us which chars
    688          // are to be capitalized. If not, we track the state locally, and
    689          // assume there's no context to be considered.
    690          bool doCapitalize = false;
    691          if (aTextRun) {
    692            if (aOffsetInTextRun < aTextRun->mCapitalize.Length()) {
    693              doCapitalize = aTextRun->mCapitalize[aOffsetInTextRun];
    694            }
    695          } else {
    696            doCapitalize = nsLineBreaker::ShouldCapitalize(ch, capitalizeNext);
    697          }
    698          if (doCapitalize) {
    699            if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
    700              ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
    701              break;
    702            }
    703            if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
    704              ch = 'I';
    705              capitalizeDutchIJ = true;
    706              break;
    707            }
    708            if (languageSpecificCasing == eLSCB_Lithuanian) {
    709              /*
    710               * # Remove DOT ABOVE after "i" with upper or titlecase
    711               *
    712               * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
    713               */
    714              if (ch == 'i' || ch == 'j' || ch == 0x012F) {
    715                seenSoftDotted = true;
    716                ch = ToTitleCase(ch);
    717                break;
    718              }
    719              if (seenSoftDotted) {
    720                seenSoftDotted = false;
    721                if (ch == 0x0307) {
    722                  ch = uint32_t(-1);
    723                  break;
    724                }
    725              }
    726            }
    727 
    728            mcm = unicode::SpecialTitle(ch);
    729            if (mcm) {
    730              int j = 0;
    731              while (j < 2 && mcm->mMappedChars[j + 1]) {
    732                aConvertedString.Append(mcm->mMappedChars[j]);
    733                ++extraChars;
    734                ++j;
    735              }
    736              ch = mcm->mMappedChars[j];
    737              break;
    738            }
    739 
    740            ch = ToTitleCase(ch);
    741          }
    742          break;
    743        }
    744 
    745        case StyleTextTransform::MATH_AUTO._0:
    746          // text-transform: math-auto is used for automatic italicization of
    747          // single-char <mi> elements. However, some legacy cases (italic style
    748          // fallback and <mi> with leading/trailing whitespace) are still
    749          // handled in MathMLTextRunFactory.
    750          if (length == 1) {
    751            uint32_t ch2 =
    752                MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic);
    753            if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
    754              ch = ch2;
    755            } else if (ch2 != ch) {
    756              // Bug 930504. Some platforms do not have fonts for Mathematical
    757              // Alphanumeric Symbols. Hence we only perform the transform if a
    758              // character is actually available.
    759              auto* fontGroup = aTextRun->GetFontGroup();
    760              fontGroup->EnsureFontList();
    761              FontMatchType matchType;
    762              RefPtr<gfxFont> mathFont = fontGroup->FindFontForChar(
    763                  ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
    764              if (mathFont) {
    765                ch = ch2;
    766              }
    767            }
    768          }
    769          break;
    770        default:
    771          MOZ_ASSERT_UNREACHABLE("all cases should be handled");
    772          break;
    773      }
    774 
    775      if (!aCaseTransformsOnly) {
    776        if (!forceNonFullWidth && (style & StyleTextTransform::FULL_WIDTH)) {
    777          ch = unicode::GetFullWidth(ch);
    778        }
    779 
    780        if (style & StyleTextTransform::FULL_SIZE_KANA) {
    781          // clang-format off
    782          static const uint32_t kSmallKanas[] = {
    783              // ぁ   ぃ      ぅ      ぇ      ぉ      っ      ゃ      ゅ      ょ
    784              0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
    785              // ゎ   ゕ      ゖ
    786              0x308E, 0x3095, 0x3096,
    787              // ァ   ィ      ゥ      ェ      ォ      ッ      ャ      ュ      ョ
    788              0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
    789              // ヮ   ヵ      ヶ      ㇰ      ㇱ      ㇲ      ㇳ      ㇴ      ㇵ
    790              0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
    791              // ㇶ   ㇷ      ㇸ      ㇹ      ㇺ      ㇻ      ㇼ      ㇽ      ㇾ
    792              0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
    793              // ㇿ
    794              0x31FF,
    795              // ァ    ィ       ゥ       ェ       ォ       ャ       ュ       ョ       ッ
    796              0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
    797              // 𛄲    𛅐       𛅑       𛅒       𛅕       𛅤       𛅥       𛅦
    798              0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
    799              // 𛅧
    800              0x1B167};
    801          static const uint16_t kFullSizeKanas[] = {
    802              // あ   い      う      え      お      つ      や      ゆ      よ
    803              0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
    804              // わ   か      け
    805              0x308F, 0x304B, 0x3051,
    806              // ア   イ      ウ      エ      オ      ツ      ヤ      ユ      ヨ
    807              0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
    808              // ワ   カ      ケ      ク      シ      ス      ト      ヌ      ハ
    809              0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
    810              // ヒ   フ      ヘ      ホ      ム      ラ      リ      ル      レ
    811              0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
    812              // ロ
    813              0x30ED,
    814              // ア    イ       ウ       エ       オ       ヤ       ユ       ヨ        ツ
    815              0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
    816              // こ   ゐ       ゑ      を      コ       ヰ      ヱ      ヲ       ン
    817              0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
    818          // clang-format on
    819 
    820          size_t index;
    821          const uint16_t len = std::size(kSmallKanas);
    822          if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
    823            ch = kFullSizeKanas[index];
    824          }
    825        }
    826      }
    827 
    828      if (forceNonFullWidth) {
    829        ch = unicode::GetFullWidthInverse(ch);
    830      }
    831    }
    832 
    833    if (ch == uint32_t(-1)) {
    834      aDeletedCharsArray.AppendElement(true);
    835      mergeNeeded = true;
    836    } else {
    837      aDeletedCharsArray.AppendElement(false);
    838      aCharsToMergeArray.AppendElement(false);
    839      if (auxiliaryOutputArrays) {
    840        aStyleArray->AppendElement(charStyle);
    841        aCanBreakBeforeArray->AppendElement(
    842            inhibitBreakBefore
    843                ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
    844                : aTextRun->CanBreakBefore(aOffsetInTextRun));
    845      }
    846 
    847      if (IS_IN_BMP(ch)) {
    848        aConvertedString.Append(maskPassword ? mask : ch);
    849      } else {
    850        if (maskPassword) {
    851          aConvertedString.Append(mask);
    852          // TODO: We should show a password mask for a surrogate pair later.
    853          aConvertedString.Append(mask);
    854        } else {
    855          aConvertedString.Append(H_SURROGATE(ch));
    856          aConvertedString.Append(L_SURROGATE(ch));
    857        }
    858        ++extraChars;
    859      }
    860      if (!IS_IN_BMP(originalCh)) {
    861        // Skip the trailing surrogate.
    862        ++aOffsetInTextRun;
    863        ++i;
    864        aDeletedCharsArray.AppendElement(true);
    865      }
    866 
    867      while (extraChars-- > 0) {
    868        mergeNeeded = true;
    869        aCharsToMergeArray.AppendElement(true);
    870        if (auxiliaryOutputArrays) {
    871          aStyleArray->AppendElement(charStyle);
    872          aCanBreakBeforeArray->AppendElement(
    873              gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
    874        }
    875      }
    876    }
    877  }
    878 
    879  // These output arrays, if present, must always have matching lengths:
    880  if (auxiliaryOutputArrays) {
    881    DebugOnly<uint32_t> len = aCharsToMergeArray.Length();
    882    MOZ_ASSERT(aStyleArray->Length() == len);
    883    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
    884  }
    885 
    886  return mergeNeeded;
    887 }
    888 
    889 void nsCaseTransformTextRunFactory::RebuildTextRun(
    890    nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
    891    gfxMissingFontRecorder* aMFR) {
    892  nsAutoString convertedString;
    893  AutoTArray<bool, 50> charsToMergeArray;
    894  AutoTArray<bool, 50> deletedCharsArray;
    895  AutoTArray<uint8_t, 50> canBreakBeforeArray;
    896  AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
    897 
    898  auto globalTransform =
    899      mAllUppercase ? Some(StyleTextTransform::UPPERCASE) : Nothing();
    900  bool mergeNeeded = TransformString(
    901      aTextRun->mString, convertedString, globalTransform, mMaskChar,
    902      /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
    903      deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
    904 
    905  gfx::ShapedTextFlags flags;
    906  gfxTextRunFactory::Parameters innerParams =
    907      GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
    908  gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
    909 
    910  RefPtr<nsTransformedTextRun> transformedChild;
    911  RefPtr<gfxTextRun> cachedChild;
    912  gfxTextRun* child;
    913 
    914  if (mInnerTransformingTextRunFactory) {
    915    transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
    916        convertedString.BeginReading(), convertedString.Length(), &innerParams,
    917        fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
    918        false);
    919    child = transformedChild.get();
    920  } else {
    921    cachedChild = fontGroup->MakeTextRun(
    922        convertedString.BeginReading(), convertedString.Length(), &innerParams,
    923        flags, nsTextFrameUtils::Flags(), aMFR);
    924    child = cachedChild.get();
    925  }
    926  if (!child) {
    927    return;
    928  }
    929  // Copy potential linebreaks into child so they're preserved
    930  // (and also child will be shaped appropriately)
    931  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
    932               "Dropped characters or break-before values somewhere!");
    933  gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
    934  child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
    935  if (transformedChild) {
    936    transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
    937  }
    938 
    939  aTextRun->ResetGlyphRuns();
    940  if (mergeNeeded) {
    941    // Now merge multiple characters into one multi-glyph character as required
    942    // and deal with skipping deleted accent chars
    943    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
    944                 "source length mismatch");
    945    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
    946                 "destination length mismatch");
    947    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
    948                             deletedCharsArray.Elements());
    949  } else {
    950    // No merging to do, so just copy; this produces a more optimized textrun.
    951    // We can't steal the data because the child may be cached and stealing
    952    // the data would break the cache.
    953    aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
    954  }
    955 }