tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsLineBreaker.cpp (26042B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "nsLineBreaker.h"
      8 
      9 #include "gfxTextRun.h"  // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
     10 #include "mozilla/AutoRestore.h"
     11 #include "mozilla/ClearOnShutdown.h"
     12 #include "mozilla/ScopeExit.h"
     13 #include "mozilla/StaticPrefs_intl.h"
     14 #include "mozilla/gfx/2D.h"
     15 #include "mozilla/intl/LineBreaker.h"  // for LineBreaker::ComputeBreakPositions
     16 #include "mozilla/intl/Locale.h"
     17 #include "mozilla/intl/UnicodeProperties.h"
     18 #include "nsContentUtils.h"
     19 #include "nsHyphenationManager.h"
     20 #include "nsHyphenator.h"
     21 
     22 using mozilla::AutoRestore;
     23 using mozilla::intl::LineBreaker;
     24 using mozilla::intl::LineBreakRule;
     25 using mozilla::intl::Locale;
     26 using mozilla::intl::LocaleParser;
     27 using mozilla::intl::UnicodeProperties;
     28 using mozilla::intl::WordBreakRule;
     29 
     30 // There is no break opportunity between any pair of characters that has line
     31 // break class of either AL (Alphabetic), IS (Infix Numeric Separator), NU
     32 // (Numeric), or QU (Quotation). See
     33 // https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt for Unicode code
     34 // point and line break class mapping.
     35 static constexpr uint8_t kNonBreakableASCII[] = {
     36    // clang-format off
     37 // 0x20-0x2f
     38 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,
     39 // 0x30-0x3f
     40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
     41 // 0x40-0x4f
     42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     43 // 0x50-0x5f
     44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
     45 // 0x60-0x6f
     46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     47 // 0x70-0x7f
     48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
     49    // clang-format on
     50 };
     51 
     52 template <typename T>
     53 static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
     54  if (aLegacyBehavior) {
     55    // If not using ICU4X, line break rules aren't compatible with UAX#14. Use
     56    // old way.
     57    return (0x0030 <= aChar && aChar <= 0x0039) ||
     58           (0x0041 <= aChar && aChar <= 0x005A) ||
     59           (0x0061 <= aChar && aChar <= 0x007A) || (0x000a == aChar);
     60  }
     61  if (aChar < 0x20 || aChar > 0x7f) {
     62    return false;
     63  }
     64  return !!kNonBreakableASCII[aChar - 0x20];
     65 }
     66 
     67 nsLineBreaker::nsLineBreaker()
     68    : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
     69 
// A line breaker must not be destroyed while it still holds a pending word:
// callers are expected to have called Reset() (which flushes mCurrentWord)
// first. This is only checked via assertion; nothing is flushed here.
nsLineBreaker::~nsLineBreaker() {
  NS_ASSERTION(mCurrentWord.Length() == 0,
               "Should have Reset() before destruction!");
}
     74 
     75 /* static */
     76 bool nsLineBreaker::ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext) {
     77  using mozilla::intl::GeneralCategory;
     78  auto category = UnicodeProperties::CharType(aChar);
     79  switch (category) {
     80    case GeneralCategory::Uppercase_Letter:
     81    case GeneralCategory::Lowercase_Letter:
     82    case GeneralCategory::Titlecase_Letter:
     83    case GeneralCategory::Modifier_Letter:
     84    case GeneralCategory::Other_Letter:
     85    case GeneralCategory::Decimal_Number:
     86    case GeneralCategory::Letter_Number:
     87    case GeneralCategory::Other_Number:
     88      if (aCapitalizeNext) {
     89        aCapitalizeNext = false;
     90        return true;
     91      }
     92      break;
     93    case GeneralCategory::Space_Separator:
     94    case GeneralCategory::Line_Separator:
     95    case GeneralCategory::Paragraph_Separator:
     96    case GeneralCategory::Dash_Punctuation:
     97    case GeneralCategory::Initial_Punctuation:
     98      /* These punctuation categories are excluded, for examples like
     99       *   "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
    100       * and
    101       *   "snake_case" -> "Snake_case" (to match word selection behavior)
    102      case GeneralCategory::Open_Punctuation:
    103      case GeneralCategory::Close_Punctuation:
    104      case GeneralCategory::Connector_Punctuation:
    105       */
    106      aCapitalizeNext = true;
    107      break;
    108    case GeneralCategory::Final_Punctuation:
    109      /* Special-case: exclude Unicode single-close-quote/apostrophe,
    110         for examples like "Lowe’s" etc. */
    111      if (aChar != 0x2019) {
    112        aCapitalizeNext = true;
    113      }
    114      break;
    115    case GeneralCategory::Other_Punctuation:
    116      /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
    117         and MIDDLE DOT, for Catalan "l·l". */
    118      if (aChar != '\'' && aChar != 0x00B7) {
    119        aCapitalizeNext = true;
    120      }
    121      break;
    122    default:
    123      break;
    124  }
    125  return false;
    126 }
    127 
    128 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
    129                                bool* aCapitalization) {
    130  // Capitalize the first alphanumeric character after a space or punctuation.
    131  bool capitalizeNextChar = true;
    132  for (uint32_t i = 0; i < aLength; ++i) {
    133    uint32_t ch = aWord[i];
    134    if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
    135      ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
    136    }
    137    aCapitalization[i] =
    138        nsLineBreaker::ShouldCapitalize(ch, capitalizeNextChar);
    139 
    140    if (!IS_IN_BMP(ch)) {
    141      ++i;
    142    }
    143  }
    144 }
    145 
    146 nsresult nsLineBreaker::FlushCurrentWord() {
    147  auto cleanup = mozilla::MakeScopeExit([&] {
    148    mCurrentWord.Clear();
    149    mTextItems.Clear();
    150    mCurrentWordMightBeBreakable = false;
    151    mCurrentWordContainsMixedLang = false;
    152    mCurrentWordLanguage = nullptr;
    153    mWordContinuation = false;
    154  });
    155 
    156  uint32_t length = mCurrentWord.Length();
    157  AutoTArray<uint8_t, 4000> breakState;
    158  if (!breakState.AppendElements(length, mozilla::fallible)) {
    159    return NS_ERROR_OUT_OF_MEMORY;
    160  }
    161 
    162  if (mLineBreak == LineBreakRule::Anywhere) {
    163    memset(breakState.Elements(),
    164           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
    165           length * sizeof(uint8_t));
    166  } else if (!mCurrentWordMightBeBreakable &&
    167             mWordBreak != WordBreakRule::BreakAll) {
    168    // word-break: normal or keep-all has no break opportunity if the word
    169    // is non-breakable. (See the comment of kNonBreakableASCII).
    170    memset(breakState.Elements(),
    171           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
    172           length * sizeof(uint8_t));
    173  } else {
    174    LineBreaker::ComputeBreakPositions(
    175        mCurrentWord.Elements(), length, mWordBreak, mLineBreak,
    176        mScriptIsChineseOrJapanese, breakState.Elements());
    177  }
    178 
    179  bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
    180  uint32_t i;
    181  for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
    182    TextItem* ti = &mTextItems[i];
    183    if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
    184      autoHyphenate = false;
    185    }
    186  }
    187  if (autoHyphenate) {
    188    RefPtr<nsHyphenator> hyphenator =
    189        nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
    190    if (hyphenator) {
    191      FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
    192                            mCurrentWord.Elements() + length,
    193                            breakState.Elements());
    194    }
    195  }
    196 
    197  nsTArray<bool> capitalizationState;
    198  uint32_t offset = 0;
    199  for (i = 0; i < mTextItems.Length(); ++i) {
    200    TextItem* ti = &mTextItems[i];
    201    NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
    202 
    203    if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
    204      breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    205    }
    206    if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
    207      uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
    208      memset(breakState.Elements() + offset + exclude,
    209             gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
    210             (ti->mLength - exclude) * sizeof(uint8_t));
    211    }
    212 
    213    // Don't set the break state for the first character of the word, because
    214    // it was already set correctly earlier and we don't know what the true
    215    // value should be.
    216    uint32_t skipSet = i == 0 ? 1 : 0;
    217    if (ti->mSink) {
    218      ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
    219                           breakState.Elements() + offset + skipSet);
    220 
    221      if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
    222        if (capitalizationState.Length() == 0) {
    223          if (!capitalizationState.AppendElements(length, mozilla::fallible)) {
    224            return NS_ERROR_OUT_OF_MEMORY;
    225          }
    226          memset(capitalizationState.Elements(), false, length * sizeof(bool));
    227          SetupCapitalization(mCurrentWord.Elements(), length,
    228                              capitalizationState.Elements());
    229        }
    230        ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
    231                                     capitalizationState.Elements() + offset);
    232      }
    233    }
    234 
    235    offset += ti->mLength;
    236  }
    237 
    238  return NS_OK;
    239 }
    240 
// If the aFlags parameter to AppendText has all these bits set,
// then we don't need to worry about finding break opportunities
// in the appended text. (AppendText additionally requires that neither
// mBreakHere nor mAfterBreakableSpace is pending before it skips the
// break computation for a chunk.)
#define NO_BREAKS_NEEDED_FLAGS                      \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
   BREAK_SKIP_SETTING_NO_BREAKS)
    247 
    248 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
    249                                   const char16_t* aText, uint32_t aLength,
    250                                   uint32_t aFlags, nsILineBreakSink* aSink) {
    251  NS_ASSERTION(aLength > 0, "Appending empty text...");
    252 
    253  uint32_t offset = 0;
    254 
    255  // Continue the current word
    256  if (mCurrentWord.Length() > 0) {
    257    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
    258                 "These should not be set");
    259 
    260    while (offset < aLength && !IsSegmentSpace(aText[offset])) {
    261      mCurrentWord.AppendElement(aText[offset]);
    262      if (!mCurrentWordMightBeBreakable &&
    263          !IsNonBreakableChar<char16_t>(aText[offset], mLegacyBehavior)) {
    264        mCurrentWordMightBeBreakable = true;
    265      }
    266      UpdateCurrentWordLanguage(aHyphenationLanguage);
    267      ++offset;
    268    }
    269 
    270    if (offset > 0) {
    271      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    272    }
    273 
    274    if (offset == aLength) {
    275      return NS_OK;
    276    }
    277 
    278    // We encountered whitespace, so we're done with this word
    279    nsresult rv = FlushCurrentWord();
    280    if (NS_FAILED(rv)) {
    281      return rv;
    282    }
    283  }
    284 
    285  AutoTArray<uint8_t, 4000> breakState;
    286  if (aSink) {
    287    if (!breakState.AppendElements(aLength, mozilla::fallible)) {
    288      return NS_ERROR_OUT_OF_MEMORY;
    289    }
    290  }
    291 
    292  bool noCapitalizationNeeded = true;
    293  nsTArray<bool> capitalizationState;
    294  if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
    295    if (!capitalizationState.AppendElements(aLength, mozilla::fallible)) {
    296      return NS_ERROR_OUT_OF_MEMORY;
    297    }
    298    memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
    299    noCapitalizationNeeded = false;
    300  }
    301 
    302  uint32_t start = offset;
    303  bool noBreaksNeeded =
    304      !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
    305                 !mBreakHere && !mAfterBreakableSpace);
    306  if (noBreaksNeeded && noCapitalizationNeeded) {
    307    // Skip to the space before the last word, since either the break data
    308    // here is not needed, or no breaks are set in the sink and there cannot
    309    // be any breaks in this chunk; and we don't need to do word-initial
    310    // capitalization. All we need is the context for the next chunk (if any).
    311    offset = aLength;
    312    while (offset > start) {
    313      --offset;
    314      if (IsSegmentSpace(aText[offset])) {
    315        break;
    316      }
    317    }
    318  }
    319  uint32_t wordStart = offset;
    320  bool wordMightBeBreakable = false;
    321 
    322  RefPtr<nsHyphenator> hyphenator;
    323  if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
    324      !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
    325    hyphenator =
    326        nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
    327  }
    328 
    329  for (;;) {
    330    char16_t ch = aText[offset];
    331    bool isSpace = IsSegmentSpace(ch);
    332    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
    333 
    334    if (aSink && !noBreaksNeeded) {
    335      breakState[offset] =
    336          mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
    337                  mWordBreak == WordBreakRule::BreakAll ||
    338                  mLineBreak == LineBreakRule::Anywhere
    339              ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
    340              : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    341    }
    342    mBreakHere = false;
    343    mAfterBreakableSpace = isBreakableSpace;
    344 
    345    if (isSpace || ch == '\n') {
    346      if (offset > wordStart && aSink) {
    347        if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
    348          if (mLineBreak == LineBreakRule::Anywhere) {
    349            memset(breakState.Elements() + wordStart,
    350                   gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
    351                   offset - wordStart);
    352          } else if (wordMightBeBreakable) {
    353            // Save current start-of-word state because ComputeBreakPositions()
    354            // will set it to false.
    355            AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
    356            LineBreaker::ComputeBreakPositions(
    357                aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
    358                mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
    359          }
    360          if (hyphenator) {
    361            FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
    362                                  breakState.Elements() + wordStart);
    363          }
    364        }
    365        if (!mWordContinuation && !noCapitalizationNeeded) {
    366          SetupCapitalization(aText + wordStart, offset - wordStart,
    367                              capitalizationState.Elements() + wordStart);
    368        }
    369      }
    370      wordMightBeBreakable = false;
    371      mWordContinuation = false;
    372      ++offset;
    373      if (offset >= aLength) {
    374        break;
    375      }
    376      wordStart = offset;
    377      continue;
    378    }
    379 
    380    if (!wordMightBeBreakable &&
    381        !IsNonBreakableChar<char16_t>(ch, mLegacyBehavior)) {
    382      wordMightBeBreakable = true;
    383    }
    384    ++offset;
    385    if (offset >= aLength) {
    386      // Save this word
    387      mCurrentWordMightBeBreakable = wordMightBeBreakable;
    388      uint32_t len = offset - wordStart;
    389      char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
    390      if (!elems) {
    391        return NS_ERROR_OUT_OF_MEMORY;
    392      }
    393      memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
    394      mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
    395      // Ensure that the break-before for this word is written out
    396      offset = wordStart + 1;
    397      UpdateCurrentWordLanguage(aHyphenationLanguage);
    398      break;
    399    }
    400  }
    401 
    402  if (aSink) {
    403    if (!noBreaksNeeded) {
    404      aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
    405    }
    406    if (!noCapitalizationNeeded) {
    407      aSink->SetCapitalization(start, offset - start,
    408                               capitalizationState.Elements() + start);
    409    }
    410  }
    411  return NS_OK;
    412 }
    413 
    414 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
    415                                          const char16_t* aTextStart,
    416                                          const char16_t* aTextLimit,
    417                                          uint8_t* aBreakState) {
    418  // Early-return for words that are definitely too short to hyphenate.
    419  if (aTextLimit - aTextStart < mHyphenateLimitWord) {
    420    return;
    421  }
    422 
    423  nsDependentSubstring string(aTextStart, aTextLimit);
    424  AutoTArray<bool, 200> hyphens;
    425  if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
    426    return;
    427  }
    428 
    429  // Keep track of the length seen so far, in terms of characters that are
    430  // countable for hyphenate-limit-chars purposes.
    431  uint32_t length = 0;
    432  // When setting a potential break in aBreakState, we record the previous
    433  // value in case we need to restore it because the position turns out to
    434  // be too close to the end of the word.
    435  struct BreakInfo {
    436    uint32_t mPosition;
    437    uint32_t mLength;
    438    uint8_t mState;
    439  };
    440  AutoTArray<BreakInfo, 16> oldBreaks;
    441  // Don't consider setting any breaks where i >= endLimit, as they will
    442  // definitely be too near the end of the word to be accepted.
    443  uint32_t endLimit =
    444      string.Length() - std::max<uint32_t>(1u, mHyphenateLimitEnd);
    445  for (uint32_t i = 0; i < string.Length(); ++i) {
    446    // Get current character, converting surrogate pairs to UCS4 for char
    447    // category lookup.
    448    uint32_t ch = string[i];
    449    if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
    450        NS_IS_LOW_SURROGATE(string[i + 1])) {
    451      ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
    452    }
    453 
    454    // According to CSS Text, "Nonspacing combining marks (Unicode General
    455    // Category Mn) and intra-word punctuation (Unicode General Category P*)
    456    // do not count towards the minimum."
    457    // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
    458    // We also don't count Control or Format categories.
    459    using mozilla::intl::GeneralCategory;
    460    switch (UnicodeProperties::CharType(ch)) {
    461      case GeneralCategory::Nonspacing_Mark:
    462      case GeneralCategory::Dash_Punctuation:
    463      case GeneralCategory::Open_Punctuation:
    464      case GeneralCategory::Close_Punctuation:
    465      case GeneralCategory::Connector_Punctuation:
    466      case GeneralCategory::Other_Punctuation:
    467      case GeneralCategory::Initial_Punctuation:
    468      case GeneralCategory::Final_Punctuation:
    469      case GeneralCategory::Control:
    470      case GeneralCategory::Format:
    471      case GeneralCategory::Surrogate:
    472        break;
    473      default:
    474        ++length;
    475        break;
    476    }
    477 
    478    // Don't accept any breaks until we're far enough into the word, or if
    479    // we're too near the end for it to possibly be accepted. (Note that the
    480    // check against endLimit is just an initial worst-case check that assumes
    481    // all the remaining characters are countable; if there are combining
    482    // marks, etc., in the trailing part of the word we may need to reset the
    483    // potential break later, after we've fully counted length.)
    484    if (hyphens[i] && length >= mHyphenateLimitStart && i < endLimit) {
    485      // Keep track of hyphen position and "countable" length of the word.
    486      oldBreaks.AppendElement(BreakInfo{i + 1, length, aBreakState[i + 1]});
    487      aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
    488    }
    489 
    490    // If the character was outside the BMP, skip past the low surrogate.
    491    if (!IS_IN_BMP(ch)) {
    492      ++i;
    493    }
    494  }
    495 
    496  if (length < mHyphenateLimitWord) {
    497    // After discounting combining marks, punctuation, controls, etc., the word
    498    // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
    499    // forget them.
    500    while (!oldBreaks.IsEmpty()) {
    501      auto lastBreak = oldBreaks.PopLastElement();
    502      aBreakState[lastBreak.mPosition] = lastBreak.mState;
    503    }
    504  } else {
    505    // Check if trailing fragment is too short; if so, remove the last hyphen
    506    // break(s) that we set, until the fragment will be long enough.
    507    while (!oldBreaks.IsEmpty()) {
    508      auto lastBreak = oldBreaks.PopLastElement();
    509      if (length - lastBreak.mLength >= mHyphenateLimitEnd) {
    510        break;
    511      }
    512      aBreakState[lastBreak.mPosition] = lastBreak.mState;
    513    }
    514  }
    515 }
    516 
    517 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
    518                                   const uint8_t* aText, uint32_t aLength,
    519                                   uint32_t aFlags, nsILineBreakSink* aSink) {
    520  NS_ASSERTION(aLength > 0, "Appending empty text...");
    521 
    522  if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
    523    // Defer to the Unicode path if capitalization or hyphenation is required
    524    nsAutoString str;
    525    const char* cp = reinterpret_cast<const char*>(aText);
    526    CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
    527    return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
    528  }
    529 
    530  uint32_t offset = 0;
    531 
    532  // Continue the current word
    533  if (mCurrentWord.Length() > 0) {
    534    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
    535                 "These should not be set");
    536 
    537    while (offset < aLength && !IsSegmentSpace(aText[offset])) {
    538      mCurrentWord.AppendElement(aText[offset]);
    539      if (!mCurrentWordMightBeBreakable &&
    540          !IsNonBreakableChar<uint8_t>(aText[offset], mLegacyBehavior)) {
    541        mCurrentWordMightBeBreakable = true;
    542      }
    543      ++offset;
    544    }
    545 
    546    if (offset > 0) {
    547      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    548    }
    549 
    550    if (offset == aLength) {
    551      // We did not encounter whitespace so the word hasn't finished yet.
    552      return NS_OK;
    553    }
    554 
    555    // We encountered whitespace, so we're done with this word
    556    nsresult rv = FlushCurrentWord();
    557    if (NS_FAILED(rv)) {
    558      return rv;
    559    }
    560  }
    561 
    562  AutoTArray<uint8_t, 4000> breakState;
    563  if (aSink) {
    564    if (!breakState.AppendElements(aLength, mozilla::fallible)) {
    565      return NS_ERROR_OUT_OF_MEMORY;
    566    }
    567  }
    568 
    569  uint32_t start = offset;
    570  bool noBreaksNeeded =
    571      !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
    572                 !mBreakHere && !mAfterBreakableSpace);
    573  if (noBreaksNeeded) {
    574    // Skip to the space before the last word, since either the break data
    575    // here is not needed, or no breaks are set in the sink and there cannot
    576    // be any breaks in this chunk; all we need is the context for the next
    577    // chunk (if any)
    578    offset = aLength;
    579    while (offset > start) {
    580      --offset;
    581      if (IsSegmentSpace(aText[offset])) {
    582        break;
    583      }
    584    }
    585  }
    586  uint32_t wordStart = offset;
    587  bool wordMightBeBreakable = false;
    588 
    589  for (;;) {
    590    uint8_t ch = aText[offset];
    591    bool isSpace = IsSegmentSpace(ch);
    592    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
    593 
    594    if (aSink) {
    595      // Consider word-break style.  Since the break position of CJK scripts
    596      // will be set by nsILineBreaker, we don't consider CJK at this point.
    597      breakState[offset] =
    598          mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
    599                  mWordBreak == WordBreakRule::BreakAll ||
    600                  mLineBreak == LineBreakRule::Anywhere
    601              ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
    602              : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    603    }
    604    mBreakHere = false;
    605    mAfterBreakableSpace = isBreakableSpace;
    606 
    607    if (isSpace) {
    608      if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
    609        if (mLineBreak == LineBreakRule::Anywhere) {
    610          memset(breakState.Elements() + wordStart,
    611                 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
    612                 offset - wordStart);
    613        } else if (wordMightBeBreakable) {
    614          // Save current start-of-word state because ComputeBreakPositions()
    615          // will set it to false.
    616          AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
    617          LineBreaker::ComputeBreakPositions(
    618              aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
    619              mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
    620        }
    621      }
    622 
    623      wordMightBeBreakable = false;
    624      mWordContinuation = false;
    625      ++offset;
    626      if (offset >= aLength) {
    627        break;
    628      }
    629      wordStart = offset;
    630      continue;
    631    }
    632 
    633    if (!wordMightBeBreakable &&
    634        !IsNonBreakableChar<uint8_t>(ch, mLegacyBehavior)) {
    635      wordMightBeBreakable = true;
    636    }
    637    ++offset;
    638    if (offset >= aLength) {
    639      // Save this word
    640      mCurrentWordMightBeBreakable = wordMightBeBreakable;
    641      uint32_t len = offset - wordStart;
    642      char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
    643      if (!elems) {
    644        return NS_ERROR_OUT_OF_MEMORY;
    645      }
    646      uint32_t i;
    647      for (i = wordStart; i < offset; ++i) {
    648        elems[i - wordStart] = aText[i];
    649      }
    650      mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
    651      // Ensure that the break-before for this word is written out
    652      offset = wordStart + 1;
    653      break;
    654    }
    655  }
    656 
    657  if (!noBreaksNeeded) {
    658    aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
    659  }
    660  return NS_OK;
    661 }
    662 
    663 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
    664  if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
    665    mCurrentWordContainsMixedLang = true;
    666    mScriptIsChineseOrJapanese = false;
    667    return;
    668  }
    669 
    670  if (aHyphenationLanguage && !mCurrentWordLanguage) {
    671    static mozilla::StaticRefPtr<nsAtom> sLastHyphenationLanguage;
    672    static bool sLastScriptIsChineseOrJapanese = false;
    673    static bool sInit = false;
    674 
    675    if (!sInit) {
    676      mozilla::ClearOnShutdown(&sLastHyphenationLanguage);
    677      sInit = true;
    678    }
    679 
    680    if (sLastHyphenationLanguage == aHyphenationLanguage) {
    681      MOZ_ASSERT(nsAtomString(sLastHyphenationLanguage)
    682                     .Equals(nsAtomString(aHyphenationLanguage)));
    683      mScriptIsChineseOrJapanese = sLastScriptIsChineseOrJapanese;
    684    } else {
    685      Locale loc;
    686      auto result =
    687          LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage), loc);
    688 
    689      if (result.isErr()) {
    690        return;
    691      }
    692      if (loc.Script().Missing() && loc.AddLikelySubtags().isErr()) {
    693        return;
    694      }
    695      mScriptIsChineseOrJapanese =
    696          loc.Script().EqualTo("Hans") || loc.Script().EqualTo("Hant") ||
    697          loc.Script().EqualTo("Jpan") || loc.Script().EqualTo("Hrkt");
    698 
    699      sLastHyphenationLanguage = aHyphenationLanguage;
    700      sLastScriptIsChineseOrJapanese = mScriptIsChineseOrJapanese;
    701    }
    702  }
    703  mCurrentWordLanguage = aHyphenationLanguage;
    704 }
    705 
    706 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
    707  nsresult rv = FlushCurrentWord();
    708  if (NS_FAILED(rv)) {
    709    return rv;
    710  }
    711 
    712  bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
    713  if (mAfterBreakableSpace && !isBreakableSpace) {
    714    mBreakHere = true;
    715  }
    716  mAfterBreakableSpace = isBreakableSpace;
    717  mWordContinuation = false;
    718  return NS_OK;
    719 }
    720 
    721 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
    722  nsresult rv = FlushCurrentWord();
    723  if (NS_FAILED(rv)) {
    724    return rv;
    725  }
    726 
    727  *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
    728  mBreakHere = false;
    729  mAfterBreakableSpace = false;
    730  return NS_OK;
    731 }