nsLineBreaker.cpp (26042B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsLineBreaker.h"

#include "gfxTextRun.h"  // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
#include "mozilla/AutoRestore.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/ScopeExit.h"
#include "mozilla/StaticPrefs_intl.h"
#include "mozilla/gfx/2D.h"
#include "mozilla/intl/LineBreaker.h"  // for LineBreaker::ComputeBreakPositions
#include "mozilla/intl/Locale.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsContentUtils.h"
#include "nsHyphenationManager.h"
#include "nsHyphenator.h"

using mozilla::AutoRestore;
using mozilla::intl::LineBreaker;
using mozilla::intl::LineBreakRule;
using mozilla::intl::Locale;
using mozilla::intl::LocaleParser;
using mozilla::intl::UnicodeProperties;
using mozilla::intl::WordBreakRule;

// There is no break opportunity between any pair of characters that has line
// break class of either AL (Alphabetic), IS (Infix Numeric Separator), NU
// (Numeric), or QU (Quotation). See
// https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt for Unicode code
// point and line break class mapping.
35 static constexpr uint8_t kNonBreakableASCII[] = { 36 // clang-format off 37 // 0x20-0x2f 38 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 39 // 0x30-0x3f 40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 41 // 0x40-0x4f 42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 // 0x50-0x5f 44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 45 // 0x60-0x6f 46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 // 0x70-0x7f 48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 49 // clang-format on 50 }; 51 52 template <typename T> 53 static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) { 54 if (aLegacyBehavior) { 55 // If not using ICU4X, line break rules aren't compatible with UAX#14. Use 56 // old way. 57 return (0x0030 <= aChar && aChar <= 0x0039) || 58 (0x0041 <= aChar && aChar <= 0x005A) || 59 (0x0061 <= aChar && aChar <= 0x007A) || (0x000a == aChar); 60 } 61 if (aChar < 0x20 || aChar > 0x7f) { 62 return false; 63 } 64 return !!kNonBreakableASCII[aChar - 0x20]; 65 } 66 67 nsLineBreaker::nsLineBreaker() 68 : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {} 69 70 nsLineBreaker::~nsLineBreaker() { 71 NS_ASSERTION(mCurrentWord.Length() == 0, 72 "Should have Reset() before destruction!"); 73 } 74 75 /* static */ 76 bool nsLineBreaker::ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext) { 77 using mozilla::intl::GeneralCategory; 78 auto category = UnicodeProperties::CharType(aChar); 79 switch (category) { 80 case GeneralCategory::Uppercase_Letter: 81 case GeneralCategory::Lowercase_Letter: 82 case GeneralCategory::Titlecase_Letter: 83 case GeneralCategory::Modifier_Letter: 84 case GeneralCategory::Other_Letter: 85 case GeneralCategory::Decimal_Number: 86 case GeneralCategory::Letter_Number: 87 case GeneralCategory::Other_Number: 88 if (aCapitalizeNext) { 89 aCapitalizeNext = false; 90 return true; 91 } 92 break; 93 case GeneralCategory::Space_Separator: 94 case GeneralCategory::Line_Separator: 95 case 
GeneralCategory::Paragraph_Separator: 96 case GeneralCategory::Dash_Punctuation: 97 case GeneralCategory::Initial_Punctuation: 98 /* These punctuation categories are excluded, for examples like 99 * "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?") 100 * and 101 * "snake_case" -> "Snake_case" (to match word selection behavior) 102 case GeneralCategory::Open_Punctuation: 103 case GeneralCategory::Close_Punctuation: 104 case GeneralCategory::Connector_Punctuation: 105 */ 106 aCapitalizeNext = true; 107 break; 108 case GeneralCategory::Final_Punctuation: 109 /* Special-case: exclude Unicode single-close-quote/apostrophe, 110 for examples like "Lowe’s" etc. */ 111 if (aChar != 0x2019) { 112 aCapitalizeNext = true; 113 } 114 break; 115 case GeneralCategory::Other_Punctuation: 116 /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc., 117 and MIDDLE DOT, for Catalan "l·l". */ 118 if (aChar != '\'' && aChar != 0x00B7) { 119 aCapitalizeNext = true; 120 } 121 break; 122 default: 123 break; 124 } 125 return false; 126 } 127 128 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength, 129 bool* aCapitalization) { 130 // Capitalize the first alphanumeric character after a space or punctuation. 
131 bool capitalizeNextChar = true; 132 for (uint32_t i = 0; i < aLength; ++i) { 133 uint32_t ch = aWord[i]; 134 if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) { 135 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); 136 } 137 aCapitalization[i] = 138 nsLineBreaker::ShouldCapitalize(ch, capitalizeNextChar); 139 140 if (!IS_IN_BMP(ch)) { 141 ++i; 142 } 143 } 144 } 145 146 nsresult nsLineBreaker::FlushCurrentWord() { 147 auto cleanup = mozilla::MakeScopeExit([&] { 148 mCurrentWord.Clear(); 149 mTextItems.Clear(); 150 mCurrentWordMightBeBreakable = false; 151 mCurrentWordContainsMixedLang = false; 152 mCurrentWordLanguage = nullptr; 153 mWordContinuation = false; 154 }); 155 156 uint32_t length = mCurrentWord.Length(); 157 AutoTArray<uint8_t, 4000> breakState; 158 if (!breakState.AppendElements(length, mozilla::fallible)) { 159 return NS_ERROR_OUT_OF_MEMORY; 160 } 161 162 if (mLineBreak == LineBreakRule::Anywhere) { 163 memset(breakState.Elements(), 164 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, 165 length * sizeof(uint8_t)); 166 } else if (!mCurrentWordMightBeBreakable && 167 mWordBreak != WordBreakRule::BreakAll) { 168 // word-break: normal or keep-all has no break opportunity if the word 169 // is non-breakable. (See the comment of kNonBreakableASCII). 
170 memset(breakState.Elements(), 171 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, 172 length * sizeof(uint8_t)); 173 } else { 174 LineBreaker::ComputeBreakPositions( 175 mCurrentWord.Elements(), length, mWordBreak, mLineBreak, 176 mScriptIsChineseOrJapanese, breakState.Elements()); 177 } 178 179 bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang; 180 uint32_t i; 181 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) { 182 TextItem* ti = &mTextItems[i]; 183 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) { 184 autoHyphenate = false; 185 } 186 } 187 if (autoHyphenate) { 188 RefPtr<nsHyphenator> hyphenator = 189 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage); 190 if (hyphenator) { 191 FindHyphenationPoints(hyphenator, mCurrentWord.Elements(), 192 mCurrentWord.Elements() + length, 193 breakState.Elements()); 194 } 195 } 196 197 nsTArray<bool> capitalizationState; 198 uint32_t offset = 0; 199 for (i = 0; i < mTextItems.Length(); ++i) { 200 TextItem* ti = &mTextItems[i]; 201 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?"); 202 203 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) { 204 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 205 } 206 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) { 207 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0; 208 memset(breakState.Elements() + offset + exclude, 209 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, 210 (ti->mLength - exclude) * sizeof(uint8_t)); 211 } 212 213 // Don't set the break state for the first character of the word, because 214 // it was already set correctly earlier and we don't know what the true 215 // value should be. 216 uint32_t skipSet = i == 0 ? 
1 : 0; 217 if (ti->mSink) { 218 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet, 219 breakState.Elements() + offset + skipSet); 220 221 if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) { 222 if (capitalizationState.Length() == 0) { 223 if (!capitalizationState.AppendElements(length, mozilla::fallible)) { 224 return NS_ERROR_OUT_OF_MEMORY; 225 } 226 memset(capitalizationState.Elements(), false, length * sizeof(bool)); 227 SetupCapitalization(mCurrentWord.Elements(), length, 228 capitalizationState.Elements()); 229 } 230 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength, 231 capitalizationState.Elements() + offset); 232 } 233 } 234 235 offset += ti->mLength; 236 } 237 238 return NS_OK; 239 } 240 241 // If the aFlags parameter to AppendText has all these bits set, 242 // then we don't need to worry about finding break opportunities 243 // in the appended text. 244 #define NO_BREAKS_NEEDED_FLAGS \ 245 (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \ 246 BREAK_SKIP_SETTING_NO_BREAKS) 247 248 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, 249 const char16_t* aText, uint32_t aLength, 250 uint32_t aFlags, nsILineBreakSink* aSink) { 251 NS_ASSERTION(aLength > 0, "Appending empty text..."); 252 253 uint32_t offset = 0; 254 255 // Continue the current word 256 if (mCurrentWord.Length() > 0) { 257 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, 258 "These should not be set"); 259 260 while (offset < aLength && !IsSegmentSpace(aText[offset])) { 261 mCurrentWord.AppendElement(aText[offset]); 262 if (!mCurrentWordMightBeBreakable && 263 !IsNonBreakableChar<char16_t>(aText[offset], mLegacyBehavior)) { 264 mCurrentWordMightBeBreakable = true; 265 } 266 UpdateCurrentWordLanguage(aHyphenationLanguage); 267 ++offset; 268 } 269 270 if (offset > 0) { 271 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); 272 } 273 274 if (offset == aLength) { 275 return NS_OK; 276 } 277 278 // We encountered whitespace, 
so we're done with this word 279 nsresult rv = FlushCurrentWord(); 280 if (NS_FAILED(rv)) { 281 return rv; 282 } 283 } 284 285 AutoTArray<uint8_t, 4000> breakState; 286 if (aSink) { 287 if (!breakState.AppendElements(aLength, mozilla::fallible)) { 288 return NS_ERROR_OUT_OF_MEMORY; 289 } 290 } 291 292 bool noCapitalizationNeeded = true; 293 nsTArray<bool> capitalizationState; 294 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) { 295 if (!capitalizationState.AppendElements(aLength, mozilla::fallible)) { 296 return NS_ERROR_OUT_OF_MEMORY; 297 } 298 memset(capitalizationState.Elements(), false, aLength * sizeof(bool)); 299 noCapitalizationNeeded = false; 300 } 301 302 uint32_t start = offset; 303 bool noBreaksNeeded = 304 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS && 305 !mBreakHere && !mAfterBreakableSpace); 306 if (noBreaksNeeded && noCapitalizationNeeded) { 307 // Skip to the space before the last word, since either the break data 308 // here is not needed, or no breaks are set in the sink and there cannot 309 // be any breaks in this chunk; and we don't need to do word-initial 310 // capitalization. All we need is the context for the next chunk (if any). 
311 offset = aLength; 312 while (offset > start) { 313 --offset; 314 if (IsSegmentSpace(aText[offset])) { 315 break; 316 } 317 } 318 } 319 uint32_t wordStart = offset; 320 bool wordMightBeBreakable = false; 321 322 RefPtr<nsHyphenator> hyphenator; 323 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && 324 !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) { 325 hyphenator = 326 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage); 327 } 328 329 for (;;) { 330 char16_t ch = aText[offset]; 331 bool isSpace = IsSegmentSpace(ch); 332 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); 333 334 if (aSink && !noBreaksNeeded) { 335 breakState[offset] = 336 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || 337 mWordBreak == WordBreakRule::BreakAll || 338 mLineBreak == LineBreakRule::Anywhere 339 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL 340 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 341 } 342 mBreakHere = false; 343 mAfterBreakableSpace = isBreakableSpace; 344 345 if (isSpace || ch == '\n') { 346 if (offset > wordStart && aSink) { 347 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) { 348 if (mLineBreak == LineBreakRule::Anywhere) { 349 memset(breakState.Elements() + wordStart, 350 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, 351 offset - wordStart); 352 } else if (wordMightBeBreakable) { 353 // Save current start-of-word state because ComputeBreakPositions() 354 // will set it to false. 
355 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]); 356 LineBreaker::ComputeBreakPositions( 357 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak, 358 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart); 359 } 360 if (hyphenator) { 361 FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset, 362 breakState.Elements() + wordStart); 363 } 364 } 365 if (!mWordContinuation && !noCapitalizationNeeded) { 366 SetupCapitalization(aText + wordStart, offset - wordStart, 367 capitalizationState.Elements() + wordStart); 368 } 369 } 370 wordMightBeBreakable = false; 371 mWordContinuation = false; 372 ++offset; 373 if (offset >= aLength) { 374 break; 375 } 376 wordStart = offset; 377 continue; 378 } 379 380 if (!wordMightBeBreakable && 381 !IsNonBreakableChar<char16_t>(ch, mLegacyBehavior)) { 382 wordMightBeBreakable = true; 383 } 384 ++offset; 385 if (offset >= aLength) { 386 // Save this word 387 mCurrentWordMightBeBreakable = wordMightBeBreakable; 388 uint32_t len = offset - wordStart; 389 char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible); 390 if (!elems) { 391 return NS_ERROR_OUT_OF_MEMORY; 392 } 393 memcpy(elems, aText + wordStart, sizeof(char16_t) * len); 394 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); 395 // Ensure that the break-before for this word is written out 396 offset = wordStart + 1; 397 UpdateCurrentWordLanguage(aHyphenationLanguage); 398 break; 399 } 400 } 401 402 if (aSink) { 403 if (!noBreaksNeeded) { 404 aSink->SetBreaks(start, offset - start, breakState.Elements() + start); 405 } 406 if (!noCapitalizationNeeded) { 407 aSink->SetCapitalization(start, offset - start, 408 capitalizationState.Elements() + start); 409 } 410 } 411 return NS_OK; 412 } 413 414 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator, 415 const char16_t* aTextStart, 416 const char16_t* aTextLimit, 417 uint8_t* aBreakState) { 418 // Early-return for words that are definitely 
too short to hyphenate. 419 if (aTextLimit - aTextStart < mHyphenateLimitWord) { 420 return; 421 } 422 423 nsDependentSubstring string(aTextStart, aTextLimit); 424 AutoTArray<bool, 200> hyphens; 425 if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) { 426 return; 427 } 428 429 // Keep track of the length seen so far, in terms of characters that are 430 // countable for hyphenate-limit-chars purposes. 431 uint32_t length = 0; 432 // When setting a potential break in aBreakState, we record the previous 433 // value in case we need to restore it because the position turns out to 434 // be too close to the end of the word. 435 struct BreakInfo { 436 uint32_t mPosition; 437 uint32_t mLength; 438 uint8_t mState; 439 }; 440 AutoTArray<BreakInfo, 16> oldBreaks; 441 // Don't consider setting any breaks where i >= endLimit, as they will 442 // definitely be too near the end of the word to be accepted. 443 uint32_t endLimit = 444 string.Length() - std::max<uint32_t>(1u, mHyphenateLimitEnd); 445 for (uint32_t i = 0; i < string.Length(); ++i) { 446 // Get current character, converting surrogate pairs to UCS4 for char 447 // category lookup. 448 uint32_t ch = string[i]; 449 if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() && 450 NS_IS_LOW_SURROGATE(string[i + 1])) { 451 ch = SURROGATE_TO_UCS4(ch, string[i + 1]); 452 } 453 454 // According to CSS Text, "Nonspacing combining marks (Unicode General 455 // Category Mn) and intra-word punctuation (Unicode General Category P*) 456 // do not count towards the minimum." 457 // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits) 458 // We also don't count Control or Format categories. 
459 using mozilla::intl::GeneralCategory; 460 switch (UnicodeProperties::CharType(ch)) { 461 case GeneralCategory::Nonspacing_Mark: 462 case GeneralCategory::Dash_Punctuation: 463 case GeneralCategory::Open_Punctuation: 464 case GeneralCategory::Close_Punctuation: 465 case GeneralCategory::Connector_Punctuation: 466 case GeneralCategory::Other_Punctuation: 467 case GeneralCategory::Initial_Punctuation: 468 case GeneralCategory::Final_Punctuation: 469 case GeneralCategory::Control: 470 case GeneralCategory::Format: 471 case GeneralCategory::Surrogate: 472 break; 473 default: 474 ++length; 475 break; 476 } 477 478 // Don't accept any breaks until we're far enough into the word, or if 479 // we're too near the end for it to possibly be accepted. (Note that the 480 // check against endLimit is just an initial worst-case check that assumes 481 // all the remaining characters are countable; if there are combining 482 // marks, etc., in the trailing part of the word we may need to reset the 483 // potential break later, after we've fully counted length.) 484 if (hyphens[i] && length >= mHyphenateLimitStart && i < endLimit) { 485 // Keep track of hyphen position and "countable" length of the word. 486 oldBreaks.AppendElement(BreakInfo{i + 1, length, aBreakState[i + 1]}); 487 aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; 488 } 489 490 // If the character was outside the BMP, skip past the low surrogate. 491 if (!IS_IN_BMP(ch)) { 492 ++i; 493 } 494 } 495 496 if (length < mHyphenateLimitWord) { 497 // After discounting combining marks, punctuation, controls, etc., the word 498 // was too short for hyphenate-limit-chars. If we've set any hyphen breaks, 499 // forget them. 
500 while (!oldBreaks.IsEmpty()) { 501 auto lastBreak = oldBreaks.PopLastElement(); 502 aBreakState[lastBreak.mPosition] = lastBreak.mState; 503 } 504 } else { 505 // Check if trailing fragment is too short; if so, remove the last hyphen 506 // break(s) that we set, until the fragment will be long enough. 507 while (!oldBreaks.IsEmpty()) { 508 auto lastBreak = oldBreaks.PopLastElement(); 509 if (length - lastBreak.mLength >= mHyphenateLimitEnd) { 510 break; 511 } 512 aBreakState[lastBreak.mPosition] = lastBreak.mState; 513 } 514 } 515 } 516 517 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, 518 const uint8_t* aText, uint32_t aLength, 519 uint32_t aFlags, nsILineBreakSink* aSink) { 520 NS_ASSERTION(aLength > 0, "Appending empty text..."); 521 522 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) { 523 // Defer to the Unicode path if capitalization or hyphenation is required 524 nsAutoString str; 525 const char* cp = reinterpret_cast<const char*>(aText); 526 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str); 527 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink); 528 } 529 530 uint32_t offset = 0; 531 532 // Continue the current word 533 if (mCurrentWord.Length() > 0) { 534 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, 535 "These should not be set"); 536 537 while (offset < aLength && !IsSegmentSpace(aText[offset])) { 538 mCurrentWord.AppendElement(aText[offset]); 539 if (!mCurrentWordMightBeBreakable && 540 !IsNonBreakableChar<uint8_t>(aText[offset], mLegacyBehavior)) { 541 mCurrentWordMightBeBreakable = true; 542 } 543 ++offset; 544 } 545 546 if (offset > 0) { 547 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); 548 } 549 550 if (offset == aLength) { 551 // We did not encounter whitespace so the word hasn't finished yet. 
552 return NS_OK; 553 } 554 555 // We encountered whitespace, so we're done with this word 556 nsresult rv = FlushCurrentWord(); 557 if (NS_FAILED(rv)) { 558 return rv; 559 } 560 } 561 562 AutoTArray<uint8_t, 4000> breakState; 563 if (aSink) { 564 if (!breakState.AppendElements(aLength, mozilla::fallible)) { 565 return NS_ERROR_OUT_OF_MEMORY; 566 } 567 } 568 569 uint32_t start = offset; 570 bool noBreaksNeeded = 571 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS && 572 !mBreakHere && !mAfterBreakableSpace); 573 if (noBreaksNeeded) { 574 // Skip to the space before the last word, since either the break data 575 // here is not needed, or no breaks are set in the sink and there cannot 576 // be any breaks in this chunk; all we need is the context for the next 577 // chunk (if any) 578 offset = aLength; 579 while (offset > start) { 580 --offset; 581 if (IsSegmentSpace(aText[offset])) { 582 break; 583 } 584 } 585 } 586 uint32_t wordStart = offset; 587 bool wordMightBeBreakable = false; 588 589 for (;;) { 590 uint8_t ch = aText[offset]; 591 bool isSpace = IsSegmentSpace(ch); 592 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); 593 594 if (aSink) { 595 // Consider word-break style. Since the break position of CJK scripts 596 // will be set by nsILineBreaker, we don't consider CJK at this point. 597 breakState[offset] = 598 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || 599 mWordBreak == WordBreakRule::BreakAll || 600 mLineBreak == LineBreakRule::Anywhere 601 ? 
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL 602 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 603 } 604 mBreakHere = false; 605 mAfterBreakableSpace = isBreakableSpace; 606 607 if (isSpace) { 608 if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) { 609 if (mLineBreak == LineBreakRule::Anywhere) { 610 memset(breakState.Elements() + wordStart, 611 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, 612 offset - wordStart); 613 } else if (wordMightBeBreakable) { 614 // Save current start-of-word state because ComputeBreakPositions() 615 // will set it to false. 616 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]); 617 LineBreaker::ComputeBreakPositions( 618 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak, 619 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart); 620 } 621 } 622 623 wordMightBeBreakable = false; 624 mWordContinuation = false; 625 ++offset; 626 if (offset >= aLength) { 627 break; 628 } 629 wordStart = offset; 630 continue; 631 } 632 633 if (!wordMightBeBreakable && 634 !IsNonBreakableChar<uint8_t>(ch, mLegacyBehavior)) { 635 wordMightBeBreakable = true; 636 } 637 ++offset; 638 if (offset >= aLength) { 639 // Save this word 640 mCurrentWordMightBeBreakable = wordMightBeBreakable; 641 uint32_t len = offset - wordStart; 642 char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible); 643 if (!elems) { 644 return NS_ERROR_OUT_OF_MEMORY; 645 } 646 uint32_t i; 647 for (i = wordStart; i < offset; ++i) { 648 elems[i - wordStart] = aText[i]; 649 } 650 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); 651 // Ensure that the break-before for this word is written out 652 offset = wordStart + 1; 653 break; 654 } 655 } 656 657 if (!noBreaksNeeded) { 658 aSink->SetBreaks(start, offset - start, breakState.Elements() + start); 659 } 660 return NS_OK; 661 } 662 663 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) { 664 if 
(mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) { 665 mCurrentWordContainsMixedLang = true; 666 mScriptIsChineseOrJapanese = false; 667 return; 668 } 669 670 if (aHyphenationLanguage && !mCurrentWordLanguage) { 671 static mozilla::StaticRefPtr<nsAtom> sLastHyphenationLanguage; 672 static bool sLastScriptIsChineseOrJapanese = false; 673 static bool sInit = false; 674 675 if (!sInit) { 676 mozilla::ClearOnShutdown(&sLastHyphenationLanguage); 677 sInit = true; 678 } 679 680 if (sLastHyphenationLanguage == aHyphenationLanguage) { 681 MOZ_ASSERT(nsAtomString(sLastHyphenationLanguage) 682 .Equals(nsAtomString(aHyphenationLanguage))); 683 mScriptIsChineseOrJapanese = sLastScriptIsChineseOrJapanese; 684 } else { 685 Locale loc; 686 auto result = 687 LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage), loc); 688 689 if (result.isErr()) { 690 return; 691 } 692 if (loc.Script().Missing() && loc.AddLikelySubtags().isErr()) { 693 return; 694 } 695 mScriptIsChineseOrJapanese = 696 loc.Script().EqualTo("Hans") || loc.Script().EqualTo("Hant") || 697 loc.Script().EqualTo("Jpan") || loc.Script().EqualTo("Hrkt"); 698 699 sLastHyphenationLanguage = aHyphenationLanguage; 700 sLastScriptIsChineseOrJapanese = mScriptIsChineseOrJapanese; 701 } 702 } 703 mCurrentWordLanguage = aHyphenationLanguage; 704 } 705 706 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) { 707 nsresult rv = FlushCurrentWord(); 708 if (NS_FAILED(rv)) { 709 return rv; 710 } 711 712 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE); 713 if (mAfterBreakableSpace && !isBreakableSpace) { 714 mBreakHere = true; 715 } 716 mAfterBreakableSpace = isBreakableSpace; 717 mWordContinuation = false; 718 return NS_OK; 719 } 720 721 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) { 722 nsresult rv = FlushCurrentWord(); 723 if (NS_FAILED(rv)) { 724 return rv; 725 } 726 727 *aTrailingBreak = mBreakHere || mAfterBreakableSpace; 728 mBreakHere = false; 729 mAfterBreakableSpace = 
false; 730 return NS_OK; 731 }