WordBreaker.h (2306B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 #ifndef mozilla_intl_WordBreaker_h__ 6 #define mozilla_intl_WordBreaker_h__ 7 8 #include "nsStringFwd.h" 9 #include <cstdint> 10 11 #define NS_WORDBREAKER_NEED_MORE_TEXT -1 12 13 namespace mozilla { 14 namespace intl { 15 16 struct WordRange { 17 uint32_t mBegin; 18 uint32_t mEnd; 19 }; 20 21 class WordBreaker final { 22 public: 23 // WordBreaker is a utility class with only static methods. No need to 24 // instantiate it. 25 WordBreaker() = delete; 26 ~WordBreaker() = delete; 27 28 // Find the word boundary by scanning forward and backward from aPos. 29 // 30 // @return WordRange where mBegin equals to the offset to first character in 31 // the word and mEnd equals to the offset to the last character plus 1. mEnd 32 // can be aText.Lengh() if the desired word is at the end of aText. 33 // 34 // If aPos is already at the end of aText or beyond, both mBegin and mEnd 35 // equals to aText.Length(). 36 // 37 // If setting StopAtPunctuation, even if using UAX#29 word segmenter rule, 38 // there will be break opportunities on characters with punctuation class. 39 enum class FindWordOptions { None, StopAtPunctuation }; 40 41 static WordRange FindWord( 42 const nsAString& aText, uint32_t aPos, 43 const FindWordOptions aOptions = FindWordOptions::None); 44 45 // Find the next word break opportunity starting from aPos + 1. It can return 46 // aLen if there's no break opportunity between [aPos + 1, aLen - 1]. 47 // 48 // If aPos is already at the end of aText or beyond, i.e. aPos >= aLen, return 49 // NS_WORDBREAKER_NEED_MORE_TEXT. 50 // 51 // DEPRECATED: Use WordBreakIteratorUtf16 instead. 52 static int32_t Next(const char16_t* aText, uint32_t aLen, uint32_t aPos); 53 54 private: 55 enum WordBreakClass : uint8_t { 56 kWbClassSpace = 0, 57 kWbClassAlphaLetter, 58 kWbClassPunct, 59 kWbClassHanLetter, 60 kWbClassKatakanaLetter, 61 kWbClassHiraganaLetter, 62 kWbClassHWKatakanaLetter, 63 kWbClassScriptioContinua 64 }; 65 66 static WordBreakClass GetClass(char16_t aChar); 67 }; 68 69 } // namespace intl 70 } // namespace mozilla 71 72 #endif /* mozilla_intl_WordBreaker_h__ */