mozInlineSpellWordUtil.h (9059B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef mozInlineSpellWordUtil_h 7 #define mozInlineSpellWordUtil_h 8 9 #include <utility> 10 11 #include "mozilla/Attributes.h" 12 #include "mozilla/Maybe.h" 13 #include "mozilla/RangeBoundary.h" 14 #include "mozilla/Result.h" 15 #include "mozilla/dom/Document.h" 16 #include "nsCOMPtr.h" 17 #include "nsString.h" 18 #include "nsTArray.h" 19 20 // #define DEBUG_SPELLCHECK 21 22 class nsRange; 23 class nsINode; 24 25 namespace mozilla { 26 class EditorBase; 27 28 namespace dom { 29 class Document; 30 } 31 } // namespace mozilla 32 33 struct NodeOffset { 34 nsCOMPtr<nsINode> mNode; 35 int32_t mOffset; 36 37 NodeOffset() : mOffset(0) {} 38 NodeOffset(nsINode* aNode, int32_t aOffset) 39 : mNode(aNode), mOffset(aOffset) {} 40 41 bool operator==(const NodeOffset& aOther) const { 42 return mNode == aOther.mNode && mOffset == aOther.mOffset; 43 } 44 45 bool operator==(const mozilla::RangeBoundary& aRangeBoundary) const; 46 47 bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); } 48 49 nsINode* Node() const { return mNode.get(); } 50 int32_t Offset() const { return mOffset; } 51 }; 52 53 class NodeOffsetRange { 54 private: 55 NodeOffset mBegin; 56 NodeOffset mEnd; 57 58 public: 59 NodeOffsetRange() {} 60 NodeOffsetRange(NodeOffset b, NodeOffset e) 61 : mBegin(std::move(b)), mEnd(std::move(e)) {} 62 63 bool operator==(const nsRange& aRange) const; 64 65 const NodeOffset& Begin() const { return mBegin; } 66 67 const NodeOffset& End() const { return mEnd; } 68 }; 69 70 /** 71 * This class extracts text from the DOM and builds it into a single string. 72 * The string includes whitespace breaks whereever non-inline elements begin 73 * and end. This string is broken into "real words", following somewhat 74 * complex rules; for example substrings that look like URLs or 75 * email addresses are treated as single words, but otherwise many kinds of 76 * punctuation are treated as word separators. GetNextWord provides a way 77 * to iterate over these "real words". 78 * 79 * The basic operation is: 80 * 81 * 1. Call Init with the editor that you're using. 82 * 2. Call SetPositionAndEnd to to initialize the current position inside the 83 * previously given range and set where you want to stop spellchecking. 84 * We'll stop at the word boundary after that. If SetEnd is not called, 85 * we'll stop at the end of the root element. 86 * 3. Call GetNextWord over and over until it returns false. 87 */ 88 89 class MOZ_STACK_CLASS mozInlineSpellWordUtil { 90 public: 91 static mozilla::Maybe<mozInlineSpellWordUtil> Create( 92 const mozilla::EditorBase& aEditorBase); 93 94 // sets the current position, this should be inside the range. If we are in 95 // the middle of a word, we'll move to its start. 96 nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset, 97 nsINode* aEndNode, int32_t aEndOffset); 98 99 // Given a point inside or immediately following a word, this returns the 100 // DOM range that exactly encloses that word's characters. The current 101 // position will be at the end of the word. This will find the previous 102 // word if the current position is space, so if you care that the point is 103 // inside the word, you should check the range. 104 // 105 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called 106 // before you actually generate the range you are interested in and iterate 107 // the words in it. 108 nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset, 109 nsRange** aRange); 110 111 // Convenience functions, object must be initialized 112 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, 113 nsRange** aRange) const; 114 static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange); 115 116 struct Word { 117 nsAutoString mText; 118 NodeOffsetRange mNodeOffsetRange; 119 bool mSkipChecking = false; 120 }; 121 122 // Moves to the the next word in the range, and retrieves it's text and range. 123 // `false` is returned when we are done checking. 124 // mSkipChecking will be set if the word is "special" and shouldn't be 125 // checked (e.g., an email address). 126 bool GetNextWord(Word& aWord); 127 128 // Call to normalize some punctuation. This function takes an autostring 129 // so we can access characters directly. 130 static void NormalizeWord(nsAString& aWord); 131 132 mozilla::dom::Document* GetDocument() const { return mDocument; } 133 const nsINode* GetRootNode() const { return mRootNode; } 134 135 private: 136 // A list of where we extracted text from, ordered by mSoftTextOffset. A given 137 // DOM node appears at most once in this list. 138 struct DOMTextMapping { 139 NodeOffset mNodeOffset; 140 int32_t mSoftTextOffset; 141 int32_t mLength; 142 143 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, 144 int32_t aLength) 145 : mNodeOffset(std::move(aNodeOffset)), 146 mSoftTextOffset(aSoftTextOffset), 147 mLength(aLength) {} 148 }; 149 150 struct SoftText { 151 void AdjustBeginAndBuildText(NodeOffset aBegin, NodeOffset aEnd, 152 const nsINode* aRootNode); 153 154 void Invalidate() { mIsValid = false; } 155 156 const NodeOffset& GetBegin() const { return mBegin; } 157 const NodeOffset& GetEnd() const { return mEnd; } 158 159 const nsTArray<DOMTextMapping>& GetDOMMapping() const { 160 return mDOMMapping; 161 } 162 163 const nsString& GetValue() const { return mValue; } 164 165 bool mIsValid = false; 166 167 private: 168 NodeOffset mBegin = NodeOffset(nullptr, 0); 169 NodeOffset mEnd = NodeOffset(nullptr, 0); 170 171 nsTArray<DOMTextMapping> mDOMMapping; 172 173 // DOM text covering the soft range, with newlines added at block boundaries 174 nsString mValue; 175 }; 176 177 SoftText mSoftText; 178 179 mozInlineSpellWordUtil(mozilla::dom::Document& aDocument, 180 bool aIsContentEditableOrDesignMode, nsINode& aRootNode 181 182 ) 183 : mDocument(&aDocument), 184 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode), 185 mRootNode(&aRootNode), 186 mNextWordIndex(-1) {} 187 188 // cached stuff for the editor 189 const RefPtr<mozilla::dom::Document> mDocument; 190 const bool mIsContentEditableOrDesignMode; 191 192 // range to check, see SetPosition and SetEnd 193 const nsINode* mRootNode; 194 195 // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset 196 struct RealWord { 197 int32_t mSoftTextOffset; 198 uint32_t mLength : 31; 199 uint32_t mCheckableWord : 1; 200 201 RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable) 202 : mSoftTextOffset(aOffset), 203 mLength(aLength), 204 mCheckableWord(aCheckable) { 205 static_assert(sizeof(RealWord) == 8, 206 "RealWord should be limited to 8 bytes"); 207 MOZ_ASSERT(aLength < INT32_MAX, 208 "Word length is too large to fit in the bitfield"); 209 } 210 211 int32_t EndOffset() const { return mSoftTextOffset + mLength; } 212 }; 213 using RealWords = nsTArray<RealWord>; 214 RealWords mRealWords; 215 int32_t mNextWordIndex; 216 217 nsresult EnsureWords(NodeOffset aSoftBegin, NodeOffset aSoftEnd); 218 219 int32_t MapDOMPositionToSoftTextOffset(const NodeOffset& aNodeOffset) const; 220 // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM 221 // positions can map to the same mSoftText.mValue offset, e.g. given nodes 222 // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string 223 // offset. So, aHintBefore controls which position we return ... if aHint is 224 // eEnd then the position indicates the END of a range so we return (A,4). 225 // Otherwise the position indicates the START of a range so we return (B,0). 226 enum DOMMapHint { HINT_BEGIN, HINT_END }; 227 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, 228 DOMMapHint aHint) const; 229 230 static void ToString(DOMMapHint aHint, nsACString& aResult); 231 232 // Finds the index of the real word containing aSoftTextOffset, or -1 if none. 233 // 234 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the 235 // later word (favouring the assumption that it's the BEGINning of a word), 236 // otherwise return the earlier word (assuming it's the END of a word). 237 // If aSearchForward is true, then if we don't find a word at the given 238 // position, search forward until we do find a word and return that (if 239 // found). 240 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, 241 bool aSearchForward) const; 242 243 mozilla::Result<RealWords, nsresult> BuildRealWords() const; 244 245 nsresult SplitDOMWordAndAppendTo(int32_t aStart, int32_t aEnd, 246 nsTArray<RealWord>& aRealWords) const; 247 248 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange) const; 249 void MakeNodeOffsetRangeForWord(const RealWord& aWord, 250 NodeOffsetRange* aNodeOffsetRange); 251 }; 252 253 #endif