tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mozInlineSpellWordUtil.h (9059B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef mozInlineSpellWordUtil_h
      7 #define mozInlineSpellWordUtil_h
      8 
      9 #include <utility>
     10 
     11 #include "mozilla/Attributes.h"
     12 #include "mozilla/Maybe.h"
     13 #include "mozilla/RangeBoundary.h"
     14 #include "mozilla/Result.h"
     15 #include "mozilla/dom/Document.h"
     16 #include "nsCOMPtr.h"
     17 #include "nsString.h"
     18 #include "nsTArray.h"
     19 
     20 // #define DEBUG_SPELLCHECK
     21 
     22 class nsRange;
     23 class nsINode;
     24 
     25 namespace mozilla {
     26 class EditorBase;
     27 
     28 namespace dom {
     29 class Document;
     30 }
     31 }  // namespace mozilla
     32 
     33 struct NodeOffset {
         // A (node, offset) pair used in this header to describe a DOM position.
         // mNode is held through an nsCOMPtr; mOffset is the int32_t offset paired
         // with that node.
     34  nsCOMPtr<nsINode> mNode;
     35  int32_t mOffset;
     36 
         // Default construction leaves mNode default-constructed and sets the
         // offset to 0.
     37  NodeOffset() : mOffset(0) {}
         // Stores aNode and aOffset as given.
     38  NodeOffset(nsINode* aNode, int32_t aOffset)
     39      : mNode(aNode), mOffset(aOffset) {}
     40 
         // Two NodeOffsets are equal only when both the node and the offset match.
     41  bool operator==(const NodeOffset& aOther) const {
     42    return mNode == aOther.mNode && mOffset == aOther.mOffset;
     43  }
     44 
         // Comparison against a mozilla::RangeBoundary; only declared here, the
         // definition lives outside this header.
     45  bool operator==(const mozilla::RangeBoundary& aRangeBoundary) const;
     46 
         // Negation of operator==(const NodeOffset&).
     47  bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); }
     48 
         // Accessors: Node() returns the raw pointer held by mNode, Offset() returns
         // mOffset.
     49  nsINode* Node() const { return mNode.get(); }
     50  int32_t Offset() const { return mOffset; }
     51 };
     52 
     53 class NodeOffsetRange {
         // A pair of NodeOffsets: a begin position and an end position.
     54 private:
     55  NodeOffset mBegin;
     56  NodeOffset mEnd;
     57 
     58 public:
         // Default construction default-constructs both endpoints.
     59  NodeOffsetRange() {}
         // Takes both endpoints by value and moves them into the members.
     60  NodeOffsetRange(NodeOffset b, NodeOffset e)
     61      : mBegin(std::move(b)), mEnd(std::move(e)) {}
     62 
         // Comparison against an nsRange; only declared here, the definition lives
         // outside this header.
     63  bool operator==(const nsRange& aRange) const;
     64 
         // Read-only access to the begin endpoint.
     65  const NodeOffset& Begin() const { return mBegin; }
     66 
         // Read-only access to the end endpoint.
     67  const NodeOffset& End() const { return mEnd; }
     68 };
     69 
     70 /**
     71 *    This class extracts text from the DOM and builds it into a single string.
     72 *    The string includes whitespace breaks whereever non-inline elements begin
     73 *    and end. This string is broken into "real words", following somewhat
     74 *    complex rules; for example substrings that look like URLs or
     75 *    email addresses are treated as single words, but otherwise many kinds of
     76 *    punctuation are treated as word separators. GetNextWord provides a way
     77 *    to iterate over these "real words".
     78 *
     79 *    The basic operation is:
     80 *
     81 *    1. Call Create with the editor that you're using.
     82 *    2. Call SetPositionAndEnd to initialize the current position inside the
     83 *       previously given range and set where you want to stop spellchecking.
     84 *       We'll stop at the word boundary after that. If SetEnd is not called,
     85 *       we'll stop at the end of the root element.
     86 *    3. Call GetNextWord over and over until it returns false.
     87 */
     88 
     89 class MOZ_STACK_CLASS mozInlineSpellWordUtil {
     90 public:
         // Factory function. The constructor is private, so this is the way to
         // obtain an instance. The result is wrapped in mozilla::Maybe, so callers
         // must check that it holds a value; the conditions under which it is empty
         // are decided by the implementation and are not visible in this header.
     91  static mozilla::Maybe<mozInlineSpellWordUtil> Create(
     92      const mozilla::EditorBase& aEditorBase);
     93 
     94  // sets the current position, this should be inside the range. If we are in
     95  // the middle of a word, we'll move to its start.
     96  nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset,
     97                             nsINode* aEndNode, int32_t aEndOffset);
     98 
     99  // Given a point inside or immediately following a word, this returns the
    100  // DOM range that exactly encloses that word's characters. The current
    101  // position will be at the end of the word. This will find the previous
    102  // word if the current position is space, so if you care that the point is
    103  // inside the word, you should check the range.
    104  //
    105  // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
    106  // before you actually generate the range you are interested in and iterate
    107  // the words in it.
    108  nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset,
    109                           nsRange** aRange);
    110 
    111  // Convenience functions, object must be initialized
    112  nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd,
    113                     nsRange** aRange) const;
    114  static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange);
    115 
         // Output of GetNextWord: the word's text, the NodeOffsetRange associated
         // with it, and a flag (false by default) telling the caller to skip
         // spellchecking it.
    116  struct Word {
    117    nsAutoString mText;
    118    NodeOffsetRange mNodeOffsetRange;
    119    bool mSkipChecking = false;
    120  };
    121 
    122  // Moves to the next word in the range, and retrieves its text and range.
    123  // `false` is returned when we are done checking.
    124  // mSkipChecking will be set if the word is "special" and shouldn't be
    125  // checked (e.g., an email address).
    126  bool GetNextWord(Word& aWord);
    127 
    128  // Call to normalize some punctuation. This function takes an autostring
    129  // so we can access characters directly.
         // NOTE(review): the parameter is declared as nsAString&, not an auto
         // string; confirm whether the sentence above is still accurate.
    130  static void NormalizeWord(nsAString& aWord);
    131 
         // Plain accessors for the document and root node stored at construction.
    132  mozilla::dom::Document* GetDocument() const { return mDocument; }
    133  const nsINode* GetRootNode() const { return mRootNode; }
    134 
    135 private:
    136  // A list of where we extracted text from, ordered by mSoftTextOffset. A given
    137  // DOM node appears at most once in this list.
    138  struct DOMTextMapping {
    139    NodeOffset mNodeOffset;
    140    int32_t mSoftTextOffset;
    141    int32_t mLength;
    142 
           // Stores the three values as given (aNodeOffset is moved in).
    143    DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset,
    144                   int32_t aLength)
    145        : mNodeOffset(std::move(aNodeOffset)),
    146          mSoftTextOffset(aSoftTextOffset),
    147          mLength(aLength) {}
    148  };
    149 
         // Bundles the extracted text (mValue), the begin/end positions it was built
         // for, the DOM mapping entries and a validity flag. Only mIsValid is
         // publicly writable; the other members are private and exposed through
         // const getters.
    150  struct SoftText {
           // Declared here, defined outside this header.
    151    void AdjustBeginAndBuildText(NodeOffset aBegin, NodeOffset aEnd,
    152                                 const nsINode* aRootNode);
    153 
           // Clears the validity flag; the stored text and mapping are not touched
           // here.
    154    void Invalidate() { mIsValid = false; }
    155 
    156    const NodeOffset& GetBegin() const { return mBegin; }
    157    const NodeOffset& GetEnd() const { return mEnd; }
    158 
    159    const nsTArray<DOMTextMapping>& GetDOMMapping() const {
    160      return mDOMMapping;
    161    }
    162 
    163    const nsString& GetValue() const { return mValue; }
    164 
           // Starts out false; Invalidate() resets it to false.
    165    bool mIsValid = false;
    166 
    167   private:
           // Both endpoints start as (nullptr, 0).
    168    NodeOffset mBegin = NodeOffset(nullptr, 0);
    169    NodeOffset mEnd = NodeOffset(nullptr, 0);
    170 
    171    nsTArray<DOMTextMapping> mDOMMapping;
    172 
    173    // DOM text covering the soft range, with newlines added at block boundaries
    174    nsString mValue;
    175  };
    176 
    177  SoftText mSoftText;
    178 
         // Private constructor (use Create). Stores the document, the
         // content-editable/design-mode flag and the root node, and starts
         // mNextWordIndex at -1.
    179  mozInlineSpellWordUtil(mozilla::dom::Document& aDocument,
    180                         bool aIsContentEditableOrDesignMode, nsINode& aRootNode
    181 
    182                         )
    183      : mDocument(&aDocument),
    184        mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode),
    185        mRootNode(&aRootNode),
    186        mNextWordIndex(-1) {}
    187 
    188  // cached stuff for the editor
    189  const RefPtr<mozilla::dom::Document> mDocument;
    190  const bool mIsContentEditableOrDesignMode;
    191 
    192  // range to check, see SetPositionAndEnd
    193  const nsINode* mRootNode;
    194 
    195  // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
    196  struct RealWord {
    197    int32_t mSoftTextOffset;
           // mLength (31 bits) and mCheckableWord (1 bit) are bit-fields totalling
           // 32 bits; the static_assert in the constructor pins sizeof(RealWord)
           // to 8 bytes.
    198    uint32_t mLength : 31;
    199    uint32_t mCheckableWord : 1;
    200 
    201    RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable)
    202        : mSoftTextOffset(aOffset),
    203          mLength(aLength),
    204          mCheckableWord(aCheckable) {
    205      static_assert(sizeof(RealWord) == 8,
    206                    "RealWord should be limited to 8 bytes");
             // Asserts that aLength is below INT32_MAX so it fits the 31-bit field.
    207      MOZ_ASSERT(aLength < INT32_MAX,
    208                 "Word length is too large to fit in the bitfield");
    209    }
    210 
           // Start offset plus length.
    211    int32_t EndOffset() const { return mSoftTextOffset + mLength; }
    212  };
    213  using RealWords = nsTArray<RealWord>;
    214  RealWords mRealWords;
         // Set to -1 by the constructor.
    215  int32_t mNextWordIndex;
    216 
    217  nsresult EnsureWords(NodeOffset aSoftBegin, NodeOffset aSoftEnd);
    218 
         // Presumably the inverse direction of MapSoftTextOffsetToDOMPosition below
         // (DOM position in, soft-text offset out); the value returned for a
         // position that cannot be mapped is defined in the implementation - verify
         // there.
    219  int32_t MapDOMPositionToSoftTextOffset(const NodeOffset& aNodeOffset) const;
    220  // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
    221  // positions can map to the same mSoftText.mValue offset, e.g. given nodes
    222  // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
    223  // offset. So, aHint controls which position we return ... if aHint is
    224  // HINT_END then the position indicates the END of a range so we return (A,4).
    225  // Otherwise the position indicates the START of a range so we return (B,0).
    226  enum DOMMapHint { HINT_BEGIN, HINT_END };
    227  NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
    228                                            DOMMapHint aHint) const;
    229 
    230  static void ToString(DOMMapHint aHint, nsACString& aResult);
    231 
    232  // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
    233  //
    234  // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
    235  // later word (favouring the assumption that it's the BEGINning of a word),
    236  // otherwise return the earlier word (assuming it's the END of a word).
    237  // If aSearchForward is true, then if we don't find a word at the given
    238  // position, search forward until we do find a word and return that (if
    239  // found).
    240  int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
    241                                 bool aSearchForward) const;
    242 
         // Returns either a RealWords array or an nsresult error code.
    243  mozilla::Result<RealWords, nsresult> BuildRealWords() const;
    244 
    245  nsresult SplitDOMWordAndAppendTo(int32_t aStart, int32_t aEnd,
    246                                   nsTArray<RealWord>& aRealWords) const;
    247 
    248  nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange) const;
    249  void MakeNodeOffsetRangeForWord(const RealWord& aWord,
    250                                  NodeOffsetRange* aNodeOffsetRange);
    251 };
    252 
    253 #endif