nsScanner.h (5243B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /** 7 * MODULE NOTES: 8 * @update gess 4/1/98 9 * 10 * The scanner is a low-level service class that knows 11 * how to consume characters out of an (internal) stream. 12 * This class also offers a series of utility methods 13 * that most tokenizers want, such as readUntil() 14 * and SkipWhitespace(). 15 */ 16 17 #ifndef SCANNER 18 #define SCANNER 19 20 #include "nsCharsetSource.h" 21 #include "nsCOMPtr.h" 22 #include "nsString.h" 23 #include "nsIParser.h" 24 #include "mozilla/Encoding.h" 25 #include "nsScannerString.h" 26 27 class nsReadEndCondition { 28 public: 29 const char16_t* mChars; 30 char16_t mFilter; 31 explicit nsReadEndCondition(const char16_t* aTerminateChars); 32 33 private: 34 nsReadEndCondition(const nsReadEndCondition& aOther); // No copying 35 void operator=(const nsReadEndCondition& aOther); // No assigning 36 }; 37 38 class nsScanner final { 39 using Encoding = mozilla::Encoding; 40 template <typename T> 41 using NotNull = mozilla::NotNull<T>; 42 43 public: 44 /** 45 * Use this constructor for the XML fragment parsing case 46 */ 47 nsScanner(const nsAString& anHTMLString, bool aIncremental); 48 49 /** 50 * Use this constructor if you want i/o to be based on 51 * a file (therefore a stream) or just data you provide via Append(). 52 */ 53 explicit nsScanner(nsIURI* aURI); 54 55 ~nsScanner(); 56 57 /** 58 * retrieve next char from internal input stream 59 * 60 * @update gess 3/25/98 61 * @param ch is the char to accept new value 62 * @return error code reflecting read status 63 */ 64 nsresult GetChar(char16_t& ch); 65 66 /** 67 * Records current offset position in input stream. This allows us 68 * to back up to this point if the need should arise, such as when 69 * tokenization gets interrupted. 70 * 71 * @update gess 5/12/98 72 * @param 73 * @return 74 */ 75 int32_t Mark(void); 76 77 /** 78 * Resets current offset position of input stream to marked position. 79 * This allows us to back up to this point if the need should arise, 80 * such as when tokenization gets interrupted. 81 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! 82 * 83 * @update gess 5/12/98 84 * @param 85 * @return 86 */ 87 void RewindToMark(void); 88 89 /** 90 * 91 * 92 * @update harishd 01/12/99 93 * @param 94 * @return 95 */ 96 bool UngetReadable(const nsAString& aBuffer); 97 98 /** 99 * 100 * 101 * @update gess 5/13/98 102 * @param 103 * @return 104 */ 105 nsresult Append(const nsAString& aBuffer); 106 107 /** 108 * 109 * 110 * @update gess 5/21/98 111 * @param 112 * @return 113 */ 114 nsresult Append(const char* aBuffer, uint32_t aLen); 115 116 /** 117 * Call this to copy bytes out of the scanner that have not yet been consumed 118 * by the tokenization process. 119 * 120 * @update gess 5/12/98 121 * @param aCopyBuffer is where the scanner buffer will be copied to 122 * @return true if OK or false on OOM 123 */ 124 bool CopyUnusedData(nsString& aCopyBuffer); 125 126 /** 127 * Retrieve the URI of the file that the scanner is reading from. 128 * In some cases, it's just a given name, because the scanner isn't 129 * really reading from a file. 130 */ 131 nsIURI* GetURI(void) const { return mURI; } 132 133 static void SelfTest(); 134 135 /** 136 * Use this setter to change the scanner's unicode decoder 137 * 138 * @update ftang 3/02/99 139 * @param aCharset a normalized (alias resolved) charset name 140 * @param aCharsetSource- where the charset info came from 141 * @return 142 */ 143 nsresult SetDocumentCharset(NotNull<const Encoding*> aEncoding, 144 int32_t aSource); 145 146 void BindSubstring(nsScannerSubstring& aSubstring, 147 const nsScannerIterator& aStart, 148 const nsScannerIterator& aEnd); 149 void CurrentPosition(nsScannerIterator& aPosition); 150 void EndReading(nsScannerIterator& aPosition); 151 void SetPosition(nsScannerIterator& aPosition, bool aTruncate = false); 152 153 /** 154 * Internal method used to cause the internal buffer to 155 * be filled with data. 156 * 157 * @update gess4/3/98 158 */ 159 bool IsIncremental(void) { return mIncremental; } 160 void SetIncremental(bool anIncrValue) { mIncremental = anIncrValue; } 161 162 protected: 163 void AppendToBuffer(nsScannerString::Buffer* aBuffer); 164 bool AppendToBuffer(const nsAString& aStr) { 165 nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); 166 if (!buf) return false; 167 AppendToBuffer(buf); 168 return true; 169 } 170 171 mozilla::UniquePtr<nsScannerString> mSlidingBuffer; 172 nsScannerIterator mCurrentPosition; // The position we will next read from in 173 // the scanner buffer 174 nsScannerIterator 175 mMarkPosition; // The position last marked (we may rewind to here) 176 nsScannerIterator mEndPosition; // The current end of the scanner buffer 177 nsCOMPtr<nsIURI> mURI; 178 bool mIncremental; 179 int32_t mCharsetSource = kCharsetUninitialized; 180 nsCString mCharset; 181 mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder; 182 183 private: 184 nsScanner& operator=(const nsScanner&); // Not implemented. 185 }; 186 187 #endif