[ tor-browser ].git.dasho

StreamingLexer.h (32394B)
      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /**
      8 * StreamingLexer is a lexing framework designed to make it simple to write
      9 * image decoders without worrying about the details of how the data is arriving
     10 * from the network.
     11 */
     12 
     13 #ifndef mozilla_image_StreamingLexer_h
     14 #define mozilla_image_StreamingLexer_h
     15 
     16 #include <algorithm>
     17 #include <cstdint>
     18 #include <utility>
     19 
     20 #include "SourceBuffer.h"
     21 #include "mozilla/Assertions.h"
     22 #include "mozilla/Attributes.h"
     23 #include "mozilla/Maybe.h"
     24 #include "mozilla/Variant.h"
     25 #include "mozilla/Vector.h"
     26 
     27 namespace mozilla {
     28 namespace image {
     29 
     /// How the data for a StreamingLexer transition is handed to the next state.
     enum class BufferingStrategy {
       /// The requested data is accumulated and processed in a single chunk.
       BUFFERED,
       /// The data is processed as it arrives, possibly in several chunks.
       UNBUFFERED
     };
     35 
     /// What the lexer does once a StreamingLexer transition has been accepted.
     enum class ControlFlowStrategy {
       /// Move on to the next state right away, provided enough data is available.
       CONTINUE,
       /// Hand control back to the caller before moving on to the next state.
       YIELD
     };
     41 
     /// The states in which lexing can end.
     enum class TerminalState {
       /// Lexing finished successfully.
       SUCCESS,
       /// Lexing finished with a failure.
       FAILURE
     };
     44 
     /// The reasons the lexer may return to its caller without having terminated.
     enum class Yield {
       /// The lexer cannot make progress until more data arrives.
       NEED_MORE_DATA,
       /// The lexer has produced output that the caller can consume.
       OUTPUT_AVAILABLE
     };
     50 
     51 /// The result of a call to StreamingLexer::Lex().
     52 typedef Variant<TerminalState, Yield> LexerResult;
     53 
     54 /**
     55 * LexerTransition is a type used to give commands to the lexing framework.
     56 * Code that uses StreamingLexer can create LexerTransition values using the
     57 * static methods on Transition, and then return them to the lexing framework
     58 * for execution.
     59 */
     60 template <typename State>
     61 class LexerTransition {
     62 public:
     63  // This is implicit so that Terminate{Success,Failure}() can return a
     64  // TerminalState and have it implicitly converted to a
     65  // LexerTransition<State>, which avoids the need for a "<State>"
     66  // qualification to the Terminate{Success,Failure}() callsite.
     67  MOZ_IMPLICIT LexerTransition(TerminalState aFinalState)
     68      : mNextState(aFinalState) {}
     69 
     70  bool NextStateIsTerminal() const {
     71    return mNextState.template is<TerminalState>();
     72  }
     73 
     74  TerminalState NextStateAsTerminal() const {
     75    return mNextState.template as<TerminalState>();
     76  }
     77 
     78  State NextState() const {
     79    return mNextState.template as<NonTerminalState>().mState;
     80  }
     81 
     82  State UnbufferedState() const {
     83    return *mNextState.template as<NonTerminalState>().mUnbufferedState;
     84  }
     85 
     86  size_t Size() const {
     87    return mNextState.template as<NonTerminalState>().mSize;
     88  }
     89 
     90  BufferingStrategy Buffering() const {
     91    return mNextState.template as<NonTerminalState>().mBufferingStrategy;
     92  }
     93 
     94  ControlFlowStrategy ControlFlow() const {
     95    return mNextState.template as<NonTerminalState>().mControlFlowStrategy;
     96  }
     97 
     98 private:
     99  friend struct Transition;
    100 
    101  LexerTransition(State aNextState, const Maybe<State>& aUnbufferedState,
    102                  size_t aSize, BufferingStrategy aBufferingStrategy,
    103                  ControlFlowStrategy aControlFlowStrategy)
    104      : mNextState(NonTerminalState(aNextState, aUnbufferedState, aSize,
    105                                    aBufferingStrategy, aControlFlowStrategy)) {
    106  }
    107 
    108  struct NonTerminalState {
    109    State mState;
    110    Maybe<State> mUnbufferedState;
    111    size_t mSize;
    112    BufferingStrategy mBufferingStrategy;
    113    ControlFlowStrategy mControlFlowStrategy;
    114 
    115    NonTerminalState(State aState, const Maybe<State>& aUnbufferedState,
    116                     size_t aSize, BufferingStrategy aBufferingStrategy,
    117                     ControlFlowStrategy aControlFlowStrategy)
    118        : mState(aState),
    119          mUnbufferedState(aUnbufferedState),
    120          mSize(aSize),
    121          mBufferingStrategy(aBufferingStrategy),
    122          mControlFlowStrategy(aControlFlowStrategy) {
    123      MOZ_ASSERT_IF(mBufferingStrategy == BufferingStrategy::UNBUFFERED,
    124                    mUnbufferedState);
    125      MOZ_ASSERT_IF(mUnbufferedState,
    126                    mBufferingStrategy == BufferingStrategy::UNBUFFERED);
    127    }
    128  };
    129 
    130  Variant<NonTerminalState, TerminalState> mNextState;
    131 };
    132 
    133 struct Transition {
    134  /// Transition to @aNextState, buffering @aSize bytes of data.
    135  template <typename State>
    136  static LexerTransition<State> To(const State& aNextState, size_t aSize) {
    137    return LexerTransition<State>(aNextState, Nothing(), aSize,
    138                                  BufferingStrategy::BUFFERED,
    139                                  ControlFlowStrategy::CONTINUE);
    140  }
    141 
    142  /// Yield to the caller, transitioning to @aNextState when Lex() is next
    143  /// invoked. The same data that was delivered for the current state will be
    144  /// delivered again.
    145  template <typename State>
    146  static LexerTransition<State> ToAfterYield(const State& aNextState) {
    147    return LexerTransition<State>(aNextState, Nothing(), 0,
    148                                  BufferingStrategy::BUFFERED,
    149                                  ControlFlowStrategy::YIELD);
    150  }
    151 
    152  /**
    153   * Transition to @aNextState via @aUnbufferedState, reading @aSize bytes of
    154   * data unbuffered.
    155   *
    156   * The unbuffered data will be delivered in state @aUnbufferedState, which may
    157   * be invoked repeatedly until all @aSize bytes have been delivered. Then,
    158   * @aNextState will be invoked with no data. No state transitions are allowed
    159   * from @aUnbufferedState except for transitions to a terminal state, so
    160   * @aNextState will always be reached unless lexing terminates early.
    161   */
    162  template <typename State>
    163  static LexerTransition<State> ToUnbuffered(const State& aNextState,
    164                                             const State& aUnbufferedState,
    165                                             size_t aSize) {
    166    return LexerTransition<State>(aNextState, Some(aUnbufferedState), aSize,
    167                                  BufferingStrategy::UNBUFFERED,
    168                                  ControlFlowStrategy::CONTINUE);
    169  }
    170 
    171  /**
    172   * Continue receiving unbuffered data. @aUnbufferedState should be the same
    173   * state as the @aUnbufferedState specified in the preceding call to
    174   * ToUnbuffered().
    175   *
    176   * This should be used during an unbuffered read initiated by ToUnbuffered().
    177   */
    178  template <typename State>
    179  static LexerTransition<State> ContinueUnbuffered(
    180      const State& aUnbufferedState) {
    181    return LexerTransition<State>(aUnbufferedState, Nothing(), 0,
    182                                  BufferingStrategy::BUFFERED,
    183                                  ControlFlowStrategy::CONTINUE);
    184  }
    185 
    186  /**
    187   * Continue receiving unbuffered data. @aUnbufferedState should be the same
    188   * state as the @aUnbufferedState specified in the preceding call to
    189   * ToUnbuffered(). @aSize indicates the amount of data that has already been
    190   * consumed; the next state will receive the same data that was delivered to
    191   * the current state, without the first @aSize bytes.
    192   *
    193   * This should be used during an unbuffered read initiated by ToUnbuffered().
    194   */
    195  template <typename State>
    196  static LexerTransition<State> ContinueUnbufferedAfterYield(
    197      const State& aUnbufferedState, size_t aSize) {
    198    return LexerTransition<State>(aUnbufferedState, Nothing(), aSize,
    199                                  BufferingStrategy::BUFFERED,
    200                                  ControlFlowStrategy::YIELD);
    201  }
    202 
    203  /**
    204   * Terminate lexing, ending up in terminal state SUCCESS. (The implicit
    205   * LexerTransition constructor will convert the result to a LexerTransition
    206   * as needed.)
    207   *
    208   * No more data will be delivered after this function is used.
    209   */
    210  static TerminalState TerminateSuccess() { return TerminalState::SUCCESS; }
    211 
    212  /**
    213   * Terminate lexing, ending up in terminal state FAILURE. (The implicit
    214   * LexerTransition constructor will convert the result to a LexerTransition
    215   * as needed.)
    216   *
    217   * No more data will be delivered after this function is used.
    218   */
    219  static TerminalState TerminateFailure() { return TerminalState::FAILURE; }
    220 
    221 private:
    222  Transition();
    223 };
    224 
    225 /**
    226 * StreamingLexer is a lexing framework designed to make it simple to write
    227 * image decoders without worrying about the details of how the data is arriving
    228 * from the network.
    229 *
    230 * To use StreamingLexer:
    231 *
    232 *  - Create a State type. This should be an |enum class| listing all of the
    233 *    states that you can be in while lexing the image format you're trying to
    234 *    read.
    235 *
    236 *  - Add an instance of StreamingLexer<State> to your decoder class. Initialize
    237 *    it with a Transition::To() the state that you want to start lexing in, and
    238 *    a Transition::To() the state you'd like to use to handle truncated data.
    239 *
    240 *  - In your decoder's DoDecode() method, call Lex(), passing in the input
    241 *    data and length that are passed to DoDecode(). You also need to pass
    242 *    a lambda which dispatches to lexing code for each state based on the State
    243 *    value that's passed in. The lambda generally should just contain a
    244 *    |switch| statement that calls different methods for each State value. Each
    245 *    method should return a LexerTransition<State>, which the lambda should
    246 *    return in turn.
    247 *
    248 *  - Write the methods that actually implement lexing for your image format.
    249 *    These methods should return either Transition::To(), to move on to another
    250 *    state, or Transition::Terminate{Success,Failure}(), if lexing has
    251 *    terminated in either success or failure. (There are also additional
    252 *    transitions for unbuffered reads; see below.)
    253 *
    254 * That's the basics. The StreamingLexer will track your position in the input
    255 * and buffer enough data so that your lexing methods can process everything in
    256 * one pass. Lex() returns Yield::NEED_MORE_DATA if more data is needed, in
    257 * which case you should just return from DoDecode(). If lexing reaches a
    258 * terminal state, Lex() returns TerminalState::SUCCESS or
    259 * TerminalState::FAILURE, and you can check which one to determine if lexing
    260 * succeeded or failed and do any necessary cleanup.
    261 *
    262 * Sometimes, the input data is truncated. StreamingLexer will notify you when
    263 * this happens by invoking the truncated data state you passed to the
    264 * constructor. At this point you can attempt to recover and return
    265 * TerminalState::SUCCESS or TerminalState::FAILURE, depending on whether you
    266 * were successful. Note that you can't return anything other than a terminal
    267 * state in this situation, since there's no more data to read. For the same
    268 * reason, your truncated data state shouldn't require any data. (That is, the
    269 * @aSize argument you pass to Transition::To() must be zero.) Violating these
    270 * requirements will trigger assertions and an immediate transition to
    271 * TerminalState::FAILURE.
    272 *
    273 * Some lexers may want to *avoid* buffering in some cases, and just process the
    274 * data as it comes in. This is useful if, for example, you just want to skip
    275 * over a large section of data; there's no point in buffering data you're just
    276 * going to ignore.
    277 *
    278 * You can begin an unbuffered read with Transition::ToUnbuffered(). This works
    279 * a little differently than Transition::To() in that you specify *two* states.
    280 * The @aUnbufferedState argument specifies a state that will be called
    281 * repeatedly with unbuffered data, as soon as it arrives. The implementation
    282 * for that state should return either a transition to a terminal state, or a
    283 * Transition::ContinueUnbuffered() to the same @aUnbufferedState. (From a
    284 * technical perspective, it's not necessary to specify the state again, but
    285 * it's helpful to human readers.) Once the amount of data requested in the
    286 * original call to Transition::ToUnbuffered() has been delivered, Lex() will
    287 * transition to the @aNextState state specified via Transition::ToUnbuffered().
    288 * That state will be invoked with *no* data; it's just called to signal that
    289 * the unbuffered read is over.
    290 *
    291 * It's sometimes useful for a lexer to provide incremental results, rather
    292 * than simply running to completion and presenting all its output at once. For
    293 * example, when decoding animated images, it may be useful to produce each
    294 * frame incrementally. StreamingLexer supports this by allowing a lexer to
    295 * yield.
    296 *
    297 * To yield back to the caller, a state implementation can simply return
    298 * Transition::ToAfterYield(). ToAfterYield()'s @aNextState argument specifies
    299 * the next state that the lexer should transition to, just like when using
    300 * Transition::To(), but there are two differences. One is that Lex() will
    301 * return to the caller before processing any more data when it encounters a
    302 * yield transition. This provides an opportunity for the caller to interact
    303 * with the lexer's intermediate results. The second difference is that
    304 * @aNextState will be called with *the same data as the state that you returned
    305 * Transition::ToAfterYield() from*. This allows a lexer to partially consume
    306 * the data, return intermediate results, and then finish consuming the data
    307 * when @aNextState is called.
    308 *
    309 * It's also possible to yield during an unbuffered read. Just return a
    310 * Transition::ContinueUnbufferedAfterYield(). Just like with
    311 * Transition::ContinueUnbuffered(), the @aUnbufferedState must be the same as
    312 * the one originally passed to Transition::ToUnbuffered(). The second argument,
    313 * @aSize, specifies the amount of data that the lexer has already consumed.
    314 * When @aUnbufferedState is next invoked, it will get the same data that it
    315 * received previously, except that the first @aSize bytes will be excluded.
    316 * This makes it easy to consume unbuffered data incrementally.
    317 *
    318 * XXX(seth): We should be able to get rid of the |State| stuff totally once bug
    319 * 1198451 lands, since we can then just return a function representing the next
    320 * state directly.
    321 */
    322 template <typename State, size_t InlineBufferSize = 16>
    323 class StreamingLexer {
    324 public:
    325  StreamingLexer(const LexerTransition<State>& aStartState,
    326                 const LexerTransition<State>& aTruncatedState)
    327      : mTransition(TerminalState::FAILURE),
    328        mTruncatedTransition(aTruncatedState) {
    329    if (!aStartState.NextStateIsTerminal() &&
    330        aStartState.ControlFlow() == ControlFlowStrategy::YIELD) {
    331      // Allowing a StreamingLexer to start in a yield state doesn't make sense
    332      // semantically (since yield states are supposed to deliver the same data
    333      // as previous states, and there's no previous state here), but more
    334      // importantly, it's necessary to advance a SourceBufferIterator at least
    335      // once before you can read from it, and adding the necessary checks to
    336      // Lex() to avoid that issue has the potential to mask real bugs. So
    337      // instead, it's better to forbid starting in a yield state.
    338      MOZ_ASSERT_UNREACHABLE("Starting in a yield state");
    339      return;
    340    }
    341 
    342    if (!aTruncatedState.NextStateIsTerminal() &&
    343        (aTruncatedState.ControlFlow() == ControlFlowStrategy::YIELD ||
    344         aTruncatedState.Buffering() == BufferingStrategy::UNBUFFERED ||
    345         aTruncatedState.Size() != 0)) {
    346      // The truncated state can't receive any data because, by definition,
    347      // there is no more data to receive. That means that yielding or an
    348      // unbuffered read would not make sense, and that the state must require
    349      // zero bytes.
    350      MOZ_ASSERT_UNREACHABLE("Truncated state makes no sense");
    351      return;
    352    }
    353 
    354    SetTransition(aStartState);
    355  }
    356 
    357  /**
    358   * From the given SourceBufferIterator, aIterator, create a new iterator at
    359   * the same position, with the given read limit, aReadLimit. The read limit
    360   * applies after adjusting for the position. If the given iterator has been
    361   * advanced, but required buffering inside StreamingLexer, the position
    362   * of the cloned iterator will be at the beginning of buffered data; this
    363   * should match the perspective of the caller.
    364   */
    365  Maybe<SourceBufferIterator> Clone(SourceBufferIterator& aIterator,
    366                                    size_t aReadLimit) const {
    367    // In order to advance to the current position of the iterator from the
    368    // perspective of the caller, we need to take into account if we are
    369    // buffering data.
    370    size_t pos = aIterator.Position();
    371    if (!mBuffer.empty()) {
    372      pos += aIterator.Length();
    373      MOZ_ASSERT(pos > mBuffer.length());
    374      pos -= mBuffer.length();
    375    }
    376 
    377    size_t readLimit = aReadLimit;
    378    if (aReadLimit != SIZE_MAX) {
    379      readLimit += pos;
    380    }
    381 
    382    SourceBufferIterator other = aIterator.Owner()->Iterator(readLimit);
    383 
    384    // Since the current iterator has already advanced to this point, we
    385    // know that the state can only be READY or COMPLETE. That does not mean
    386    // everything is stored in a single chunk, and may require multiple Advance
    387    // calls to get where we want to be.
    388    SourceBufferIterator::State state;
    389    do {
    390      state = other.Advance(pos);
    391      if (state != SourceBufferIterator::READY) {
    392        // The only way we should fail to advance over data we already seen is
    393        // if we hit an error while inserting data into the buffer. This will
    394        // cause an early exit.
    395        MOZ_ASSERT(NS_FAILED(other.CompletionStatus()));
    396        return Nothing();
    397      }
    398      MOZ_ASSERT(pos >= other.Length());
    399      pos -= other.Length();
    400    } while (pos > 0);
    401 
    402    // Force the data pointer to be where we expect it to be.
    403    state = other.Advance(0);
    404    if (state != SourceBufferIterator::READY) {
    405      // The current position could be the end of the buffer, in which case
    406      // there is no point cloning with no more data to read.
    407      MOZ_ASSERT(state == SourceBufferIterator::COMPLETE);
    408      return Nothing();
    409    }
    410    return Some(std::move(other));
    411  }
    412 
    413  template <typename Func>
    414  LexerResult Lex(SourceBufferIterator& aIterator, IResumable* aOnResume,
    415                  Func aFunc) {
    416    if (mTransition.NextStateIsTerminal()) {
    417      // We've already reached a terminal state. We never deliver any more data
    418      // in this case; just return the terminal state again immediately.
    419      return LexerResult(mTransition.NextStateAsTerminal());
    420    }
    421 
    422    Maybe<LexerResult> result;
    423 
    424    // If the lexer requested a yield last time, we deliver the same data again
    425    // before we read anything else from |aIterator|. Note that although to the
    426    // callers of Lex(), Yield::NEED_MORE_DATA is just another type of yield,
    427    // internally they're different in that we don't redeliver the same data in
    428    // the Yield::NEED_MORE_DATA case, and |mYieldingToState| is not set. This
    429    // means that for Yield::NEED_MORE_DATA, we go directly to the loop below.
    430    if (mYieldingToState) {
    431      result = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
    432                   ? UnbufferedReadAfterYield(aIterator, aFunc)
    433                   : BufferedReadAfterYield(aIterator, aFunc);
    434    }
    435 
    436    while (!result) {
    437      MOZ_ASSERT_IF(mTransition.Buffering() == BufferingStrategy::UNBUFFERED,
    438                    mUnbufferedState);
    439 
    440      // Figure out how much we need to read.
    441      const size_t toRead =
    442          mTransition.Buffering() == BufferingStrategy::UNBUFFERED
    443              ? mUnbufferedState->mBytesRemaining
    444              : mTransition.Size() - mBuffer.length();
    445 
    446      // Attempt to advance the iterator by |toRead| bytes.
    447      switch (aIterator.AdvanceOrScheduleResume(toRead, aOnResume)) {
    448        case SourceBufferIterator::WAITING:
    449          // We can't continue because the rest of the data hasn't arrived from
    450          // the network yet. We don't have to do anything special; the
    451          // SourceBufferIterator will ensure that |aOnResume| gets called when
    452          // more data is available.
    453          result = Some(LexerResult(Yield::NEED_MORE_DATA));
    454          break;
    455 
    456        case SourceBufferIterator::COMPLETE:
    457          // The data is truncated; if not, the lexer would've reached a
    458          // terminal state by now. We only get to
    459          // SourceBufferIterator::COMPLETE after every byte of data has been
    460          // delivered to the lexer.
    461          result = Truncated(aIterator, aFunc);
    462          break;
    463 
    464        case SourceBufferIterator::READY:
    465          // Process the new data that became available.
    466          MOZ_ASSERT(aIterator.Data());
    467 
    468          result = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
    469                       ? UnbufferedRead(aIterator, aFunc)
    470                       : BufferedRead(aIterator, aFunc);
    471          break;
    472 
    473        default:
    474          MOZ_ASSERT_UNREACHABLE("Unknown SourceBufferIterator state");
    475          result = SetTransition(Transition::TerminateFailure());
    476      }
    477    };
    478 
    479    return *result;
    480  }
    481 
    482 private:
    483  template <typename Func>
    484  Maybe<LexerResult> UnbufferedRead(SourceBufferIterator& aIterator,
    485                                    Func aFunc) {
    486    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::UNBUFFERED);
    487    MOZ_ASSERT(mUnbufferedState);
    488    MOZ_ASSERT(!mYieldingToState);
    489    MOZ_ASSERT(mBuffer.empty(),
    490               "Buffered read at the same time as unbuffered read?");
    491    MOZ_ASSERT(aIterator.Length() <= mUnbufferedState->mBytesRemaining,
    492               "Read too much data during unbuffered read?");
    493    MOZ_ASSERT(mUnbufferedState->mBytesConsumedInCurrentChunk == 0,
    494               "Already consumed data in the current chunk, but not yielding?");
    495 
    496    if (mUnbufferedState->mBytesRemaining == 0) {
    497      // We're done with the unbuffered read, so transition to the next state.
    498      return SetTransition(aFunc(mTransition.NextState(), nullptr, 0));
    499    }
    500 
    501    return ContinueUnbufferedRead(aIterator.Data(), aIterator.Length(),
    502                                  aIterator.Length(), aFunc);
    503  }
    504 
    505  template <typename Func>
    506  Maybe<LexerResult> UnbufferedReadAfterYield(SourceBufferIterator& aIterator,
    507                                              Func aFunc) {
    508    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::UNBUFFERED);
    509    MOZ_ASSERT(mUnbufferedState);
    510    MOZ_ASSERT(mYieldingToState);
    511    MOZ_ASSERT(mBuffer.empty(),
    512               "Buffered read at the same time as unbuffered read?");
    513    MOZ_ASSERT(aIterator.Length() <= mUnbufferedState->mBytesRemaining,
    514               "Read too much data during unbuffered read?");
    515    MOZ_ASSERT(
    516        mUnbufferedState->mBytesConsumedInCurrentChunk <= aIterator.Length(),
    517        "Consumed more data than the current chunk holds?");
    518    MOZ_ASSERT(mTransition.UnbufferedState() == *mYieldingToState);
    519 
    520    mYieldingToState = Nothing();
    521 
    522    if (mUnbufferedState->mBytesRemaining == 0) {
    523      // We're done with the unbuffered read, so transition to the next state.
    524      return SetTransition(aFunc(mTransition.NextState(), nullptr, 0));
    525    }
    526 
    527    // Since we've yielded, we may have already consumed some data in this
    528    // chunk. Make the necessary adjustments. (Note that the std::min call is
    529    // just belt-and-suspenders to keep this code memory safe even if there's
    530    // a bug somewhere.)
    531    const size_t toSkip = std::min(
    532        mUnbufferedState->mBytesConsumedInCurrentChunk, aIterator.Length());
    533    const char* data = aIterator.Data() + toSkip;
    534    const size_t length = aIterator.Length() - toSkip;
    535 
    536    // If |length| is zero, we've hit the end of the current chunk. This only
    537    // happens if we yield right at the end of a chunk. Rather than call |aFunc|
    538    // with a |length| of zero bytes (which seems potentially surprising to
    539    // decoder authors), we go ahead and read more data.
    540    if (length == 0) {
    541      return FinishCurrentChunkOfUnbufferedRead(aIterator.Length());
    542    }
    543 
    544    return ContinueUnbufferedRead(data, length, aIterator.Length(), aFunc);
    545  }
    546 
    547  template <typename Func>
    548  Maybe<LexerResult> ContinueUnbufferedRead(const char* aData, size_t aLength,
    549                                            size_t aChunkLength, Func aFunc) {
    550    // Call aFunc with the unbuffered state to indicate that we're in the
    551    // middle of an unbuffered read. We enforce that any state transition
    552    // passed back to us is either a terminal state or takes us back to the
    553    // unbuffered state.
    554    LexerTransition<State> unbufferedTransition =
    555        aFunc(mTransition.UnbufferedState(), aData, aLength);
    556 
    557    // If we reached a terminal state, we're done.
    558    if (unbufferedTransition.NextStateIsTerminal()) {
    559      return SetTransition(unbufferedTransition);
    560    }
    561 
    562    MOZ_ASSERT(mTransition.UnbufferedState() ==
    563               unbufferedTransition.NextState());
    564 
    565    // Perform bookkeeping.
    566    if (unbufferedTransition.ControlFlow() == ControlFlowStrategy::YIELD) {
    567      mUnbufferedState->mBytesConsumedInCurrentChunk +=
    568          unbufferedTransition.Size();
    569      return SetTransition(unbufferedTransition);
    570    }
    571 
    572    MOZ_ASSERT(unbufferedTransition.Size() == 0);
    573    return FinishCurrentChunkOfUnbufferedRead(aChunkLength);
    574  }
    575 
    576  Maybe<LexerResult> FinishCurrentChunkOfUnbufferedRead(size_t aChunkLength) {
    577    // We've finished an unbuffered read of a chunk of length |aChunkLength|, so
    578    // update |myBytesRemaining| to reflect that we're |aChunkLength| closer to
    579    // the end of the unbuffered read. (The std::min call is just
    580    // belt-and-suspenders to keep this code memory safe even if there's a bug
    581    // somewhere.)
    582    mUnbufferedState->mBytesRemaining -=
    583        std::min(mUnbufferedState->mBytesRemaining, aChunkLength);
    584 
    585    // Since we're moving on to a new chunk, we can forget about the count of
    586    // bytes consumed by yielding in the current chunk.
    587    mUnbufferedState->mBytesConsumedInCurrentChunk = 0;
    588 
    589    return Nothing();  // Keep processing.
    590  }
    591 
    592  template <typename Func>
    593  Maybe<LexerResult> BufferedRead(SourceBufferIterator& aIterator, Func aFunc) {
    594    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::BUFFERED);
    595    MOZ_ASSERT(!mYieldingToState);
    596    MOZ_ASSERT(!mUnbufferedState,
    597               "Buffered read at the same time as unbuffered read?");
    598    MOZ_ASSERT(mBuffer.length() < mTransition.Size() ||
    599                   (mBuffer.length() == 0 && mTransition.Size() == 0),
    600               "Buffered more than we needed?");
    601 
    602    // If we have all the data, we don't actually need to buffer anything.
    603    if (mBuffer.empty() && aIterator.Length() == mTransition.Size()) {
    604      return SetTransition(
    605          aFunc(mTransition.NextState(), aIterator.Data(), aIterator.Length()));
    606    }
    607 
    608    // We do need to buffer, so make sure the buffer has enough capacity. We
    609    // deliberately wait until we know for sure we need to buffer to call
    610    // reserve() since it could require memory allocation.
    611    if (!mBuffer.reserve(mTransition.Size())) {
    612      return SetTransition(Transition::TerminateFailure());
    613    }
    614 
    615    // Append the new data we just got to the buffer.
    616    if (!mBuffer.append(aIterator.Data(), aIterator.Length())) {
    617      return SetTransition(Transition::TerminateFailure());
    618    }
    619 
    620    if (mBuffer.length() != mTransition.Size()) {
    621      return Nothing();  // Keep processing.
    622    }
    623 
    624    // We've buffered everything, so transition to the next state.
    625    return SetTransition(
    626        aFunc(mTransition.NextState(), mBuffer.begin(), mBuffer.length()));
    627  }
    628 
    629  template <typename Func>
    630  Maybe<LexerResult> BufferedReadAfterYield(SourceBufferIterator& aIterator,
    631                                            Func aFunc) {
    632    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::BUFFERED);
    633    MOZ_ASSERT(mYieldingToState);
    634    MOZ_ASSERT(!mUnbufferedState,
    635               "Buffered read at the same time as unbuffered read?");
    636    MOZ_ASSERT(mBuffer.length() <= mTransition.Size(),
    637               "Buffered more than we needed?");
    638 
    639    State nextState = std::move(*mYieldingToState);
    640 
    641    // After a yield, we need to take the same data that we delivered to the
    642    // last state, and deliver it again to the new state. We know that this is
    643    // happening right at a state transition, and that the last state was a
    644    // buffered read, so there are two cases:
    645 
    646    // 1. We got the data from the SourceBufferIterator directly.
    647    if (mBuffer.empty() && aIterator.Length() == mTransition.Size()) {
    648      return SetTransition(
    649          aFunc(nextState, aIterator.Data(), aIterator.Length()));
    650    }
    651 
    652    // 2. We got the data from the buffer.
    653    if (mBuffer.length() == mTransition.Size()) {
    654      return SetTransition(aFunc(nextState, mBuffer.begin(), mBuffer.length()));
    655    }
    656 
    657    // Anything else indicates a bug.
    658    MOZ_ASSERT_UNREACHABLE("Unexpected state encountered during yield");
    659    return SetTransition(Transition::TerminateFailure());
    660  }
    661 
    662  template <typename Func>
    663  Maybe<LexerResult> Truncated(SourceBufferIterator& aIterator, Func aFunc) {
    664    // The data is truncated. Let the lexer clean up and decide which terminal
    665    // state we should end up in.
    666    LexerTransition<State> transition =
    667        mTruncatedTransition.NextStateIsTerminal()
    668            ? mTruncatedTransition
    669            : aFunc(mTruncatedTransition.NextState(), nullptr, 0);
    670 
    671    if (!transition.NextStateIsTerminal()) {
    672      MOZ_ASSERT_UNREACHABLE("Truncated state didn't lead to terminal state?");
    673      return SetTransition(Transition::TerminateFailure());
    674    }
    675 
    676    // If the SourceBuffer was completed with a failing state, we end in
    677    // TerminalState::FAILURE no matter what. This only happens in exceptional
    678    // situations like SourceBuffer itself encountering a failure due to OOM.
    679    if (NS_FAILED(aIterator.CompletionStatus())) {
    680      return SetTransition(Transition::TerminateFailure());
    681    }
    682 
    683    return SetTransition(transition);
    684  }
    685 
    686  Maybe<LexerResult> SetTransition(const LexerTransition<State>& aTransition) {
    687    // There should be no transitions while we're buffering for a buffered read
    688    // unless they're to terminal states. (The terminal state transitions would
    689    // generally be triggered by error handling code.)
    690    MOZ_ASSERT_IF(!mBuffer.empty(), aTransition.NextStateIsTerminal() ||
    691                                        mBuffer.length() == mTransition.Size());
    692 
    693    // Similarly, the only transitions allowed in the middle of an unbuffered
    694    // read are to a terminal state, or a yield to the same state. Otherwise, we
    695    // should remain in the same state until the unbuffered read completes.
    696    MOZ_ASSERT_IF(
    697        mUnbufferedState,
    698        aTransition.NextStateIsTerminal() ||
    699            (aTransition.ControlFlow() == ControlFlowStrategy::YIELD &&
    700             aTransition.NextState() == mTransition.UnbufferedState()) ||
    701            mUnbufferedState->mBytesRemaining == 0);
    702 
    703    // If this transition is a yield, save the next state and return. We'll
    704    // handle the rest when Lex() gets called again.
    705    if (!aTransition.NextStateIsTerminal() &&
    706        aTransition.ControlFlow() == ControlFlowStrategy::YIELD) {
    707      mYieldingToState = Some(aTransition.NextState());
    708      return Some(LexerResult(Yield::OUTPUT_AVAILABLE));
    709    }
    710 
    711    // Update our transition.
    712    mTransition = aTransition;
    713 
    714    // Get rid of anything left over from the previous state.
    715    mBuffer.clear();
    716    mYieldingToState = Nothing();
    717    mUnbufferedState = Nothing();
    718 
    719    // If we reached a terminal state, let the caller know.
    720    if (mTransition.NextStateIsTerminal()) {
    721      return Some(LexerResult(mTransition.NextStateAsTerminal()));
    722    }
    723 
    724    // If we're entering an unbuffered state, record how long we'll stay in it.
    725    if (mTransition.Buffering() == BufferingStrategy::UNBUFFERED) {
    726      mUnbufferedState.emplace(mTransition.Size());
    727    }
    728 
    729    return Nothing();  // Keep processing.
    730  }
    731 
    /// Bookkeeping for our position within an unbuffered read.
    struct UnbufferedState {
      /// Starts a read that still has |aBytesRemaining| bytes to go; nothing has
      /// been consumed in the current chunk yet.
      explicit UnbufferedState(size_t aBytesRemaining)
          : mBytesRemaining(aBytesRemaining) {}

      /// Bytes of the unbuffered read that are still outstanding.
      size_t mBytesRemaining;

      /// Bytes of the current chunk that have already been consumed.
      size_t mBytesConsumedInCurrentChunk = 0;
    };
    740 
    // Bytes accumulated so far for the current buffered read. BufferedRead()
    // appends to it when a read spans more than one chunk, and SetTransition()
    // clears it whenever a new transition is adopted.
    741  Vector<char, InlineBufferSize> mBuffer;
    // The transition currently in effect; replaced by SetTransition().
    742  LexerTransition<State> mTransition;
    // The transition consulted by Truncated() when the data is truncated. It
    // never changes after construction.
    743  const LexerTransition<State> mTruncatedTransition;
    // Set by SetTransition() to the state to resume in after a yield; Nothing()
    // otherwise.
    744  Maybe<State> mYieldingToState;
    // Present only while the current transition is an unbuffered read;
    // SetTransition() creates it with the transition's size.
    745  Maybe<UnbufferedState> mUnbufferedState;
    746 };
    747 
    748 }  // namespace image
    749 }  // namespace mozilla
    750 
    751 #endif  // mozilla_image_StreamingLexer_h
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE