gecko-dev/image/StreamingLexer.h

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * StreamingLexer is a lexing framework designed to make it simple to write
 * image decoders without worrying about the details of how the data is arriving
 * from the network.
 */

#ifndef mozilla_image_StreamingLexer_h
#define mozilla_image_StreamingLexer_h

#include <algorithm>
#include <cstdint>
#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/Maybe.h"
#include "mozilla/Variant.h"
#include "mozilla/Vector.h"

namespace mozilla {
namespace image {

/// Buffering behaviors for StreamingLexer transitions.
enum class BufferingStrategy
{
  BUFFERED,   // Data will be buffered and processed in one chunk.
  UNBUFFERED  // Data will be processed as it arrives, in multiple chunks.
};

/// Possible terminal states for the lexer.
enum class TerminalState
{
  SUCCESS,
  FAILURE
};

/// Possible yield reasons for the lexer.
enum class Yield
{
  NEED_MORE_DATA  // The lexer cannot continue without more data.
};

/// The result of a call to StreamingLexer::Lex().
typedef Variant<TerminalState, Yield> LexerResult;

/**
 * LexerTransition is a type used to give commands to the lexing framework.
 * Code that uses StreamingLexer can create LexerTransition values using the
 * static methods on Transition, and then return them to the lexing framework
 * for execution.
 */
template <typename State>
class LexerTransition
{
public:
  // This is implicit so that Terminate{Success,Failure}() can return a
  // TerminalState and have it implicitly converted to a
  // LexerTransition<State>, which avoids the need for a "<State>"
  // qualification to the Terminate{Success,Failure}() callsite.
  MOZ_IMPLICIT LexerTransition(TerminalState aFinalState)
    : mNextState(aFinalState)
  {}

  bool NextStateIsTerminal() const
  {
    return mNextState.template is<TerminalState>();
  }

  TerminalState NextStateAsTerminal() const
  {
    return mNextState.template as<TerminalState>();
  }

  State NextState() const
  {
    return mNextState.template as<NonTerminalState>().mState;
  }

  State UnbufferedState() const
  {
    return *mNextState.template as<NonTerminalState>().mUnbufferedState;
  }

  size_t Size() const
  {
    return mNextState.template as<NonTerminalState>().mSize;
  }

  BufferingStrategy Buffering() const
  {
    return mNextState.template as<NonTerminalState>().mBufferingStrategy;
  }

private:
  friend struct Transition;

  LexerTransition(State aNextState,
                  const Maybe<State>& aUnbufferedState,
                  size_t aSize,
                  BufferingStrategy aBufferingStrategy)
    : mNextState(NonTerminalState(aNextState, aUnbufferedState, aSize,
                                  aBufferingStrategy))
  {}

  struct NonTerminalState
  {
    State mState;
    Maybe<State> mUnbufferedState;
    size_t mSize;
    BufferingStrategy mBufferingStrategy;

    NonTerminalState(State aState,
                     const Maybe<State>& aUnbufferedState,
                     size_t aSize,
                     BufferingStrategy aBufferingStrategy)
      : mState(aState)
      , mUnbufferedState(aUnbufferedState)
      , mSize(aSize)
      , mBufferingStrategy(aBufferingStrategy)
    {
      MOZ_ASSERT_IF(mBufferingStrategy == BufferingStrategy::UNBUFFERED,
                    mUnbufferedState);
      MOZ_ASSERT_IF(mUnbufferedState,
                    mBufferingStrategy == BufferingStrategy::UNBUFFERED);
    }
  };
  Variant<NonTerminalState, TerminalState> mNextState;
};

struct Transition
{
  /// Transition to @aNextState, buffering @aSize bytes of data.
  template <typename State>
  static LexerTransition<State>
  To(const State& aNextState, size_t aSize)
  {
    return LexerTransition<State>(aNextState, Nothing(), aSize,
                                  BufferingStrategy::BUFFERED);
  }

  /**
   * Transition to @aNextState via @aUnbufferedState, reading @aSize bytes of
   * data unbuffered.
   *
   * The unbuffered data will be delivered in state @aUnbufferedState, which may
   * be invoked repeatedly until all @aSize bytes have been delivered. Then,
   * @aNextState will be invoked with no data. No state transitions are allowed
   * from @aUnbufferedState except for transitions to a terminal state, so
   * @aNextState will always be reached unless lexing terminates early.
   */
  template <typename State>
  static LexerTransition<State>
  ToUnbuffered(const State& aNextState,
               const State& aUnbufferedState,
               size_t aSize)
  {
    return LexerTransition<State>(aNextState, Some(aUnbufferedState), aSize,
                                  BufferingStrategy::UNBUFFERED);
  }

  /**
   * Continue receiving unbuffered data. @aUnbufferedState should be the same
   * state as the @aUnbufferedState specified in the preceding call to
   * ToUnbuffered().
   *
   * This should be used during an unbuffered read initiated by ToUnbuffered().
   */
  template <typename State>
  static LexerTransition<State>
  ContinueUnbuffered(const State& aUnbufferedState)
  {
    return LexerTransition<State>(aUnbufferedState, Nothing(), 0,
                                  BufferingStrategy::BUFFERED);
  }

  /**
   * Terminate lexing, ending up in terminal state SUCCESS. (The implicit
   * LexerTransition constructor will convert the result to a LexerTransition
   * as needed.)
   *
   * No more data will be delivered after this function is used.
   */
  static TerminalState
  TerminateSuccess()
  {
    return TerminalState::SUCCESS;
  }

  /**
   * Terminate lexing, ending up in terminal state FAILURE. (The implicit
   * LexerTransition constructor will convert the result to a LexerTransition
   * as needed.)
   *
   * No more data will be delivered after this function is used.
   */
  static TerminalState
  TerminateFailure()
  {
    return TerminalState::FAILURE;
  }

private:
  Transition();
};

/**
 * StreamingLexer is a lexing framework designed to make it simple to write
 * image decoders without worrying about the details of how the data is arriving
 * from the network.
 *
 * To use StreamingLexer:
 *
 *  - Create a State type. This should be an |enum class| listing all of the
 *    states that you can be in while lexing the image format you're trying to
 *    read.
 *
 *  - Add an instance of StreamingLexer<State> to your decoder class. Initialize
 *    it with a Transition::To() the state that you want to start lexing in.
 *
 *  - In your decoder's DoDecode() method, call Lex(), passing in the input
 *    data and length that are passed to DoDecode(). You also need to pass
 *    a lambda which dispatches to lexing code for each state based on the State
 *    value that's passed in. The lambda generally should just continue a
 *    |switch| statement that calls different methods for each State value. Each
 *    method should return a LexerTransition<State>, which the lambda should
 *    return in turn.
 *
 *  - Write the methods that actually implement lexing for your image format.
 *    These methods should return either Transition::To(), to move on to another
 *    state, or Transition::Terminate{Success,Failure}(), if lexing has
 *    terminated in either success or failure. (There are also additional
 *    transitions for unbuffered reads; see below.)
 *
 * That's the basics. The StreamingLexer will track your position in the input
 * and buffer enough data so that your lexing methods can process everything in
 * one pass. Lex() returns Yield::NEED_MORE_DATA if more data is needed, in
 * which case you should just return from DoDecode(). If lexing reaches a
 * terminal state, Lex() returns TerminalState::SUCCESS or
 * TerminalState::FAILURE, and you can check which one to determine if lexing
 * succeeded or failed and do any necessary cleanup.
 *
 * There's one more wrinkle: some lexers may want to *avoid* buffering in some
 * cases, and just process the data as it comes in. This is useful if, for
 * example, you just want to skip over a large section of data; there's no point
 * in buffering data you're just going to ignore.
 *
 * You can begin an unbuffered read with Transition::ToUnbuffered(). This works
 * a little differently than Transition::To() in that you specify *two* states.
 * The @aUnbufferedState argument specifies a state that will be called
 * repeatedly with unbuffered data, as soon as it arrives. The implementation
 * for that state should return either a transition to a terminal state, or
 * Transition::ContinueUnbuffered(). Once the amount of data requested in the
 * original call to Transition::ToUnbuffered() has been delivered, Lex() will
 * transition to the @aNextState state specified via Transition::ToUnbuffered().
 * That state will be invoked with *no* data; it's just called to signal that
 * the unbuffered read is over.
 *
 * XXX(seth): We should be able to get of the |State| stuff totally once bug
 * 1198451 lands, since we can then just return a function representing the next
 * state directly.
 */
template <typename State, size_t InlineBufferSize = 16>
class StreamingLexer
{
public:
  explicit StreamingLexer(LexerTransition<State> aStartState)
    : mTransition(TerminalState::FAILURE)
    , mToReadUnbuffered(0)
  {
    SetTransition(aStartState);
  }

  template <typename Func>
  LexerResult Lex(SourceBufferIterator& aIterator,
                  IResumable* aOnResume,
                  Func aFunc)
  {
    if (mTransition.NextStateIsTerminal()) {
      // We've already reached a terminal state. We never deliver any more data
      // in this case; just return the terminal state again immediately.
      return LexerResult(mTransition.NextStateAsTerminal());
    }

    Maybe<LexerResult> result;
    do {
      // Figure out how much we need to read.
      const size_t toRead = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
                          ? mToReadUnbuffered
                          : mTransition.Size() - mBuffer.length();

      // Attempt to advance the iterator by |toRead| bytes.
      switch (aIterator.AdvanceOrScheduleResume(toRead, aOnResume)) {
        case SourceBufferIterator::WAITING:
          // We can't continue because the rest of the data hasn't arrived from
          // the network yet. We don't have to do anything special; the
          // SourceBufferIterator will ensure that |aOnResume| gets called when
          // more data is available.
          result = Some(LexerResult(Yield::NEED_MORE_DATA));
          break;

        case SourceBufferIterator::COMPLETE:
          // Normally even if the data is truncated, we want decoding to
          // succeed so we can display whatever we got. However, if the
          // SourceBuffer was completed with a failing status, we want to fail.
          // This happens only in exceptional situations like SourceBuffer
          // itself encountering a failure due to OOM.
          result = SetTransition(NS_SUCCEEDED(aIterator.CompletionStatus())
                 ? Transition::TerminateSuccess()
                 : Transition::TerminateFailure());
          break;

        case SourceBufferIterator::READY:
          // Process the new data that became available.
          MOZ_ASSERT(aIterator.Data());

          result = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
                 ? UnbufferedRead(aIterator, aFunc)
                 : BufferedRead(aIterator, aFunc);
          break;

        default:
          MOZ_ASSERT_UNREACHABLE("Unknown SourceBufferIterator state");
          result = SetTransition(Transition::TerminateFailure());
      }
    } while (!result);

    return *result;
  }

private:
  template <typename Func>
  Maybe<LexerResult> UnbufferedRead(SourceBufferIterator& aIterator, Func aFunc)
  {
    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::UNBUFFERED);
    MOZ_ASSERT(mBuffer.empty(),
               "Buffered read at the same time as unbuffered read?");

    if (mToReadUnbuffered > 0) {
      // Call aFunc with the unbuffered state to indicate that we're in the
      // middle of an unbuffered read. We enforce that any state transition
      // passed back to us is either a terminal state or takes us back to the
      // unbuffered state.
      LexerTransition<State> unbufferedTransition =
        aFunc(mTransition.UnbufferedState(), aIterator.Data(), aIterator.Length());
      if (unbufferedTransition.NextStateIsTerminal()) {
        return SetTransition(unbufferedTransition);
      }

      MOZ_ASSERT(mTransition.UnbufferedState() ==
                   unbufferedTransition.NextState());

      mToReadUnbuffered -= aIterator.Length();
      if (mToReadUnbuffered != 0) {
        return Nothing();  // Keep processing.
      }
    }

    // We're done with the unbuffered read, so transition to the next state.
    return SetTransition(aFunc(mTransition.NextState(), nullptr, 0));
  }

  template <typename Func>
  Maybe<LexerResult> BufferedRead(SourceBufferIterator& aIterator, Func aFunc)
  {
    MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::BUFFERED);
    MOZ_ASSERT(mToReadUnbuffered == 0,
               "Buffered read at the same time as unbuffered read?");
    MOZ_ASSERT(mBuffer.length() < mTransition.Size() ||
               (mBuffer.length() == 0 && mTransition.Size() == 0),
               "Buffered more than we needed?");

    // If we have all the data, we don't actually need to buffer anything.
    if (mBuffer.empty() && aIterator.Length() == mTransition.Size()) {
      return SetTransition(aFunc(mTransition.NextState(),
                                 aIterator.Data(),
                                 aIterator.Length()));
    }

    // We do need to buffer, so make sure the buffer has enough capacity. We
    // deliberately wait until we know for sure we need to buffer to call
    // reserve() since it could require memory allocation.
    if (!mBuffer.reserve(mTransition.Size())) {
      return SetTransition(Transition::TerminateFailure());
    }

    // Append the new data we just got to the buffer.
    if (!mBuffer.append(aIterator.Data(), aIterator.Length())) {
      return SetTransition(Transition::TerminateFailure());
    }

    if (mBuffer.length() != mTransition.Size()) {
      return Nothing();  // Keep processing.
    }

    // We've buffered everything, so transition to the next state.
    return SetTransition(aFunc(mTransition.NextState(),
                               mBuffer.begin(),
                               mBuffer.length()));
  }

  Maybe<LexerResult> SetTransition(const LexerTransition<State>& aTransition)
  {
    mTransition = aTransition;

    // Get rid of anything left over from the previous state.
    mBuffer.clear();
    mToReadUnbuffered = 0;

    // If we reached a terminal state, let the caller know.
    if (mTransition.NextStateIsTerminal()) {
      return Some(LexerResult(mTransition.NextStateAsTerminal()));
    }

    // If we're entering an unbuffered state, record how long we'll stay in it.
    if (mTransition.Buffering() == BufferingStrategy::UNBUFFERED) {
      mToReadUnbuffered = mTransition.Size();
    }

    return Nothing();  // Keep processing.
  }

  Vector<char, InlineBufferSize> mBuffer;
  LexerTransition<State> mTransition;
  size_t mToReadUnbuffered;
};

} // namespace image
} // namespace mozilla

#endif // mozilla_image_StreamingLexer_h