gecko-dev/parser/html/nsHtml5Highlighter.h

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5Highlighter_h
#define nsHtml5Highlighter_h

#include "nsCOMPtr.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOperation.h"
#include "nsAHtml5TreeOpSink.h"

// Length of a highlighter handle array.
// NOTE(review): presumably the number of nsIContent* slots in each array
// allocated for nsHtml5Highlighter::mHandles -- confirm against the
// implementation file.
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512

/**
 * A state machine for generating HTML for display in View Source based on
 * the transitions the tokenizer makes on the source being viewed.
 */
class nsHtml5Highlighter {
 public:
  /**
   * The constructor.
   *
   * @param aOpSink the sink for the tree ops generated by this highlighter
   */
  explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);

  /**
   * The destructor.
   */
  ~nsHtml5Highlighter();

  /**
   * Set the op sink (for speculation).
   *
   * @param aOpSink the new sink for the tree ops generated by this
   *        highlighter
   */
  void SetOpSink(nsAHtml5TreeOpSink* aOpSink);

  /**
   * Reset state to after generated head but before processing any of the input
   * stream.
   */
  void Rewind();

  /**
   * Starts the generated document.
   *
   * @param aTitle NOTE(review): presumably the title to use for the generated
   *        document -- confirm against the implementation
   */
  void Start(const nsAutoString& aTitle);

  /**
   * Updates the charset source via the op queue.
   *
   * @param aCharsetSource the charset source to communicate via the op queue
   */
  void UpdateCharsetSource(nsCharsetSource aCharsetSource);

  /**
   * Report a tokenizer state transition.
   *
   * @param aState the state being transitioned to
   * @param aReconsume whether this is a reconsuming transition
   * @param aPos the tokenizer's current position into the buffer
   * @return NOTE(review): a tokenizer state value; presumably the state the
   *         tokenizer should continue in -- confirm against the implementation
   */
  int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);

  /**
   * Report end of file.
   *
   * The return value must not be ignored (the method is [[nodiscard]]).
   *
   * @return `true` normally and `false` on OOM
   */
  [[nodiscard]] bool End();

  /**
   * Set the current buffer being tokenized.
   *
   * @param aBuffer the buffer being tokenized
   */
  void SetBuffer(nsHtml5UTF16Buffer* aBuffer);

  /**
   * Let go of the buffer being tokenized but first, flush text from it.
   *
   * @param aPos the first UTF-16 code unit not to flush
   */
  void DropBuffer(int32_t aPos);

  /**
   * Query whether there are so many ops in the queue
   * that they should be flushed now.
   *
   * @return true if FlushOps() should be called now
   */
  bool ShouldFlushOps();

  /**
   * Flush the tree ops into the sink.
   *
   * @return Ok(true) if there were ops to flush, Ok(false)
   *         if there were no ops to flush and Err() on OOM.
   */
  mozilla::Result<bool, nsresult> FlushOps();

  /**
   * Linkify the current attribute value if the attribute name is one of
   * known URL attributes. (When executing tree ops, javascript: URLs will
   * not be linkified, though.)
   *
   * @param aName the name of the attribute
   * @param aValue the value of the attribute
   */
  void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
                                  nsHtml5String aValue);

  /**
   * Inform the highlighter that the tokenizer successfully completed a
   * named character reference.
   */
  void CompletedNamedCharacterReference();

  /**
   * Adds an error annotation to the node that's currently on top of
   * mStack.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentNode(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentRun(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text with one atom to use when formatting the message.
   *
   * @param aMsgId the id of the message in the property file
   * @param aName the atom
   */
  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recently opened markup declaration/tag span, character reference or
   * run of text with two atoms to use when formatting the message.
   *
   * @param aMsgId the id of the message in the property file
   * @param aName the first atom
   * @param aOther the second atom
   */
  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recent potentially character reference-starting ampersand.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentAmpersand(const char* aMsgId);

  /**
   * Adds an error annotation to the node that corresponds to the most
   * recent potentially self-closing slash.
   *
   * @param aMsgId the id of the message in the property file
   */
  void AddErrorToCurrentSlash(const char* aMsgId);

  /**
   * Enqueues a tree op for adding a base to the URLs with the view-source:
   * URL scheme.
   *
   * @param aValue the base URL to add
   */
  void AddBase(nsHtml5String aValue);

  /**
   * Starts a wrapper around a run of characters.
   */
  void StartCharacters();

 private:
  /**
   * Starts a span with no class.
   */
  void StartSpan();

  /**
   * Starts a <span> and sets the class attribute on it.
   *
   * @param aClass the class to set (MUST be a static string that does not
   *        need to be released!)
   */
  void StartSpan(const char16_t* aClass);

  /**
   * End the current <span> or <a> in the highlighter output.
   */
  void EndSpanOrA();

  /**
   * Ends a wrapper around a run of characters.
   *
   * NOTE(review): the method name indicates that a markup run is started
   * afterwards -- confirm against the implementation.
   */
  void EndCharactersAndStartMarkupRun();

  /**
   * Starts an <a>.
   */
  void StartA();

  /**
   * Flushes characters up to but not including the current one.
   */
  void FlushChars();

  /**
   * Flushes characters up to and including the current one.
   */
  void FlushCurrent();

  /**
   * Finishes highlighting a tag in the input data by closing the open
   * <span> and <a> elements in the highlighter output and then starts
   * another <span> for potentially highlighting characters potentially
   * appearing next.
   */
  void FinishTag();

  /**
   * Adds a class attribute to the current node.
   *
   * @param aClass the class to set (MUST be a static string that does not
   *        need to be released!)
   */
  void AddClass(const char16_t* aClass);

  /**
   * Allocates a handle for an element.
   *
   * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
   * in nsHtml5TreeBuilderHSupplement.h.
   *
   * @return the handle
   */
  nsIContent** AllocateContentHandle();

  /**
   * Enqueues an element creation tree operation.
   *
   * @param aName the name of the element
   * @param aAttributes the attribute holder (ownership will be taken) or
   *        nullptr for no attributes
   * @param aIntendedParent the intended parent node for the created element
   * @param aCreator the content creator function
   * @return the handle for the element that will be created
   */
  nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
                             nsIContent** aIntendedParent,
                             mozilla::dom::HTMLContentCreatorFunction aCreator);

  /**
   * Gets the handle for the current node. May be called only after the
   * root element has been set.
   *
   * @return the handle for the current node
   */
  nsIContent** CurrentNode();

  /**
   * Create an element and push it (its handle) on the stack.
   *
   * @param aName the name of the element
   * @param aAttributes the attribute holder (ownership will be taken) or
   *        nullptr for no attributes
   * @param aCreator the content creator function
   */
  void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
            mozilla::dom::HTMLContentCreatorFunction aCreator);

  /**
   * Pops the current node off the stack.
   */
  void Pop();

  /**
   * Appends text content to the current node.
   *
   * @param aBuffer the buffer to copy from
   * @param aStart the index of the first code unit to copy
   * @param aLength the number of code units to copy
   */
  void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
                        int32_t aLength);

  /**
   * Enqueues a tree op for adding an href attribute with the view-source:
   * URL scheme to the current node.
   *
   * @param aValue the (potentially relative) URL to link to
   */
  void AddViewSourceHref(nsHtml5String aValue);

  /**
   * The state we are transitioning away from.
   */
  int32_t mState;

  /**
   * The index of the first UTF-16 code unit in mBuffer that hasn't been
   * flushed yet.
   */
  int32_t mCStart;

  /**
   * The position of the code unit in mBuffer that caused the current
   * transition.
   */
  int32_t mPos;

  /**
   * The current line number.
   */
  int32_t mLineNumber;

  /**
   * The number of inline elements open inside the <pre> excluding the
   * span potentially wrapping a run of characters.
   */
  int32_t mInlinesOpen;

  /**
   * Whether there's a span wrapping a run of characters (excluding CDATA
   * section) open.
   */
  bool mInCharacters;

  /**
   * The current buffer being tokenized. Held as a raw pointer; see
   * SetBuffer() and DropBuffer().
   */
  nsHtml5UTF16Buffer* mBuffer;

  /**
   * The outgoing tree op queue.
   */
  nsTArray<nsHtml5TreeOperation> mOpQueue;

  /**
   * The tree op stage for the tree op executor or a speculation when looking
   * for meta charset.
   *
   * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
   * object, because this object is owned by the nsHtml5Tokenizer instance that
   * is owned by the nsHtml5StreamParser, which keeps the executor alive via
   * nsHtml5StreamParser::mExecutorFlusher.
   */
  nsAHtml5TreeOpSink* mOpSink;

  /**
   * The most recently opened markup declaration/tag or run of characters.
   */
  nsIContent** mCurrentRun;

  /**
   * The most recent ampersand in a place where character references were
   * allowed.
   */
  nsIContent** mAmpersand;

  /**
   * The most recent slash that might become a self-closing slash.
   */
  nsIContent** mSlash;

  /**
   * Memory for element handles.
   */
  mozilla::UniquePtr<nsIContent*[]> mHandles;

  /**
   * Number of handles used in mHandles.
   */
  int32_t mHandlesUsed;

  /**
   * A holder for old contents of mHandles.
   */
  nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;

  /**
   * The element stack.
   */
  nsTArray<nsIContent**> mStack;

  // The static arrays below are only declared here; their definitions (and
  // thus their contents) live outside this header. The comments state the
  // string each one is documented to hold.

  /**
   * The string "comment"
   */
  static char16_t sComment[];

  /**
   * The string "cdata"
   */
  static char16_t sCdata[];

  /**
   * The string "start-tag"
   */
  static char16_t sStartTag[];

  /**
   * The string "attribute-name"
   */
  static char16_t sAttributeName[];

  /**
   * The string "attribute-value"
   */
  static char16_t sAttributeValue[];

  /**
   * The string "end-tag"
   */
  static char16_t sEndTag[];

  /**
   * The string "doctype"
   */
  static char16_t sDoctype[];

  /**
   * The string "entity"
   */
  static char16_t sEntity[];

  /**
   * The string "pi"
   */
  static char16_t sPi[];

  /**
   * Whether base is already visited once.
   */
  bool mSeenBase;
};

#endif  // nsHtml5Highlighter_h