gecko-dev/parser/html/nsHtml5Highlighter.h

434 lines
11 KiB
C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5Highlighter_h
#define nsHtml5Highlighter_h
#include "nsCOMPtr.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOperation.h"
#include "nsAHtml5TreeOpSink.h"
/**
* Length constant for the highlighter's element handle arrays.
*
* NOTE(review): presumably the number of nsIContent* slots in each array held
* by nsHtml5Highlighter::mHandles / mOldHandles -- confirm against the
* implementation file, which is not visible from this header.
*/
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
/**
* A state machine for generating HTML for display in View Source based on
* the transitions the tokenizer makes on the source being viewed.
*
* The highlighter queues tree operations (see mOpQueue) and hands them to an
* nsAHtml5TreeOpSink (see mOpSink, FlushOps()).
*/
class nsHtml5Highlighter {
public:
/**
* The constructor.
*
* @param aOpSink the sink for the tree ops generated by this highlighter
*/
explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
/**
* The destructor.
*/
~nsHtml5Highlighter();
/**
* Set the op sink (for speculation).
*
* @param aOpSink the sink that should receive the tree ops from now on
*/
void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
/**
* Reset state to after generated head but before processing any of the input
* stream.
*/
void Rewind();
/**
* Starts the generated document.
*
* @param aTitle presumably the title to give the generated document --
* confirm against the implementation
*/
void Start(const nsAutoString& aTitle);
/**
* Updates the charset source via the op queue.
*
* @param aCharsetSource the charset source to report
*/
void UpdateCharsetSource(nsCharsetSource aCharsetSource);
/**
* Report a tokenizer state transition.
*
* @param aState the state being transitioned to
* @param aReconsume whether this is a reconsuming transition
* @param aPos the tokenizer's current position into the buffer
* @return a tokenizer state. NOTE(review): presumably the state the tokenizer
* should continue in (likely aState itself) -- confirm against the
* implementation, which is not visible from this header
*/
int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
/**
* Report end of file.
*/
void End();
/**
* Set the current buffer being tokenized.
*
* @param aBuffer the buffer the tokenizer is about to work on
*/
void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
/**
* Let go of the buffer being tokenized but first, flush text from it.
*
* @param aPos the first UTF-16 code unit not to flush
*/
void DropBuffer(int32_t aPos);
/**
* Flush the tree ops into the sink.
*
* @return true if there were ops to flush
*/
bool FlushOps();
/**
* Linkify the current attribute value if the attribute name is one of
* known URL attributes. (When executing tree ops, javascript: URLs will
* not be linkified, though.)
*
* @param aName the name of the attribute
* @param aValue the value of the attribute
*/
void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
nsHtml5String aValue);
/**
* Inform the highlighter that the tokenizer successfully completed a
* named character reference.
*/
void CompletedNamedCharacterReference();
/**
* Adds an error annotation to the node that's currently on top of
* mStack.
*
* @param aMsgId the id of the message in the property file
*/
void AddErrorToCurrentNode(const char* aMsgId);
/**
* Adds an error annotation to the node that corresponds to the most
* recently opened markup declaration/tag span, character reference or
* run of text.
*
* @param aMsgId the id of the message in the property file
*/
void AddErrorToCurrentRun(const char* aMsgId);
/**
* Adds an error annotation to the node that corresponds to the most
* recently opened markup declaration/tag span, character reference or
* run of text with one atom to use when formatting the message.
*
* @param aMsgId the id of the message in the property file
* @param aName the atom
*/
void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
/**
* Adds an error annotation to the node that corresponds to the most
* recently opened markup declaration/tag span, character reference or
* run of text with two atoms to use when formatting the message.
*
* @param aMsgId the id of the message in the property file
* @param aName the first atom
* @param aOther the second atom
*/
void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
/**
* Adds an error annotation to the node that corresponds to the most
* recent potentially character reference-starting ampersand.
*
* @param aMsgId the id of the message in the property file
*/
void AddErrorToCurrentAmpersand(const char* aMsgId);
/**
* Adds an error annotation to the node that corresponds to the most
* recent potentially self-closing slash.
*
* @param aMsgId the id of the message in the property file
*/
void AddErrorToCurrentSlash(const char* aMsgId);
/**
* Enqueues a tree op for adding a base to the URLs that use the view-source:
* URL scheme.
*
* @param aValue the base URL to add
*/
void AddBase(nsHtml5String aValue);
/**
* Starts a wrapper around a run of characters.
*/
void StartCharacters();
private:
/**
* Starts a span with no class.
*/
void StartSpan();
/**
* Starts a <span> and sets the class attribute on it.
*
* @param aClass the class to set (MUST be a static string that does not
* need to be released!)
*/
void StartSpan(const char16_t* aClass);
/**
* End the current <span> or <a> in the highlighter output.
*/
void EndSpanOrA();
/**
* Ends a wrapper around a run of characters.
*
* NOTE(review): the name suggests this also begins a markup run (see
* mCurrentRun) -- confirm against the implementation.
*/
void EndCharactersAndStartMarkupRun();
/**
* Starts an <a>.
*/
void StartA();
/**
* Flushes characters up to but not including the current one.
*/
void FlushChars();
/**
* Flushes characters up to and including the current one.
*/
void FlushCurrent();
/**
* Finishes highlighting a tag in the input data by closing the open
* <span> and <a> elements in the highlighter output and then starts
* another <span> for potentially highlighting characters potentially
* appearing next.
*/
void FinishTag();
/**
* Adds a class attribute to the current node.
*
* @param aClass the class to set (MUST be a static string that does not
* need to be released!)
*/
void AddClass(const char16_t* aClass);
/**
* Allocates a handle for an element.
*
* See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
* in nsHtml5TreeBuilderHSupplement.h.
*
* @return the handle
*/
nsIContent** AllocateContentHandle();
/**
* Enqueues an element creation tree operation.
*
* @param aName the name of the element
* @param aAttributes the attribute holder (ownership will be taken) or
* nullptr for no attributes
* @param aIntendedParent the intended parent node for the created element
* @param aCreator the content creator function
* @return the handle for the element that will be created
*/
nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
nsIContent** aIntendedParent,
mozilla::dom::HTMLContentCreatorFunction aCreator);
/**
* Gets the handle for the current node. May be called only after the
* root element has been set.
*
* @return the handle for the current node
*/
nsIContent** CurrentNode();
/**
* Create an element and push it (its handle) on the stack.
*
* @param aName the name of the element
* @param aAttributes the attribute holder (ownership will be taken) or
* nullptr for no attributes
* @param aCreator the content creator function
*/
void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
mozilla::dom::HTMLContentCreatorFunction aCreator);
/**
* Pops the current node off the stack.
*/
void Pop();
/**
* Appends text content to the current node.
*
* @param aBuffer the buffer to copy from
* @param aStart the index of the first code unit to copy
* @param aLength the number of code units to copy
*/
void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
int32_t aLength);
/**
* Enqueues a tree op for adding an href attribute with the view-source:
* URL scheme to the current node.
*
* @param aValue the (potentially relative) URL to link to
*/
void AddViewSourceHref(nsHtml5String aValue);
/**
* The state we are transitioning away from.
*/
int32_t mState;
/**
* The index of the first UTF-16 code unit in mBuffer that hasn't been
* flushed yet.
*/
int32_t mCStart;
/**
* The position of the code unit in mBuffer that caused the current
* transition.
*/
int32_t mPos;
/**
* The current line number.
*/
int32_t mLineNumber;
/**
* The number of inline elements open inside the <pre> excluding the
* span potentially wrapping a run of characters.
*/
int32_t mInlinesOpen;
/**
* Whether there's a span wrapping a run of characters (excluding CDATA
* section) open.
*/
bool mInCharacters;
/**
* The current buffer being tokenized. Not owned through this raw pointer;
* see SetBuffer() and DropBuffer().
*/
nsHtml5UTF16Buffer* mBuffer;
/**
* The outgoing tree op queue.
*/
nsTArray<nsHtml5TreeOperation> mOpQueue;
/**
* The tree op stage for the tree op executor or a speculation when looking
* for meta charset.
*
* The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
* object, because this object is owned by the nsHtml5Tokenizer instance that
* is owned by the nsHtml5StreamParser, which keeps the executor alive via
* nsHtml5StreamParser::mExecutorFlusher.
*/
nsAHtml5TreeOpSink* mOpSink;
/**
* The most recently opened markup declaration/tag or run of characters.
*/
nsIContent** mCurrentRun;
/**
* The most recent ampersand in a place where character references were
* allowed.
*/
nsIContent** mAmpersand;
/**
* The most recent slash that might become a self-closing slash.
*/
nsIContent** mSlash;
/**
* Memory for element handles.
*/
mozilla::UniquePtr<nsIContent*[]> mHandles;
/**
* The number of handles used in mHandles.
*/
int32_t mHandlesUsed;
/**
* A holder for old contents of mHandles.
*/
nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
/**
* The element stack.
*/
nsTArray<nsIContent**> mStack;
/**
* The string "comment"
*
* NOTE(review): this and the static strings below are presumably the class
* names passed to StartSpan(const char16_t*) / AddClass() -- confirm against
* the implementation.
*/
static char16_t sComment[];
/**
* The string "cdata"
*/
static char16_t sCdata[];
/**
* The string "start-tag"
*/
static char16_t sStartTag[];
/**
* The string "attribute-name"
*/
static char16_t sAttributeName[];
/**
* The string "attribute-value"
*/
static char16_t sAttributeValue[];
/**
* The string "end-tag"
*/
static char16_t sEndTag[];
/**
* The string "doctype"
*/
static char16_t sDoctype[];
/**
* The string "entity"
*/
static char16_t sEntity[];
/**
* The string "pi"
*/
static char16_t sPi[];
/**
* Whether a base has already been seen once.
*/
bool mSeenBase;
};
#endif // nsHtml5Highlighter_h