mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-24 13:21:05 +00:00
97da4f66b8
Differential Revision: https://phabricator.services.mozilla.com/D133996
434 lines
11 KiB
C++
434 lines
11 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
#ifndef nsHtml5Highlighter_h
|
|
#define nsHtml5Highlighter_h
|
|
|
|
#include "nsCOMPtr.h"
|
|
#include "nsHtml5TreeOperation.h"
|
|
#include "nsHtml5UTF16Buffer.h"
|
|
#include "nsHtml5TreeOperation.h"
|
|
#include "nsAHtml5TreeOpSink.h"
|
|
|
|
// NOTE(review): presumably the number of nsIContent* slots in each handle
// array allocated for nsHtml5Highlighter::mHandles; the macro is not used in
// this header, so confirm against the implementation file.
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
|
|
|
|
/**
|
|
* A state machine for generating HTML for display in View Source based on
|
|
* the transitions the tokenizer makes on the source being viewed.
|
|
*/
|
|
class nsHtml5Highlighter {
|
|
public:
|
|
/**
|
|
* The constructor.
|
|
*
|
|
* @param aOpSink the sink for the tree ops generated by this highlighter
|
|
*/
|
|
explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
|
|
|
|
/**
|
|
* The destructor.
|
|
*/
|
|
~nsHtml5Highlighter();
|
|
|
|
/**
|
|
* Set the op sink (for speculation).
|
|
*/
|
|
void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
|
|
|
|
/**
|
|
* Reset state to after generated head but before processing any of the input
|
|
* stream.
|
|
*/
|
|
void Rewind();
|
|
|
|
/**
|
|
* Starts the generated document.
|
|
*/
|
|
void Start(const nsAutoString& aTitle);
|
|
|
|
/**
|
|
* Updates the charset source via the op queue.
|
|
*/
|
|
void UpdateCharsetSource(nsCharsetSource aCharsetSource);
|
|
|
|
/**
|
|
* Report a tokenizer state transition.
|
|
*
|
|
* @param aState the state being transitioned to
|
|
* @param aReconsume whether this is a reconsuming transition
|
|
* @param aPos the tokenizer's current position into the buffer
|
|
*/
|
|
int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
|
|
|
|
/**
|
|
* Report end of file.
|
|
*/
|
|
void End();
|
|
|
|
/**
|
|
* Set the current buffer being tokenized
|
|
*/
|
|
void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
|
|
|
|
/**
|
|
* Let go of the buffer being tokenized but first, flush text from it.
|
|
*
|
|
* @param aPos the first UTF-16 code unit not to flush
|
|
*/
|
|
void DropBuffer(int32_t aPos);
|
|
|
|
/**
|
|
* Flush the tree ops into the sink.
|
|
*
|
|
* @return true if there were ops to flush
|
|
*/
|
|
bool FlushOps();
|
|
|
|
/**
|
|
* Linkify the current attribute value if the attribute name is one of
|
|
* known URL attributes. (When executing tree ops, javascript: URLs will
|
|
* not be linkified, though.)
|
|
*
|
|
* @param aName the name of the attribute
|
|
* @param aValue the value of the attribute
|
|
*/
|
|
void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
|
|
nsHtml5String aValue);
|
|
|
|
/**
|
|
* Inform the highlighter that the tokenizer successfully completed a
|
|
* named character reference.
|
|
*/
|
|
void CompletedNamedCharacterReference();
|
|
|
|
/**
|
|
* Adds an error annotation to the node that's currently on top of
|
|
* mStack.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
*/
|
|
void AddErrorToCurrentNode(const char* aMsgId);
|
|
|
|
/**
|
|
* Adds an error annotation to the node that corresponds to the most
|
|
* recently opened markup declaration/tag span, character reference or
|
|
* run of text.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
*/
|
|
void AddErrorToCurrentRun(const char* aMsgId);
|
|
|
|
/**
|
|
* Adds an error annotation to the node that corresponds to the most
|
|
* recently opened markup declaration/tag span, character reference or
|
|
* run of text with one atom to use when formatting the message.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
* @param aName the atom
|
|
*/
|
|
void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
|
|
|
|
/**
|
|
* Adds an error annotation to the node that corresponds to the most
|
|
* recently opened markup declaration/tag span, character reference or
|
|
* run of text with two atoms to use when formatting the message.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
* @param aName the first atom
|
|
* @param aOther the second atom
|
|
*/
|
|
void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
|
|
|
|
/**
|
|
* Adds an error annotation to the node that corresponds to the most
|
|
* recent potentially character reference-starting ampersand.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
*/
|
|
void AddErrorToCurrentAmpersand(const char* aMsgId);
|
|
|
|
/**
|
|
* Adds an error annotation to the node that corresponds to the most
|
|
* recent potentially self-closing slash.
|
|
*
|
|
* @param aMsgId the id of the message in the property file
|
|
*/
|
|
void AddErrorToCurrentSlash(const char* aMsgId);
|
|
|
|
/**
|
|
* Enqueues a tree op for adding base to the urls with the view-source:
|
|
*
|
|
* @param aValue the base URL to add
|
|
*/
|
|
void AddBase(nsHtml5String aValue);
|
|
|
|
/**
|
|
* Starts a wrapper around a run of characters.
|
|
*/
|
|
void StartCharacters();
|
|
|
|
private:
|
|
/**
|
|
* Starts a span with no class.
|
|
*/
|
|
void StartSpan();
|
|
|
|
/**
|
|
* Starts a <span> and sets the class attribute on it.
|
|
*
|
|
* @param aClass the class to set (MUST be a static string that does not
|
|
* need to be released!)
|
|
*/
|
|
void StartSpan(const char16_t* aClass);
|
|
|
|
/**
|
|
* End the current <span> or <a> in the highlighter output.
|
|
*/
|
|
void EndSpanOrA();
|
|
|
|
/**
|
|
* Ends a wrapper around a run of characters.
|
|
*/
|
|
void EndCharactersAndStartMarkupRun();
|
|
|
|
/**
|
|
* Starts an <a>.
|
|
*/
|
|
void StartA();
|
|
|
|
/**
|
|
* Flushes characters up to but not including the current one.
|
|
*/
|
|
void FlushChars();
|
|
|
|
/**
|
|
* Flushes characters up to and including the current one.
|
|
*/
|
|
void FlushCurrent();
|
|
|
|
/**
|
|
* Finishes highlighting a tag in the input data by closing the open
|
|
* <span> and <a> elements in the highlighter output and then starts
|
|
* another <span> for potentially highlighting characters potentially
|
|
* appearing next.
|
|
*/
|
|
void FinishTag();
|
|
|
|
/**
|
|
* Adds a class attribute to the current node.
|
|
*
|
|
* @param aClass the class to set (MUST be a static string that does not
|
|
* need to be released!)
|
|
*/
|
|
void AddClass(const char16_t* aClass);
|
|
|
|
/**
|
|
* Allocates a handle for an element.
|
|
*
|
|
* See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
|
|
* in nsHtml5TreeBuilderHSupplement.h.
|
|
*
|
|
* @return the handle
|
|
*/
|
|
nsIContent** AllocateContentHandle();
|
|
|
|
/**
|
|
* Enqueues an element creation tree operation.
|
|
*
|
|
* @param aName the name of the element
|
|
* @param aAttributes the attribute holder (ownership will be taken) or
|
|
* nullptr for no attributes
|
|
* @param aIntendedParent the intended parent node for the created element
|
|
* @param aCreator the content creator function
|
|
* @return the handle for the element that will be created
|
|
*/
|
|
nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
|
|
nsIContent** aIntendedParent,
|
|
mozilla::dom::HTMLContentCreatorFunction aCreator);
|
|
|
|
/**
|
|
* Gets the handle for the current node. May be called only after the
|
|
* root element has been set.
|
|
*
|
|
* @return the handle for the current node
|
|
*/
|
|
nsIContent** CurrentNode();
|
|
|
|
/**
|
|
* Create an element and push it (its handle) on the stack.
|
|
*
|
|
* @param aName the name of the element
|
|
* @param aAttributes the attribute holder (ownership will be taken) or
|
|
* nullptr for no attributes
|
|
* @param aCreator the content creator function
|
|
*/
|
|
void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
|
|
mozilla::dom::HTMLContentCreatorFunction aCreator);
|
|
|
|
/**
|
|
* Pops the current node off the stack.
|
|
*/
|
|
void Pop();
|
|
|
|
/**
|
|
* Appends text content to the current node.
|
|
*
|
|
* @param aBuffer the buffer to copy from
|
|
* @param aStart the index of the first code unit to copy
|
|
* @param aLength the number of code units to copy
|
|
*/
|
|
void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
|
|
int32_t aLength);
|
|
|
|
/**
|
|
* Enqueues a tree op for adding an href attribute with the view-source:
|
|
* URL scheme to the current node.
|
|
*
|
|
* @param aValue the (potentially relative) URL to link to
|
|
*/
|
|
void AddViewSourceHref(nsHtml5String aValue);
|
|
|
|
/**
|
|
* The state we are transitioning away from.
|
|
*/
|
|
int32_t mState;
|
|
|
|
/**
|
|
* The index of the first UTF-16 code unit in mBuffer that hasn't been
|
|
* flushed yet.
|
|
*/
|
|
int32_t mCStart;
|
|
|
|
/**
|
|
* The position of the code unit in mBuffer that caused the current
|
|
* transition.
|
|
*/
|
|
int32_t mPos;
|
|
|
|
/**
|
|
* The current line number.
|
|
*/
|
|
int32_t mLineNumber;
|
|
|
|
/**
|
|
* The number of inline elements open inside the <pre> excluding the
|
|
* span potentially wrapping a run of characters.
|
|
*/
|
|
int32_t mInlinesOpen;
|
|
|
|
/**
|
|
* Whether there's a span wrapping a run of characters (excluding CDATA
|
|
* section) open.
|
|
*/
|
|
bool mInCharacters;
|
|
|
|
/**
|
|
* The current buffer being tokenized.
|
|
*/
|
|
nsHtml5UTF16Buffer* mBuffer;
|
|
|
|
/**
|
|
* The outgoing tree op queue.
|
|
*/
|
|
nsTArray<nsHtml5TreeOperation> mOpQueue;
|
|
|
|
/**
|
|
* The tree op stage for the tree op executor or a speculation when looking
|
|
* for meta charset.
|
|
*
|
|
* The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
|
|
* object, because this object is owned by the nsHtml5Tokenizer instance that
|
|
* is owned by the nsHtml5StreamParser, which keeps the executor alive via
|
|
* nsHtml5Streamparser::mExecutorFlusher.
|
|
*/
|
|
nsAHtml5TreeOpSink* mOpSink;
|
|
|
|
/**
|
|
* The most recently opened markup declaration/tag or run of characters.
|
|
*/
|
|
nsIContent** mCurrentRun;
|
|
|
|
/**
|
|
* The most recent ampersand in a place where character references were
|
|
* allowed.
|
|
*/
|
|
nsIContent** mAmpersand;
|
|
|
|
/**
|
|
* The most recent slash that might become a self-closing slash.
|
|
*/
|
|
nsIContent** mSlash;
|
|
|
|
/**
|
|
* Memory for element handles.
|
|
*/
|
|
mozilla::UniquePtr<nsIContent*[]> mHandles;
|
|
|
|
/**
|
|
* Number of handles used in mHandles
|
|
*/
|
|
int32_t mHandlesUsed;
|
|
|
|
/**
|
|
* A holder for old contents of mHandles
|
|
*/
|
|
nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
|
|
|
|
/**
|
|
* The element stack.
|
|
*/
|
|
nsTArray<nsIContent**> mStack;
|
|
|
|
/**
|
|
* The string "comment"
|
|
*/
|
|
static char16_t sComment[];
|
|
|
|
/**
|
|
* The string "cdata"
|
|
*/
|
|
static char16_t sCdata[];
|
|
|
|
/**
|
|
* The string "start-tag"
|
|
*/
|
|
static char16_t sStartTag[];
|
|
|
|
/**
|
|
* The string "attribute-name"
|
|
*/
|
|
static char16_t sAttributeName[];
|
|
|
|
/**
|
|
* The string "attribute-value"
|
|
*/
|
|
static char16_t sAttributeValue[];
|
|
|
|
/**
|
|
* The string "end-tag"
|
|
*/
|
|
static char16_t sEndTag[];
|
|
|
|
/**
|
|
* The string "doctype"
|
|
*/
|
|
static char16_t sDoctype[];
|
|
|
|
/**
|
|
* The string "entity"
|
|
*/
|
|
static char16_t sEntity[];
|
|
|
|
/**
|
|
* The string "pi"
|
|
*/
|
|
static char16_t sPi[];
|
|
|
|
/**
|
|
* Whether base is already visited once.
|
|
*/
|
|
bool mSeenBase;
|
|
};
|
|
|
|
#endif // nsHtml5Highlighter_h
|