Fix for bug 8703. We now display errors for incomplete XML files like "<b><c></c>". XML_Parse() was not being called with the isFinal parameter set to TRUE after the last chunk of data was passed to expat. Fixed.

This commit is contained in:
nisheeth%netscape.com 1999-07-15 08:23:37 +00:00
parent c4b09f11e6
commit 76b19c9c12
14 changed files with 310 additions and 226 deletions

View File

@ -115,7 +115,7 @@ NS_IMPL_ADDREF(nsExpatTokenizer)
NS_IMPL_RELEASE(nsExpatTokenizer)
/**
* Sets up the callbacks for the expat parser
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
@ -145,7 +145,6 @@ void nsExpatTokenizer::SetupExpatCallbacks(void) {
nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() {
NS_INIT_REFCNT();
mBytesParsed = 0;
mSeenError = PR_FALSE;
nsAutoString buffer("UTF-16");
const PRUnichar* encoding = buffer.GetUnicode();
if (encoding) {
@ -168,7 +167,7 @@ nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() {
* @return
*/
nsExpatTokenizer::~nsExpatTokenizer(){
if (mExpatParser) {
if (mExpatParser) {
XML_ParserFree(mExpatParser);
mExpatParser = nsnull;
}
@ -179,20 +178,27 @@ nsExpatTokenizer::~nsExpatTokenizer(){
Here begins the real working methods for the tokenizer.
*******************************************************************/
void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByteIndex,
const char* aSourceBuffer, PRUint32 aLength)
/*
* Parameters:
*
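* aSourceBuffer (in): Buffer of PRUnichar characters, addressed here as bytes.
* aLength (in): Length of aSourceBuffer, in bytes (must be even).
* aOffset (in): Byte offset into aSourceBuffer (must be even, greater than 0 and less than aLength).
* aLine (out): Line on which the character at byte offset aOffset of aSourceBuffer is located.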
*/
void nsExpatTokenizer::GetLine(const char* aSourceBuffer, PRUint32 aLength,
PRUint32 aOffset, nsString& aLine)
{
/* Figure out the substring inside aSourceBuffer that contains the line on which the error
occurred. Copy the line into aError->sourceLine */
PR_ASSERT(aByteIndex > 0 && aByteIndex < aLength);
/* Figure out the line inside aSourceBuffer that contains character specified by aOffset.
Copy it into aLine. */
PR_ASSERT(aOffset > 0 && aOffset < aLength);
/* Assert that the offset and the length of the buffer are both even */
PR_ASSERT(aByteIndex % 2 == 0 && aLength % 2 == 0);
PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aByteIndex]; /* Will try to find the start of the line */
PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aByteIndex]; /* Will try to find the end of the line */
PRUint32 startIndex = aByteIndex / 2; /* Track the position of the 'start' pointer into the buffer */
PRUint32 endIndex = aByteIndex / 2; /* Track the position of the 'end' pointer into the buffer */
PRUint32 numCharsInBuffer = aLength / 2;
PR_ASSERT(aOffset % 2 == 0 && aLength % 2 == 0);
PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the start of the line */
PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the end of the line */
PRUint32 startIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'start' pointer into the buffer */
PRUint32 endIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'end' pointer into the buffer */
PRUint32 numCharsInBuffer = aLength / sizeof(PRUnichar);
PRBool reachedStart;
PRBool reachedEnd;
@ -214,12 +220,13 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
}
}
aLine.Truncate(0);
if (startIndex == endIndex) {
/* Special case if the error is on a line where the only character is a newline */
aError->sourceLine.Append("");
aLine.Append("");
}
else {
PR_ASSERT(endIndex - startIndex >= 2);
PR_ASSERT(endIndex - startIndex >= sizeof(PRUnichar));
/* At this point, there are two cases. Either the error is on the first line or
on subsequent lines. If the error is on the first line, startIndex will decrement
all the way to zero. If not, startIndex will decrement to the position of the
@ -232,7 +239,7 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
/* At this point, the substring starting at startPosn and ending at (endIndex - 1),
is the line on which the error occurred. Copy that substring into the error structure. */
const PRUnichar* unicodeBuffer = (const PRUnichar*) aSourceBuffer;
aError->sourceLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn);
aLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn);
}
}
@ -241,19 +248,25 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
* an error token and pushes it onto the token queue.
*
*/
void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength)
void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal)
{
CErrorToken* token= (CErrorToken *) gTokenRecycler->CreateTokenOfType(eToken_error, eHTMLTag_unknown);
nsParserError *error = new nsParserError;
PRUint32 byteIndexRelativeToFile = 0;
if(error){
error->code = XML_GetErrorCode(mExpatParser);
error->lineNumber = XML_GetCurrentLineNumber(mExpatParser);
error->colNumber = XML_GetCurrentColumnNumber(mExpatParser);
error->description = XML_ErrorString(error->code);
byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);
SetErrorContextInfo(error, (byteIndexRelativeToFile - mBytesParsed), aBuffer, aLength);
if (!aIsFinal) {
PRInt32 byteIndexRelativeToFile = 0;
byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);
GetLine(aBuffer, aLength, (byteIndexRelativeToFile - mBytesParsed), error->sourceLine);
}
else {
error->sourceLine.Append(mLastLine);
}
token->SetError(error);
CToken* theToken = (CToken* )token;
@ -261,14 +274,20 @@ void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength)
}
}
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength){
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength, PRBool aIsFinal)
{
nsresult result=NS_OK;
if (mExpatParser) {
if (!XML_Parse(mExpatParser, aBuffer, aLength, PR_FALSE)) {
PushXMLErrorToken(aBuffer, aLength);
PR_ASSERT((aBuffer && aLength) || (aBuffer == nsnull && aLength == 0));
if (mExpatParser) {
if (!XML_Parse(mExpatParser, aBuffer, aLength, aIsFinal)) {
PushXMLErrorToken(aBuffer, aLength, aIsFinal);
result=NS_ERROR_HTMLPARSER_STOPPARSING;
}
mBytesParsed += aLength;
else if (aBuffer && aLength) {
// Cache the last line in the buffer
GetLine(aBuffer, aLength, aLength - sizeof(PRUnichar), mLastLine);
}
mBytesParsed += aLength;
}
else {
result = NS_ERROR_FAILURE;
@ -296,23 +315,26 @@ nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner) {
// Ask the scanner to send us all the data it has
// scanned and pass that data to expat.
nsresult result = NS_OK;
nsString& theBuffer = aScanner.GetBuffer();
PRInt32 length = theBuffer.Length();
if(0 < length) {
const PRUnichar* expatBuffer = theBuffer.GetUnicode();
PRUint32 bufLength = theBuffer.Length() * 2;
if (expatBuffer) {
gTokenDeque=&mTokenDeque;
gExpatParser = mExpatParser;
result = ParseXMLBuffer((const char *)expatBuffer, bufLength);
}
theBuffer.Truncate(0);
}
nsString& theBuffer = aScanner.GetBuffer();
PRUint32 bufLength = theBuffer.Length() * sizeof(PRUnichar);
const PRUnichar* expatBuffer = (bufLength) ? theBuffer.GetUnicode() : nsnull;
gTokenDeque=&mTokenDeque;
gExpatParser = mExpatParser;
result = ParseXMLBuffer((const char *)expatBuffer, bufLength);
theBuffer.Truncate(0);
if(NS_OK==result)
result=aScanner.Eof();
return result;
}
/**
 * Tokenizer-side end-of-tokenization hook (nsITokenizer::DidTokenize).
 * Hands expat an empty buffer together with the aIsFinalChunk flag, so that
 * XML_Parse() is told when the last chunk of data has been seen.
 *
 * @param  aIsFinalChunk  forwarded as ParseXMLBuffer()'s aIsFinal argument
 * @return the result of ParseXMLBuffer()
 */
nsresult nsExpatTokenizer::DidTokenize(PRBool aIsFinalChunk)
{
  // No new data at this point: only the "is final" flag is being communicated.
  const char* noData = nsnull;
  nsresult rv = ParseXMLBuffer(noData, 0, aIsFinalChunk);
  return rv;
}
/**
*

View File

@ -53,70 +53,73 @@ public:
NS_DECL_ISUPPORTS
/* nsITokenizer methods */
virtual nsresult ConsumeToken(nsScanner& aScanner);
/* nsITokenizer methods */
virtual nsresult ConsumeToken(nsScanner& aScanner);
virtual nsresult DidTokenize(PRBool aIsFinalChunk);
virtual void FrontloadMisplacedContent(nsDeque& aDeque);
protected:
/**
* Parse an XML buffer using expat
* @update nra 2/29/99
* @return NS_ERROR_FAILURE if expat encounters an error, else NS_OK
*/
nsresult ParseXMLBuffer(const char *aBuffer, PRUint32 aLength);
/**
* Parse an XML buffer using expat
* @update nra 2/29/99
* @return NS_ERROR_HTMLPARSER_STOPPARSING if expat encounters an error, NS_ERROR_FAILURE if there is no expat parser, else NS_OK
*/
nsresult ParseXMLBuffer(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal=PR_FALSE);
/**
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
*/
void SetupExpatCallbacks(void);
/**
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
*/
void SetupExpatCallbacks(void);
void PushXMLErrorToken(const char *aBuffer, PRUint32 aLength);
void SetErrorContextInfo(nsParserError* aError, PRUint32 aByteIndex,
const char* aSourceBuffer, PRUint32 aLength);
// Propagate XML errors to the content sink
void PushXMLErrorToken(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal);
void GetLine(const char* aSourceBuffer, PRUint32 aLength,
PRUint32 aByteIndex, nsString& aLine);
/* The callback handlers that get called from the expat parser */
static void HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts);
static void HandleEndElement(void *userData, const XML_Char *name);
static void HandleCharacterData(void *userData, const XML_Char *s, int len);
static void HandleComment(void *userData, const XML_Char *name);
static void HandleProcessingInstruction(void *userData,
const XML_Char *target,
const XML_Char *data);
static void HandleDefault(void *userData, const XML_Char *s, int len);
static void HandleUnparsedEntityDecl(void *userData,
const XML_Char *entityName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
static void HandleNotationDecl(void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleExternalEntityRef(XML_Parser parser,
const XML_Char *openEntityNames,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleUnknownEncoding(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
// Load up an external stream to get external entity information
static nsresult OpenInputStream(nsString2& aURLStr,
nsIInputStream*& in);
static nsresult LoadStream(nsIInputStream* in,
PRUnichar* &uniBuf, PRUint32 &retLen);
static nsresult OpenInputStream(nsString2& aURLStr,
nsIInputStream*& in);
static nsresult LoadStream(nsIInputStream* in,
PRUnichar* &uniBuf, PRUint32 &retLen);
/* The callback handlers that get called from the expat parser */
static void HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts);
static void HandleEndElement(void *userData, const XML_Char *name);
static void HandleCharacterData(void *userData, const XML_Char *s, int len);
static void HandleComment(void *userData, const XML_Char *name);
static void HandleProcessingInstruction(void *userData,
const XML_Char *target,
const XML_Char *data);
static void HandleDefault(void *userData, const XML_Char *s, int len);
static void HandleUnparsedEntityDecl(void *userData,
const XML_Char *entityName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
static void HandleNotationDecl(void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleExternalEntityRef(XML_Parser parser,
const XML_Char *openEntityNames,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleUnknownEncoding(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
XML_Parser mExpatParser;
XML_Parser mExpatParser;
PRUint32 mBytesParsed;
PRBool mSeenError;
nsString mLastLine;
};
extern NS_HTMLPARS nsresult NS_Expat_Tokenizer(nsIDTD** aInstancePtrResult);

View File

@ -215,6 +215,10 @@ CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){
return (CToken*)mTokenDeque.ObjectAt(anIndex);
}
/**
 * Tokenizer hook called by the parser before it starts tokenizing.
 * The HTML tokenizer does nothing here.
 *
 * @param  aIsFinalChunk  not used by this implementation
 * @return NS_OK, always
 */
nsresult nsHTMLTokenizer::WillTokenize(PRBool /* aIsFinalChunk */)
{
  return NS_OK;
}
/**
*
@ -236,6 +240,10 @@ void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){
}
/**
 * Tokenizer hook called by the parser after tokenizing.
 * The HTML tokenizer does nothing here.
 *
 * @param  aIsFinalChunk  not used by this implementation
 * @return NS_OK, always
 */
nsresult nsHTMLTokenizer::DidTokenize(PRBool /* aIsFinalChunk */)
{
  return NS_OK;
}
/**
* This method repeatedly called by the tokenizer.

View File

@ -55,7 +55,9 @@ public:
NS_DECL_ISUPPORTS
virtual nsresult WillTokenize(PRBool aIsFinalChunk);
virtual nsresult ConsumeToken(nsScanner& aScanner);
virtual nsresult DidTokenize(PRBool aIsFinalChunk);
virtual nsITokenRecycler* GetTokenRecycler(void);
virtual CToken* PushTokenFront(CToken* theToken);

View File

@ -63,7 +63,9 @@ public:
class nsITokenizer : public nsISupports {
public:
virtual nsresult WillTokenize(PRBool aIsFinalChunk)=0;
virtual nsresult ConsumeToken(nsScanner& aScanner)=0;
virtual nsresult DidTokenize(PRBool aIsFinalChunk)=0;
virtual nsITokenRecycler* GetTokenRecycler(void)=0;
virtual CToken* PushTokenFront(CToken* aToken)=0;
@ -74,6 +76,7 @@ public:
virtual CToken* GetTokenAt(PRInt32 anIndex)=0;
virtual void PrependTokens(nsDeque& aDeque)=0;
};

View File

@ -867,16 +867,15 @@ nsresult nsParser::ParseFragment(const nsString& aSourceBuffer,void* aKey,nsITag
* @param
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::ResumeParse(nsIDTD* aDefaultDTD) {
nsresult nsParser::ResumeParse(nsIDTD* aDefaultDTD, PRBool aIsFinalChunk) {
nsresult result=NS_OK;
if(mParserContext->mParserEnabled && !mParserContext->mParserTerminated) {
result=WillBuildModel(mParserContext->mScanner->GetFilename(),aDefaultDTD);
if(mParserContext->mDTD) {
mParserContext->mDTD->WillResumeParse();
if(NS_OK==result) {
result=Tokenize();
if(NS_OK==result) {
result=Tokenize(aIsFinalChunk);
result=BuildModel();
if((!mParserContext->mMultipart) || (mParserContext->mParserTerminated) ||
@ -1176,7 +1175,7 @@ nsresult nsParser::OnStopRequest(nsIURI* aURL, nsresult status, const PRUnichar*
if(mParserFilter)
mParserFilter->Finish();
nsresult result=ResumeParse();
nsresult result=ResumeParse(nsnull, PR_TRUE);
// If the parser isn't enabled, we don't finish parsing till
// it is reenabled.
@ -1216,9 +1215,13 @@ nsresult nsParser::OnStopRequest(nsIURI* aURL, nsresult status, const PRUnichar*
* @param
* @return TRUE if it's ok to proceed
*/
PRBool nsParser::WillTokenize(){
PRBool result=PR_TRUE;
return result;
PRBool nsParser::WillTokenize(PRBool aIsFinalChunk){
  // Give the DTD's tokenizer, if there is one, a chance to prepare.
  nsresult rv = NS_OK;
  nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
  if (theTokenizer) {
    rv = theTokenizer->WillTokenize(aIsFinalChunk);
  }
  // This method is declared PRBool ("TRUE if it's ok to proceed"), while the
  // tokenizer reports an nsresult whose success value NS_OK is 0. Returning
  // the raw nsresult would report success as PR_FALSE, so convert explicitly.
  return NS_SUCCEEDED(rv) ? PR_TRUE : PR_FALSE;
}
@ -1230,7 +1233,7 @@ PRBool nsParser::WillTokenize(){
* @update gess 01/04/99
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::Tokenize(){
nsresult nsParser::Tokenize(PRBool aIsFinalChunk){
nsresult result=NS_OK;
@ -1238,7 +1241,7 @@ nsresult nsParser::Tokenize(){
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if(theTokenizer){
WillTokenize();
WillTokenize(aIsFinalChunk);
while(NS_SUCCEEDED(result)) {
mParserContext->mScanner->Mark();
++mMinorIteration;
@ -1253,7 +1256,7 @@ nsresult nsParser::Tokenize(){
mParserContext->mParserTerminated=PR_TRUE;
}
}
DidTokenize();
DidTokenize(aIsFinalChunk);
}
else{
result=mInternalState=NS_ERROR_HTMLPARSER_BADTOKENIZER;
@ -1270,21 +1273,22 @@ nsresult nsParser::Tokenize(){
* @param
* @return TRUE if all went well
*/
PRBool nsParser::DidTokenize(){
PRBool nsParser::DidTokenize(PRBool aIsFinalChunk){
PRBool result=PR_TRUE;
if(mTokenObserver) {
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if(theTokenizer) {
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if (theTokenizer) {
result = theTokenizer->DidTokenize(aIsFinalChunk);
if(mTokenObserver) {
PRInt32 theCount=theTokenizer->GetCount();
PRInt32 theIndex;
for(theIndex=0;theIndex<theCount;theIndex++){
if((*mTokenObserver)(theTokenizer->GetTokenAt(theIndex))){
//add code here to pull unwanted tokens out of the stack...
}
}//for
}//for
}//if
}//if
}
return result;
}

View File

@ -232,7 +232,7 @@ friend class CTokenHandler;
* @update gess5/11/98
* @return TRUE if all went well, otherwise FALSE
*/
virtual nsresult ResumeParse(nsIDTD* mDefaultDTD=0);
virtual nsresult ResumeParse(nsIDTD* mDefaultDTD=0, PRBool aIsFinalChunk=PR_FALSE);
void DebugDumpSource(ostream& anOutput);
@ -319,7 +319,7 @@ private:
* @param
* @return TRUE if it's ok to proceed
*/
PRBool WillTokenize();
PRBool WillTokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
@ -330,7 +330,7 @@ private:
* @update gess 3/25/98
* @return error code
*/
nsresult Tokenize();
nsresult Tokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
* This is the tail-end of the code sandwich for the
@ -341,7 +341,7 @@ private:
* @param
* @return TRUE if all went well
*/
PRBool DidTokenize();
PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE);
protected:

View File

@ -115,7 +115,7 @@ NS_IMPL_ADDREF(nsExpatTokenizer)
NS_IMPL_RELEASE(nsExpatTokenizer)
/**
* Sets up the callbacks for the expat parser
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
@ -145,7 +145,6 @@ void nsExpatTokenizer::SetupExpatCallbacks(void) {
nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() {
NS_INIT_REFCNT();
mBytesParsed = 0;
mSeenError = PR_FALSE;
nsAutoString buffer("UTF-16");
const PRUnichar* encoding = buffer.GetUnicode();
if (encoding) {
@ -168,7 +167,7 @@ nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() {
* @return
*/
nsExpatTokenizer::~nsExpatTokenizer(){
if (mExpatParser) {
if (mExpatParser) {
XML_ParserFree(mExpatParser);
mExpatParser = nsnull;
}
@ -179,20 +178,27 @@ nsExpatTokenizer::~nsExpatTokenizer(){
Here begins the real working methods for the tokenizer.
*******************************************************************/
void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByteIndex,
const char* aSourceBuffer, PRUint32 aLength)
/*
* Parameters:
*
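* aSourceBuffer (in): Buffer of PRUnichar characters, addressed here as bytes.
* aLength (in): Length of aSourceBuffer, in bytes (must be even).
* aOffset (in): Byte offset into aSourceBuffer (must be even, greater than 0 and less than aLength).
* aLine (out): Line on which the character at byte offset aOffset of aSourceBuffer is located.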
*/
void nsExpatTokenizer::GetLine(const char* aSourceBuffer, PRUint32 aLength,
PRUint32 aOffset, nsString& aLine)
{
/* Figure out the substring inside aSourceBuffer that contains the line on which the error
occurred. Copy the line into aError->sourceLine */
PR_ASSERT(aByteIndex > 0 && aByteIndex < aLength);
/* Figure out the line inside aSourceBuffer that contains character specified by aOffset.
Copy it into aLine. */
PR_ASSERT(aOffset > 0 && aOffset < aLength);
/* Assert that the offset and the length of the buffer are both even */
PR_ASSERT(aByteIndex % 2 == 0 && aLength % 2 == 0);
PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aByteIndex]; /* Will try to find the start of the line */
PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aByteIndex]; /* Will try to find the end of the line */
PRUint32 startIndex = aByteIndex / 2; /* Track the position of the 'start' pointer into the buffer */
PRUint32 endIndex = aByteIndex / 2; /* Track the position of the 'end' pointer into the buffer */
PRUint32 numCharsInBuffer = aLength / 2;
PR_ASSERT(aOffset % 2 == 0 && aLength % 2 == 0);
PRUnichar* start = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the start of the line */
PRUnichar* end = (PRUnichar* ) &aSourceBuffer[aOffset]; /* Will try to find the end of the line */
PRUint32 startIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'start' pointer into the buffer */
PRUint32 endIndex = aOffset / sizeof(PRUnichar); /* Track the position of the 'end' pointer into the buffer */
PRUint32 numCharsInBuffer = aLength / sizeof(PRUnichar);
PRBool reachedStart;
PRBool reachedEnd;
@ -214,12 +220,13 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
}
}
aLine.Truncate(0);
if (startIndex == endIndex) {
/* Special case if the error is on a line where the only character is a newline */
aError->sourceLine.Append("");
aLine.Append("");
}
else {
PR_ASSERT(endIndex - startIndex >= 2);
PR_ASSERT(endIndex - startIndex >= sizeof(PRUnichar));
/* At this point, there are two cases. Either the error is on the first line or
on subsequent lines. If the error is on the first line, startIndex will decrement
all the way to zero. If not, startIndex will decrement to the position of the
@ -232,7 +239,7 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
/* At this point, the substring starting at startPosn and ending at (endIndex - 1),
is the line on which the error occurred. Copy that substring into the error structure. */
const PRUnichar* unicodeBuffer = (const PRUnichar*) aSourceBuffer;
aError->sourceLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn);
aLine.Append(&unicodeBuffer[startPosn], endIndex - startPosn);
}
}
@ -241,19 +248,25 @@ void nsExpatTokenizer::SetErrorContextInfo(nsParserError* aError, PRUint32 aByte
* an error token and pushes it onto the token queue.
*
*/
void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength)
void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal)
{
CErrorToken* token= (CErrorToken *) gTokenRecycler->CreateTokenOfType(eToken_error, eHTMLTag_unknown);
nsParserError *error = new nsParserError;
PRUint32 byteIndexRelativeToFile = 0;
if(error){
error->code = XML_GetErrorCode(mExpatParser);
error->lineNumber = XML_GetCurrentLineNumber(mExpatParser);
error->colNumber = XML_GetCurrentColumnNumber(mExpatParser);
error->description = XML_ErrorString(error->code);
byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);
SetErrorContextInfo(error, (byteIndexRelativeToFile - mBytesParsed), aBuffer, aLength);
if (!aIsFinal) {
PRInt32 byteIndexRelativeToFile = 0;
byteIndexRelativeToFile = XML_GetCurrentByteIndex(mExpatParser);
GetLine(aBuffer, aLength, (byteIndexRelativeToFile - mBytesParsed), error->sourceLine);
}
else {
error->sourceLine.Append(mLastLine);
}
token->SetError(error);
CToken* theToken = (CToken* )token;
@ -261,14 +274,20 @@ void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength)
}
}
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength){
nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength, PRBool aIsFinal)
{
nsresult result=NS_OK;
if (mExpatParser) {
if (!XML_Parse(mExpatParser, aBuffer, aLength, PR_FALSE)) {
PushXMLErrorToken(aBuffer, aLength);
PR_ASSERT((aBuffer && aLength) || (aBuffer == nsnull && aLength == 0));
if (mExpatParser) {
if (!XML_Parse(mExpatParser, aBuffer, aLength, aIsFinal)) {
PushXMLErrorToken(aBuffer, aLength, aIsFinal);
result=NS_ERROR_HTMLPARSER_STOPPARSING;
}
mBytesParsed += aLength;
else if (aBuffer && aLength) {
// Cache the last line in the buffer
GetLine(aBuffer, aLength, aLength - sizeof(PRUnichar), mLastLine);
}
mBytesParsed += aLength;
}
else {
result = NS_ERROR_FAILURE;
@ -296,23 +315,26 @@ nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner) {
// Ask the scanner to send us all the data it has
// scanned and pass that data to expat.
nsresult result = NS_OK;
nsString& theBuffer = aScanner.GetBuffer();
PRInt32 length = theBuffer.Length();
if(0 < length) {
const PRUnichar* expatBuffer = theBuffer.GetUnicode();
PRUint32 bufLength = theBuffer.Length() * 2;
if (expatBuffer) {
gTokenDeque=&mTokenDeque;
gExpatParser = mExpatParser;
result = ParseXMLBuffer((const char *)expatBuffer, bufLength);
}
theBuffer.Truncate(0);
}
nsString& theBuffer = aScanner.GetBuffer();
PRUint32 bufLength = theBuffer.Length() * sizeof(PRUnichar);
const PRUnichar* expatBuffer = (bufLength) ? theBuffer.GetUnicode() : nsnull;
gTokenDeque=&mTokenDeque;
gExpatParser = mExpatParser;
result = ParseXMLBuffer((const char *)expatBuffer, bufLength);
theBuffer.Truncate(0);
if(NS_OK==result)
result=aScanner.Eof();
return result;
}
/**
 * Tokenizer-side end-of-tokenization hook (nsITokenizer::DidTokenize).
 * Hands expat an empty buffer together with the aIsFinalChunk flag, so that
 * XML_Parse() is told when the last chunk of data has been seen.
 *
 * @param  aIsFinalChunk  forwarded as ParseXMLBuffer()'s aIsFinal argument
 * @return the result of ParseXMLBuffer()
 */
nsresult nsExpatTokenizer::DidTokenize(PRBool aIsFinalChunk)
{
  // No new data at this point: only the "is final" flag is being communicated.
  const char* noData = nsnull;
  nsresult rv = ParseXMLBuffer(noData, 0, aIsFinalChunk);
  return rv;
}
/**
*

View File

@ -53,70 +53,73 @@ public:
NS_DECL_ISUPPORTS
/* nsITokenizer methods */
virtual nsresult ConsumeToken(nsScanner& aScanner);
/* nsITokenizer methods */
virtual nsresult ConsumeToken(nsScanner& aScanner);
virtual nsresult DidTokenize(PRBool aIsFinalChunk);
virtual void FrontloadMisplacedContent(nsDeque& aDeque);
protected:
/**
* Parse an XML buffer using expat
* @update nra 2/29/99
* @return NS_ERROR_FAILURE if expat encounters an error, else NS_OK
*/
nsresult ParseXMLBuffer(const char *aBuffer, PRUint32 aLength);
/**
* Parse an XML buffer using expat
* @update nra 2/29/99
* @return NS_ERROR_HTMLPARSER_STOPPARSING if expat encounters an error, NS_ERROR_FAILURE if there is no expat parser, else NS_OK
*/
nsresult ParseXMLBuffer(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal=PR_FALSE);
/**
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
*/
void SetupExpatCallbacks(void);
/**
* Sets up the callbacks for the expat parser
* @update nra 2/24/99
* @param none
* @return none
*/
void SetupExpatCallbacks(void);
void PushXMLErrorToken(const char *aBuffer, PRUint32 aLength);
void SetErrorContextInfo(nsParserError* aError, PRUint32 aByteIndex,
const char* aSourceBuffer, PRUint32 aLength);
// Propagate XML errors to the content sink
void PushXMLErrorToken(const char *aBuffer, PRUint32 aLength, PRBool aIsFinal);
void GetLine(const char* aSourceBuffer, PRUint32 aLength,
PRUint32 aByteIndex, nsString& aLine);
/* The callback handlers that get called from the expat parser */
static void HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts);
static void HandleEndElement(void *userData, const XML_Char *name);
static void HandleCharacterData(void *userData, const XML_Char *s, int len);
static void HandleComment(void *userData, const XML_Char *name);
static void HandleProcessingInstruction(void *userData,
const XML_Char *target,
const XML_Char *data);
static void HandleDefault(void *userData, const XML_Char *s, int len);
static void HandleUnparsedEntityDecl(void *userData,
const XML_Char *entityName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
static void HandleNotationDecl(void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleExternalEntityRef(XML_Parser parser,
const XML_Char *openEntityNames,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleUnknownEncoding(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
// Load up an external stream to get external entity information
static nsresult OpenInputStream(nsString2& aURLStr,
nsIInputStream*& in);
static nsresult LoadStream(nsIInputStream* in,
PRUnichar* &uniBuf, PRUint32 &retLen);
static nsresult OpenInputStream(nsString2& aURLStr,
nsIInputStream*& in);
static nsresult LoadStream(nsIInputStream* in,
PRUnichar* &uniBuf, PRUint32 &retLen);
/* The callback handlers that get called from the expat parser */
static void HandleStartElement(void *userData, const XML_Char *name, const XML_Char **atts);
static void HandleEndElement(void *userData, const XML_Char *name);
static void HandleCharacterData(void *userData, const XML_Char *s, int len);
static void HandleComment(void *userData, const XML_Char *name);
static void HandleProcessingInstruction(void *userData,
const XML_Char *target,
const XML_Char *data);
static void HandleDefault(void *userData, const XML_Char *s, int len);
static void HandleUnparsedEntityDecl(void *userData,
const XML_Char *entityName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName);
static void HandleNotationDecl(void *userData,
const XML_Char *notationName,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleExternalEntityRef(XML_Parser parser,
const XML_Char *openEntityNames,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId);
static int HandleUnknownEncoding(void *encodingHandlerData,
const XML_Char *name,
XML_Encoding *info);
XML_Parser mExpatParser;
XML_Parser mExpatParser;
PRUint32 mBytesParsed;
PRBool mSeenError;
nsString mLastLine;
};
extern NS_HTMLPARS nsresult NS_Expat_Tokenizer(nsIDTD** aInstancePtrResult);

View File

@ -215,6 +215,10 @@ CToken* nsHTMLTokenizer::GetTokenAt(PRInt32 anIndex){
return (CToken*)mTokenDeque.ObjectAt(anIndex);
}
/**
 * Tokenizer hook called by the parser before it starts tokenizing.
 * The HTML tokenizer does nothing here.
 *
 * @param  aIsFinalChunk  not used by this implementation
 * @return NS_OK, always
 */
nsresult nsHTMLTokenizer::WillTokenize(PRBool /* aIsFinalChunk */)
{
  return NS_OK;
}
/**
*
@ -236,6 +240,10 @@ void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){
}
/**
 * Tokenizer hook called by the parser after tokenizing.
 * The HTML tokenizer does nothing here.
 *
 * @param  aIsFinalChunk  not used by this implementation
 * @return NS_OK, always
 */
nsresult nsHTMLTokenizer::DidTokenize(PRBool /* aIsFinalChunk */)
{
  return NS_OK;
}
/**
* This method repeatedly called by the tokenizer.

View File

@ -55,7 +55,9 @@ public:
NS_DECL_ISUPPORTS
virtual nsresult WillTokenize(PRBool aIsFinalChunk);
virtual nsresult ConsumeToken(nsScanner& aScanner);
virtual nsresult DidTokenize(PRBool aIsFinalChunk);
virtual nsITokenRecycler* GetTokenRecycler(void);
virtual CToken* PushTokenFront(CToken* theToken);

View File

@ -63,7 +63,9 @@ public:
class nsITokenizer : public nsISupports {
public:
virtual nsresult WillTokenize(PRBool aIsFinalChunk)=0;
virtual nsresult ConsumeToken(nsScanner& aScanner)=0;
virtual nsresult DidTokenize(PRBool aIsFinalChunk)=0;
virtual nsITokenRecycler* GetTokenRecycler(void)=0;
virtual CToken* PushTokenFront(CToken* aToken)=0;
@ -74,6 +76,7 @@ public:
virtual CToken* GetTokenAt(PRInt32 anIndex)=0;
virtual void PrependTokens(nsDeque& aDeque)=0;
};

View File

@ -867,16 +867,15 @@ nsresult nsParser::ParseFragment(const nsString& aSourceBuffer,void* aKey,nsITag
* @param
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::ResumeParse(nsIDTD* aDefaultDTD) {
nsresult nsParser::ResumeParse(nsIDTD* aDefaultDTD, PRBool aIsFinalChunk) {
nsresult result=NS_OK;
if(mParserContext->mParserEnabled && !mParserContext->mParserTerminated) {
result=WillBuildModel(mParserContext->mScanner->GetFilename(),aDefaultDTD);
if(mParserContext->mDTD) {
mParserContext->mDTD->WillResumeParse();
if(NS_OK==result) {
result=Tokenize();
if(NS_OK==result) {
result=Tokenize(aIsFinalChunk);
result=BuildModel();
if((!mParserContext->mMultipart) || (mParserContext->mParserTerminated) ||
@ -1176,7 +1175,7 @@ nsresult nsParser::OnStopRequest(nsIURI* aURL, nsresult status, const PRUnichar*
if(mParserFilter)
mParserFilter->Finish();
nsresult result=ResumeParse();
nsresult result=ResumeParse(nsnull, PR_TRUE);
// If the parser isn't enabled, we don't finish parsing till
// it is reenabled.
@ -1216,9 +1215,13 @@ nsresult nsParser::OnStopRequest(nsIURI* aURL, nsresult status, const PRUnichar*
* @param
* @return TRUE if it's ok to proceed
*/
PRBool nsParser::WillTokenize(){
PRBool result=PR_TRUE;
return result;
PRBool nsParser::WillTokenize(PRBool aIsFinalChunk){
  // Give the DTD's tokenizer, if there is one, a chance to prepare.
  nsresult rv = NS_OK;
  nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
  if (theTokenizer) {
    rv = theTokenizer->WillTokenize(aIsFinalChunk);
  }
  // This method is declared PRBool ("TRUE if it's ok to proceed"), while the
  // tokenizer reports an nsresult whose success value NS_OK is 0. Returning
  // the raw nsresult would report success as PR_FALSE, so convert explicitly.
  return NS_SUCCEEDED(rv) ? PR_TRUE : PR_FALSE;
}
@ -1230,7 +1233,7 @@ PRBool nsParser::WillTokenize(){
* @update gess 01/04/99
* @return error code -- 0 if ok, non-zero if error.
*/
nsresult nsParser::Tokenize(){
nsresult nsParser::Tokenize(PRBool aIsFinalChunk){
nsresult result=NS_OK;
@ -1238,7 +1241,7 @@ nsresult nsParser::Tokenize(){
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if(theTokenizer){
WillTokenize();
WillTokenize(aIsFinalChunk);
while(NS_SUCCEEDED(result)) {
mParserContext->mScanner->Mark();
++mMinorIteration;
@ -1253,7 +1256,7 @@ nsresult nsParser::Tokenize(){
mParserContext->mParserTerminated=PR_TRUE;
}
}
DidTokenize();
DidTokenize(aIsFinalChunk);
}
else{
result=mInternalState=NS_ERROR_HTMLPARSER_BADTOKENIZER;
@ -1270,21 +1273,22 @@ nsresult nsParser::Tokenize(){
* @param
* @return TRUE if all went well
*/
PRBool nsParser::DidTokenize(){
PRBool nsParser::DidTokenize(PRBool aIsFinalChunk){
PRBool result=PR_TRUE;
if(mTokenObserver) {
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if(theTokenizer) {
nsITokenizer* theTokenizer=mParserContext->mDTD->GetTokenizer();
if (theTokenizer) {
result = theTokenizer->DidTokenize(aIsFinalChunk);
if(mTokenObserver) {
PRInt32 theCount=theTokenizer->GetCount();
PRInt32 theIndex;
for(theIndex=0;theIndex<theCount;theIndex++){
if((*mTokenObserver)(theTokenizer->GetTokenAt(theIndex))){
//add code here to pull unwanted tokens out of the stack...
}
}//for
}//for
}//if
}//if
}
return result;
}

View File

@ -232,7 +232,7 @@ friend class CTokenHandler;
* @update gess5/11/98
* @return TRUE if all went well, otherwise FALSE
*/
virtual nsresult ResumeParse(nsIDTD* mDefaultDTD=0);
virtual nsresult ResumeParse(nsIDTD* mDefaultDTD=0, PRBool aIsFinalChunk=PR_FALSE);
void DebugDumpSource(ostream& anOutput);
@ -319,7 +319,7 @@ private:
* @param
* @return TRUE if it's ok to proceed
*/
PRBool WillTokenize();
PRBool WillTokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
@ -330,7 +330,7 @@ private:
* @update gess 3/25/98
* @return error code
*/
nsresult Tokenize();
nsresult Tokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
* This is the tail-end of the code sandwich for the
@ -341,7 +341,7 @@ private:
* @param
* @return TRUE if all went well
*/
PRBool DidTokenize();
PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE);
protected: