34458 - Added static APIs to determine DTD for a given

DOCTYPE,MIMETYPE,COMMAND.
r=rickg,pollmann
33397 - Fixed the NOSCRIPT interference on residual style handling.
r=rickg.
35456 - Emulating Nav. for documents with unclosed STYLE and SCRIPT.
37052 - Handling XMP content.
r=pollmann
This commit is contained in:
harishd%netscape.com 2000-05-03 22:09:09 +00:00
parent bc9cc0db50
commit 9bc8378e24
20 changed files with 896 additions and 430 deletions

View File

@ -1332,7 +1332,7 @@ nsresult CNavDTD::HandleStartToken(CToken* aToken) {
break;
case eHTMLTag_noscript:
mHasOpenNoXXX++;
//mHasOpenNoXXX++; // Fix for 33397 - Enable this when we handle NOSCRIPTS.
isTokenHandled=PR_TRUE; // XXX - Throwing NOSCRIPT to the floor...yet another time..
break;

View File

@ -142,186 +142,4 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=eXMLText;
}
/*************************************************************************************************
First, let's define our modalities:
1. compatibility-mode: behave as much like nav4 as possible (unless it's too broken to bother)
2. standard-mode: do html as well as you can per spec, and throw out navigator quirks
3. strict-mode: adhere to the strict DTD specification to the highest degree possible
Assume the doctype is in the following form:
<!DOCTYPE [Top Level Element] [Availability] "[Registration]// [Owner-ID] // [Type] [desc-text] // [Language]" "URI|text-identifier">
[HTML] [PUBLIC|...] [+|-] [W3C|IETF|...] [DTD] "..." [EN]|...] "..."
Here are the new rules for DTD handling; comments welcome:
XHTML and XML documents are always strict-mode:
example: <!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">
HTML strict dtd's enable strict-mode:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
example: <!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), without URI enables compatibility-mode:
example: <!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), with a URI that points to the strict.dtd will become strict:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/strict.dtd">
doctypes with systemID's or internal subset are handled in strict-mode:
example: <!DOCTYPE HTML PUBLIC PublicID SystemID>
example: <!DOCTYPE HTML SYSTEM SystemID>
example: <!DOCTYPE HTML (PUBLIC PublicID SystemID? | SYSTEM SystemID) [ Internal-SS ]>
All other doctypes (<4.0), and documents without a doctype are handled in compatibility-mode.
*****************************************************************************************************/
/**
 * This is called when it's time to find out
 * what mode the parser/DTD should run for this document.
 * (Each parsercontext can have its own mode).
 *
 * The buffer is scanned for a DOCTYPE declaration (or, failing that, an
 * "?XML" marker). The outcome is stored in mParseMode, and mDocType is
 * updated whenever the doctype identifies the kind of document. If the
 * PARSE_MODE environment variable is set to "strict" it overrides the
 * sniffed mode; if it is not set at all, an undetermined mode becomes quirks.
 *
 * @update gess 02/17/00
 * @param  theBuffer -- text to scan for a DOCTYPE or XML marker
 * @return parsermode (define in nsIParser.h)
 */
eParseMode CParserContext::DetermineParseMode(const nsString& theBuffer) {
  const char* theModeStr= PR_GetEnv("PARSE_MODE");
  mParseMode = eParseMode_unknown;
  PRInt32 theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,0,10);
  if(kNotFound<theIndex) {
    //good, we found "DOCTYPE" -- now go find its end delimiter '>'
    PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
    //note that if we don't find '>', then we just scan the first 512 bytes.
    PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
    PRInt32 theSubIndex=theBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8)); //skip to the type and desc-text...
    PRInt32 theErr=0;
    PRInt32 theMajorVersion=3;
    if(0<=theSubIndex) {
      PRInt32 theStartPos=theSubIndex+5;
      PRInt32 theCount=theEnd-theStartPos;
      if(kNotFound<theSubIndex) {
        theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
        if(0<=theSubIndex) {
          //XHTML is always strict; nothing else needs to be examined.
          mDocType=eXHTMLText;
          mParseMode=eParseMode_strict;
          return mParseMode;
        }
        else {
          theSubIndex=theBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
          if(0<=theSubIndex) {
            mDocType=eHTML4Text;
            mParseMode=eParseMode_strict;
            theMajorVersion=4;
            theSubIndex+=15;
          }
          else {
            theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
            if(0<=theSubIndex) {
              //provisionally strict; the version/keyword checks below may downgrade this.
              mDocType=eHTML4Text;
              mParseMode=eParseMode_strict;
              theMajorVersion=3;
            }
            else {
              theSubIndex=theBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theStartPos,theCount);
              if(0<=theSubIndex) {
                mDocType=eHTML3Text;
                mParseMode=eParseMode_quirks;
                theSubIndex+=20;
              }
            }
          }
        }
      }
      theStartPos=theSubIndex+5;
      theCount=theEnd-theStartPos;
      nsAutoString theNum;
      //get the next substring from the buffer, which should be a number.
      //now see what the version number is...
      theStartPos=theBuffer.FindCharInSet("123456789",theStartPos);
      if(0<=theStartPos) {
        PRInt32 theTerminal=theBuffer.FindCharInSet(" />",theStartPos+1);
        //A miss is reported as a negative index (the digit search above relies on
        //the same convention), so test for that rather than for non-zero; otherwise
        //the 3-character fallback below could never be taken.
        if(0<=theTerminal) {
          theBuffer.Mid(theNum,theStartPos,theTerminal-theStartPos);
        }
        else theBuffer.Mid(theNum,theStartPos,3);
        theMajorVersion=theNum.ToInteger(&theErr);
      }
      //now see what the desc-text after the version number says...
      theStartPos+=theNum.Length();
      theCount=theEnd-theStartPos;
      if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
         (theBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
        mParseMode=eParseMode_quirks;
      }
      //one last thing: look for a URI that specifies the strict.dtd
      theStartPos+=6;
      theCount=theEnd-theStartPos;
      theSubIndex=theBuffer.Find("STRICT.DTD",PR_TRUE,theStartPos,theCount);
      if(0<theSubIndex) {
        //Since we found it, regardless of what's in the descr-text, kick into strict mode.
        mParseMode=eParseMode_strict;
        mDocType=eHTML4Text;
      }
      if (0==theErr){
        switch(theMajorVersion) {
          case 0: case 1: case 2: case 3:
            if(mDocType!=eXHTMLText){
              mParseMode=eParseMode_quirks; //be as backward compatible as possible
              mDocType=eHTML3Text;
            }
            break;
          default:
            //XXX hack -- someday, the next line of code will be criticized
            //for its lack of vision...
            if(theMajorVersion>20) {
              mParseMode=eParseMode_noquirks;
            }
            break;
        } //switch
      }
    } //if
    else {
      //No "//DTD" section: fall back to looking for HTML plus a public/system identifier.
      PRInt32 thePos=theBuffer.Find("HTML",PR_TRUE,1,50);
      if(kNotFound!=thePos) {
        mDocType=eHTML4Text;
        //NOTE(review): every other Find() call here passes a PR_TRUE/PR_FALSE flag as the
        //second argument; thePos may be landing in that slot instead of acting as the
        //search offset -- confirm against the nsString::Find overloads.
        PRInt32 theIDPos=theBuffer.Find("PublicID",thePos);
        if(kNotFound==theIDPos)
          theIDPos=theBuffer.Find("SystemID",thePos);
        mParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
      }
    }
  }
  else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
    mParseMode=eParseMode_strict;
  }
  if(theModeStr) {
    //PARSE_MODE is set: only the value "strict" changes the sniffed result.
    if(0==nsCRT::strcasecmp(theModeStr,"strict"))
      mParseMode=eParseMode_strict;
  }
  else mParseMode = (eParseMode_unknown==mParseMode)? eParseMode_quirks : mParseMode;
  return mParseMode;
}

View File

@ -58,8 +58,6 @@ public:
PRBool aCopyUnused=PR_FALSE);
CParserContext( const CParserContext& aContext);
eParseMode DetermineParseMode(const nsString& theBuffer);
~CParserContext();

View File

@ -992,7 +992,7 @@ nsHTMLContentSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMod
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
Write("<!");
// Write("<!");
return NS_OK;
}

View File

@ -540,7 +540,7 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
if(NS_SUCCEEDED(result)) {
//XXX - Find a better solution to record content
if(theTag==eHTMLTag_textarea && !mRecordTrailingContent) {
if((theTag==eHTMLTag_textarea || theTag==eHTMLTag_xmp) && !mRecordTrailingContent) {
mRecordTrailingContent=PR_TRUE;
}
@ -589,7 +589,8 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne
nsresult result=NS_OK;
if(aToken) {
if(aToken->GetTypeID()==eHTMLTag_textarea && mRecordTrailingContent) {
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
if((theTag==eHTMLTag_textarea || theTag==eHTMLTag_xmp) && mRecordTrailingContent) {
mRecordTrailingContent=PR_FALSE;
}
result= aToken->Consume(aChar,aScanner,mParseMode); //tell new token to finish consuming text...

View File

@ -282,16 +282,17 @@ void CStartToken::DebugDumpSource(nsOutputStream& out) {
void CStartToken::GetSource(nsString& anOutputString){
anOutputString.AssignWithConversion("<");
/*
* mTextValue used to contain the name of the tag.
* But for the sake of performance we now rely on the tagID
* rather than tag name. This however, caused bug 15204
* to reincarnate. Since, mTextvalue is not being used here..
* I'm just going to comment it out.
*
* Watch out for Bug 15204
*/
// anOutputString+=mTextValue;
if(mTrailingContent.Length()>0)
anOutputString+=mTrailingContent;
anOutputString=mTrailingContent;
else {
if(mTextValue.Length()>0)
anOutputString=mTextValue;
else
anOutputString.AssignWithConversion(GetTagName(mTypeID));
anOutputString+='>';
}
}
/*
@ -304,16 +305,17 @@ void CStartToken::GetSource(nsString& anOutputString){
void CStartToken::AppendSource(nsString& anOutputString){
anOutputString.AppendWithConversion("<");
/*
* mTextValue used to contain the name of the tag.
* But for the sake of performance we now rely on the tagID
* rather than tag name. This however, caused bug 15204
* to reincarnate. Since, mTextvalue is not being used here..
* I'm just going to comment it out.
*
* Watch out for Bug 15204
*/
// anOutputString+=mTextValue;
if(mTrailingContent.Length()>0)
anOutputString+=mTrailingContent;
else {
if(mTextValue.Length()>0)
anOutputString+=mTextValue;
else
anOutputString.AppendWithConversion(GetTagName(mTypeID));
anOutputString+='>';
}
}
/*
@ -655,10 +657,16 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
else disaster=PR_TRUE;
if(disaster) {
if((!aScanner.IsIncremental()) && (theAltTermStrPos>kNotFound)) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset=theAltTermStrPos;
theLastIteration=PR_TRUE;
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos>kNotFound) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset=theAltTermStrPos;
theLastIteration=PR_TRUE;
}
else {
aTerminalString.Cut(0,2); // Do this to fix Bug. 35456
done=PR_TRUE;
}
}
else
result=kEOF;

View File

@ -42,15 +42,24 @@
#include "CRtfDTD.h"
#include "CNavDTD.h"
#include "COtherDTD.h"
//#define rickgdebug
#include "prenv.h"
#include "nsParserCIID.h"
//#define rickgdebug
#define TEST_DOCTYPES 1
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kClassIID, NS_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static NS_DEFINE_IID(kIStreamListenerIID, NS_ISTREAMLISTENER_IID);
static NS_DEFINE_CID(kWellFormedDTDCID, NS_WELLFORMEDDTD_CID);
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
static NS_DEFINE_CID(kCOtherDTDCID, NS_COTHER_DTD_CID);
static NS_DEFINE_CID(kViewSourceDTDCID, NS_VIEWSOURCE_DTD_CID);
static NS_DEFINE_CID(kRtfDTDCID, NS_CRTF_DTD_CID);
static const char* kNullURL = "Error: Null URL given";
static const char* kOnStartNotCalled = "Error: OnStartRequest() must be called before OnDataAvailable()";
static const char* kBadListenerInit = "Error: Parser's IStreamListener API was not setup correctly in constructor.";
@ -427,6 +436,194 @@ eParseMode nsParser::GetParseMode(void){
}
/*************************************************************************************************
First, let's define our modalities:
1. compatibility-mode: behave as much like nav4 as possible (unless it's too broken to bother)
2. standard-mode: do html as well as you can per spec, and throw out navigator quirks
3. strict-mode: adhere to the strict DTD specification to the highest degree possible
Assume the doctype is in the following form:
<!DOCTYPE [Top Level Element] [Availability] "[Registration]// [Owner-ID] // [Type] [desc-text] // [Language]" "URI|text-identifier">
[HTML] [PUBLIC|...] [+|-] [W3C|IETF|...] [DTD] "..." [EN]|...] "..."
Here are the new rules for DTD handling; comments welcome:
XHTML and XML documents are always strict-mode:
example: <!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">
HTML strict dtd's enable strict-mode:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
example: <!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), without URI enables compatibility-mode:
example: <!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), with a URI that points to the strict.dtd will become strict:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/strict.dtd">
doctypes with systemID's or internal subset are handled in strict-mode:
example: <!DOCTYPE HTML PUBLIC PublicID SystemID>
example: <!DOCTYPE HTML SYSTEM SystemID>
example: <!DOCTYPE HTML (PUBLIC PublicID SystemID? | SYSTEM SystemID) [ Internal-SS ]>
All other doctypes (<4.0), and documents without a doctype are handled in compatibility-mode.
*****************************************************************************************************/
/**
* This is called when it's time to find out
* what mode the parser/DTD should run for this document.
* (Each parsercontext can have it's own mode).
*
* @update gess 02/17/00
* @return parsermode (define in nsIParser.h)
*/
static
void DetermineParseMode(nsString& aBuffer,eParseMode& aParseMode,eParserDocType aDocType) {
const char* theModeStr= PR_GetEnv("PARSE_MODE");
aParseMode = eParseMode_unknown;
PRInt32 theIndex=aBuffer.Find("DOCTYPE",PR_TRUE,0,10);
if(kNotFound<theIndex) {
//good, we found "DOCTYPE" -- now go find it's end delimiter '>'
PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan,theIndex+1);
PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
PRInt32 theSubIndex=aBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8)); //skip to the type and desc-text...
PRInt32 theErr=0;
PRInt32 theMajorVersion=3;
//note that if we don't find '>', then we just scan the first 512 bytes.
if(0<=theSubIndex) {
PRInt32 theStartPos=theSubIndex+5;
PRInt32 theCount=theEnd-theStartPos;
if(kNotFound<theSubIndex) {
theSubIndex=aBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eXHTMLText;
aParseMode=eParseMode_strict;
}
else {
theSubIndex=aBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
if(0<=theSubIndex) {
aDocType=eHTML4Text;
aParseMode=eParseMode_strict;
theMajorVersion=4;
theSubIndex+=15;
}
else {
theSubIndex=aBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eHTML4Text;
aParseMode=eParseMode_strict;
theMajorVersion=3;
}
else {
theSubIndex=aBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eHTML3Text;
aParseMode=eParseMode_quirks;
theSubIndex+=20;
}
}
}
}
}
theStartPos=theSubIndex+5;
theCount=theEnd-theStartPos;
nsAutoString theNum;
//get the next substring from the buffer, which should be a number.
//now see what the version number is...
theStartPos=aBuffer.FindCharInSet("123456789",theStartPos);
if(0<=theStartPos) {
PRInt32 theTerminal=aBuffer.FindCharInSet(" />",theStartPos+1);
if(theTerminal) {
aBuffer.Mid(theNum,theStartPos,theTerminal-theStartPos);
}
else aBuffer.Mid(theNum,theStartPos,3);
theMajorVersion=theNum.ToInteger(&theErr);
}
//now see what the
theStartPos+=theNum.Length();
theCount=theEnd-theStartPos;
if((aBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
(aBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(aBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(aBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
(aBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
(aBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
aParseMode=eParseMode_quirks;
}
//one last thing: look for a URI that specifies the strict.dtd
theStartPos+=6;
theCount=theEnd-theStartPos;
theSubIndex=aBuffer.Find("STRICT.DTD",PR_TRUE,theStartPos,theCount);
if(0<theSubIndex) {
//Since we found it, regardless of what's in the descr-text, kick into strict mode.
aParseMode=eParseMode_strict;
aDocType=eHTML4Text;
}
if (0==theErr){
switch(theMajorVersion) {
case 0: case 1: case 2: case 3:
if(aDocType!=eXHTMLText){
aParseMode=eParseMode_quirks; //be as backward compatible as possible
aDocType=eHTML3Text;
}
break;
default:
//XXX hack -- someday, the next line of code will be criticized
//for it's lack of vision...
if(theMajorVersion>20) {
aParseMode=eParseMode_noquirks;
}
break;
} //switch
}
} //if
else {
PRInt32 thePos=aBuffer.Find("HTML",PR_TRUE,1,50);
if(kNotFound!=thePos) {
aDocType=eHTML4Text;
PRInt32 theIDPos=aBuffer.Find("PublicID",thePos);
if(kNotFound==theIDPos)
theIDPos=aBuffer.Find("SystemID",thePos);
aParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
}
}
}
else if(kNotFound<(theIndex=aBuffer.Find("?XML",PR_TRUE,0,128))) {
aDocType=eXMLText;
aParseMode=eParseMode_strict;
}
if(theModeStr) {
if(0==nsCRT::strcasecmp(theModeStr,"strict"))
aParseMode=eParseMode_strict;
}
else {
if(eParseMode_unknown==aParseMode) {
aBuffer.InsertWithConversion("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\n",0);
aDocType=eHTML3Text;
aParseMode=eParseMode_quirks;
}
}
}
/**
*
@ -496,6 +693,131 @@ PRBool FindSuitableDTD( CParserContext& aParserContext,nsString& aBuffer) {
return PR_FALSE;
}
/**
 * Picks a DTD class from a DOCTYPE string and asks the component
 * manager for an instance of it.
 *
 * The doctype string is run through DetermineParseMode(); the DTD class is
 * then chosen from the document type and parse mode that call reports.
 *
 * @update  harishd 05/01/00
 * @param   aDTD        -- Receives the created DTD instance.
 * @param   aDocTypeStr -- A doctype for which a DTD is to be selected. May be
 *                         null, in which case nothing is created. Note that it
 *                         is handed to DetermineParseMode() as a non-const buffer.
 * @return  NS_OK when aDocTypeStr is null, otherwise whatever
 *          nsComponentManager::CreateInstance returns.
 */
nsresult nsParser::CreateCompatibleDTDForDocType(nsIDTD** aDTD, nsString* aDocTypeStr)
{
  // Without a doctype string there is nothing to select.
  if(!aDocTypeStr) {
    return NS_OK;
  }

  eParseMode     theMode=eParseMode_unknown;
  eParserDocType theKind=ePlainText;
  DetermineParseMode(*aDocTypeStr,theMode,theKind);

  // The navigator DTD handles everything that is not singled out below
  // (HTML 3, non-strict HTML 4, and any other document type).
  const nsCID* theCID=&kNavDTDCID;
  if((eXHTMLText==theKind) || (eXMLText==theKind)) {
    theCID=&kWellFormedDTDCID;
  }
  else if((eHTML4Text==theKind) && (eParseMode_strict==theMode)) {
    theCID=&kCOtherDTDCID;
  }

  return nsComponentManager::CreateInstance(*theCID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD);
}
/**
 * Call this method to determine a DTD for a given mime type.
 *
 * Defined as a member of nsParser to match the static declaration in the
 * class. The default argument values live on that declaration and must not
 * be repeated on this out-of-class definition.
 *
 * @update  harishd 05/01/00
 * @param   aDTD       -- Receives the created DTD instance.
 * @param   aMimeType  -- A mimetype for which a DTD is to be selected. May be
 *                        null, in which case nothing is created.
 * @param   aParseMode -- Used with the HTML mime type to choose between the
 *                        strict DTD and the navigator DTD.
 * @return  NS_OK when aMimeType is null, otherwise whatever
 *          nsComponentManager::CreateInstance returns.
 */
nsresult nsParser::CreateCompatibleDTDForMimeType(nsIDTD** aDTD, const nsString* aMimeType,
                                                  eParseMode aParseMode)
{
  nsresult result=NS_OK;
  const nsCID* theDTDClassID=0;
  if(aMimeType) {
    NS_ASSERTION(aParseMode!=eParseMode_unknown,"DTD selection might require a parsemode");
    if(aMimeType->EqualsWithConversion(kHTMLTextContentType)) {
      // HTML is the only mime type whose DTD depends on the parse mode.
      if(aParseMode==eParseMode_strict) {
        theDTDClassID=&kCOtherDTDCID;
      }
      else {
        theDTDClassID=&kNavDTDCID;
      }
    }
    else if(aMimeType->EqualsWithConversion(kPlainTextContentType)) {
      theDTDClassID=&kNavDTDCID;
    }
    else if(aMimeType->EqualsWithConversion(kXMLTextContentType) ||
            aMimeType->EqualsWithConversion(kXULTextContentType) ||
            aMimeType->EqualsWithConversion(kRDFTextContentType)) {
      theDTDClassID=&kWellFormedDTDCID;
    }
    else if(aMimeType->EqualsWithConversion(kXIFTextContentType)) {
      theDTDClassID=&kRtfDTDCID;
    }
    else {
      // Unrecognized mime types fall back to the navigator DTD.
      theDTDClassID=&kNavDTDCID;
    }
    result=(theDTDClassID)? nsComponentManager::CreateInstance(*theDTDClassID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD):NS_OK;
  }
  return result;
}
/**
 * Call this method to determine a DTD for a given command.
 *
 * Defined as a member of nsParser to match the static declaration in the
 * class. The default argument value lives on that declaration and must not
 * be repeated on this out-of-class definition.
 *
 * @update  harishd 05/01/00
 * @param   aDTD     -- Receives the created DTD instance.
 * @param   aCommand -- A command for which a DTD is to be selected.
 * @return  NS_OK when no DTD is associated with the command (eViewErrors and
 *          any other value), otherwise whatever
 *          nsComponentManager::CreateInstance returns.
 */
nsresult nsParser::CreateCompatibleDTDForCommand(nsIDTD** aDTD, eParserCommands aCommand)
{
  nsresult result=NS_OK;
  const nsCID* theDTDClassID=0;
  switch(aCommand) {
    case eViewSource:
      theDTDClassID=&kViewSourceDTDCID;
      break;
    case eViewNormal:
      theDTDClassID=&kNavDTDCID;
      break;
    case eViewErrors:
    default:
      // No DTD is selected for these commands; *aDTD is left untouched.
      break;
  }
  result=(theDTDClassID)? nsComponentManager::CreateInstance(*theDTDClassID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD):NS_OK;
  return result;
}
#ifdef TEST_DOCTYPES
static const char* doctypes[] = {
@ -608,7 +930,7 @@ static const char* doctypes[] = {
*/
nsresult nsParser::WillBuildModel(nsString& aFilename){
nsresult result=NS_OK;
nsresult result=NS_OK;
#if TEST_DOCTYPES
@ -617,9 +939,13 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
if(!tested) {
tested=PR_TRUE;
eParseMode theParseMode=eParseMode_unknown;
eParserDocType theDocumentType=ePlainText;
while(*theDocType) {
nsAutoString theType(*theDocType);
eParseMode result=mParserContext->DetermineParseMode(theType);
nsAutoString theType;
theType.AssignWithConversion(*theDocType);
DetermineParseMode(theType,theParseMode,theDocumentType);
theDocType++;
}
}
@ -631,7 +957,7 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
mMinorIteration=-1;
nsString& theBuffer=mParserContext->mScanner->GetBuffer();
mParserContext->DetermineParseMode(theBuffer);
DetermineParseMode(theBuffer,mParserContext->mParseMode,mParserContext->mDocType);
if(PR_TRUE==FindSuitableDTD(*mParserContext,theBuffer)) {
mParserContext->mDTD->WillBuildModel( *mParserContext,mSink);

View File

@ -360,7 +360,40 @@ private:
*/
PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
* Call this method to determine a DTD for a DOCTYPE
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aDocTypeStr -- A doctype for which a DTD is to be selected.
* Note: this method takes only aDTD and aDocTypeStr. Selection by mime type
* (together with a parse mode) is done by CreateCompatibleDTDForMimeType, and
* selection by command is done by CreateCompatibleDTDForCommand.
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForDocType(nsIDTD** aDTD, nsString* aDocTypeStr);
/**
* Call this method to determine a DTD for a given mime type.
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aMimeType -- A mimetype for which a DTD is to be selected.
Note: aParseMode might be required.
* @param aParseMode -- Used with aMimeType to choose the correct DTD.
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForMimeType(nsIDTD** aDTD, const nsString* aMimeType=nsnull,
eParseMode aParseMode=eParseMode_unknown);
/**
* Call this method to determine a DTD for a given command
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aCommand -- A command for which a DTD is to be selected.
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForCommand(nsIDTD** aDTD, eParserCommands aCommand=eViewNormal);
protected:
//*********************************************
// And now, some data members...

View File

@ -54,6 +54,19 @@
#define NS_XIF_DTD_CID \
{ 0xa6cf910e, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
// Class-ID initializers. Like the other *_CID macros in this header they
// expand to a bare brace initializer with no trailing ';', so they can be
// used anywhere an initializer is expected; the statement that uses them
// (e.g. NS_DEFINE_CID(name, MACRO);) supplies its own terminator.

// {CCF5BED0-1AF8-11d4-812B-0010A4E0C706}
#define NS_COTHER_DTD_CID \
{ 0xccf5bed0, 0x1af8, 0x11d4, { 0x81, 0x2b, 0x0, 0x10, 0xa4, 0xe0, 0xc7, 0x6 } }
// {8323FAD0-2102-11d4-8142-000064657374}
#define NS_VIEWSOURCE_DTD_CID \
{ 0x8323fad0, 0x2102, 0x11d4, { 0x81, 0x42, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
// {8323FAD1-2102-11d4-8142-000064657374}
#define NS_CRTF_DTD_CID \
{ 0x8323fad1, 0x2102, 0x11d4, { 0x81, 0x42, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
// {a6cf910f-15b3-11d2-932e-00805f8add32}
#define NS_HTMLCONTENTSINKSTREAM_CID \
{ 0xa6cf910f, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }

View File

@ -31,6 +31,9 @@
#include "nsWellFormedDTD.h"
#include "CNavDTD.h"
#include "nsXIFDTD.h"
#include "COtherDTD.h"
#include "CRtfDTD.h"
#include "nsViewSourceHTML.h"
#include "nsHTMLContentSinkStream.h"
#include "nsHTMLToTXTSinkStream.h"
#include "nsHTMLEntities.h"
@ -98,6 +101,9 @@ static NS_DEFINE_CID(kLoggingSinkCID, NS_LOGGING_SINK_CID);
static NS_DEFINE_CID(kWellFormedDTDCID, NS_WELLFORMEDDTD_CID);
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
static NS_DEFINE_CID(kXIFDTDCID, NS_XIF_DTD_CID);
static NS_DEFINE_CID(kCOtherDTDCID, NS_COTHER_DTD_CID);
static NS_DEFINE_CID(kViewSourceDTDCID, NS_VIEWSOURCE_DTD_CID);
static NS_DEFINE_CID(kRtfDTDCID, NS_CRTF_DTD_CID);
static NS_DEFINE_CID(kHTMLContentSinkStreamCID, NS_HTMLCONTENTSINKSTREAM_CID);
static NS_DEFINE_CID(kHTMLToTXTSinkStreamCID, NS_HTMLTOTXTSINKSTREAM_CID);
static NS_DEFINE_CID(kParserServiceCID, NS_PARSERSERVICE_CID);
@ -114,6 +120,9 @@ static Components gComponents[] = {
{ "Well formed DTD", &kWellFormedDTDCID },
{ "Navigator HTML DTD", &kNavDTDCID },
{ "XIF DTD", &kXIFDTDCID },
{ "OTHER DTD", &kCOtherDTDCID },
{ "ViewSource DTD", &kViewSourceDTDCID },
{ "Rtf DTD", &kRtfDTDCID },
{ "HTML Content Sink Stream", &kHTMLContentSinkStreamCID },
{ "HTML To Text Sink Stream", &kHTMLToTXTSinkStreamCID },
{ "ParserService", &kParserServiceCID },
@ -127,6 +136,9 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsLoggingSink)
NS_GENERIC_FACTORY_CONSTRUCTOR(CWellFormedDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(CNavDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsXIFDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(COtherDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(CViewSourceHTML)
NS_GENERIC_FACTORY_CONSTRUCTOR(CRtfDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsHTMLContentSinkStream)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsHTMLToTXTSinkStream)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsParserService)
@ -154,6 +166,9 @@ protected:
nsCOMPtr<nsIGenericFactory> mWellFormedDTDFactory;
nsCOMPtr<nsIGenericFactory> mNavHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mXIFDTDFactory;
nsCOMPtr<nsIGenericFactory> mOtherHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mViewSourceHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mRtfHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mHTMLContentSinkStreamFactory;
nsCOMPtr<nsIGenericFactory> mHTMLToTXTSinkStreamFactory;
nsCOMPtr<nsIGenericFactory> mParserServiceFactory;
@ -260,6 +275,27 @@ nsParserModule::GetClassObject(nsIComponentManager *aCompMgr,
&nsXIFDTDConstructor);
}
fact = mXIFDTDFactory;
}
else if (aClass.Equals(kCOtherDTDCID)) {
if (!mOtherHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mOtherHTMLDTDFactory),
&COtherDTDConstructor);
}
fact = mOtherHTMLDTDFactory;
}
else if (aClass.Equals(kViewSourceDTDCID)) {
if (!mViewSourceHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mViewSourceHTMLDTDFactory),
&CViewSourceHTMLConstructor);
}
fact = mViewSourceHTMLDTDFactory;
}
else if (aClass.Equals(kRtfDTDCID)) {
if (!mRtfHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mRtfHTMLDTDFactory),
&CRtfDTDConstructor);
}
fact = mRtfHTMLDTDFactory;
}
else if (aClass.Equals(kHTMLContentSinkStreamCID)) {
if (!mHTMLContentSinkStreamFactory) {

View File

@ -1332,7 +1332,7 @@ nsresult CNavDTD::HandleStartToken(CToken* aToken) {
break;
case eHTMLTag_noscript:
mHasOpenNoXXX++;
//mHasOpenNoXXX++; // Fix for 33397 - Enable this when we handle NOSCRIPTS.
isTokenHandled=PR_TRUE; // XXX - Throwing NOSCRIPT to the floor...yet another time..
break;

View File

@ -142,186 +142,4 @@ void CParserContext::SetMimeType(const nsString& aMimeType){
mDocType=eXMLText;
}
/*************************************************************************************************
First, let's define our modalities:
1. compatibility-mode: behave as much like nav4 as possible (unless it's too broken to bother)
2. standard-mode: do html as well as you can per spec, and throw out navigator quirks
3. strict-mode: adhere to the strict DTD specification to the highest degree possible
Assume the doctype is in the following form:
<!DOCTYPE [Top Level Element] [Availability] "[Registration]// [Owner-ID] // [Type] [desc-text] // [Language]" "URI|text-identifier">
[HTML] [PUBLIC|...] [+|-] [W3C|IETF|...] [DTD] "..." [EN]|...] "..."
Here are the new rules for DTD handling; comments welcome:
XHTML and XML documents are always strict-mode:
example: <!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">
HTML strict dtd's enable strict-mode:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
example: <!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), without URI enables compatibility-mode:
example: <!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), with a URI that points to the strict.dtd will become strict:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/strict.dtd">
doctypes with systemID's or internal subset are handled in strict-mode:
example: <!DOCTYPE HTML PUBLIC PublicID SystemID>
example: <!DOCTYPE HTML SYSTEM SystemID>
example: <!DOCTYPE HTML (PUBLIC PublicID SystemID? | SYSTEM SystemID) [ Internal-SS ]>
All other doctypes (<4.0), and documents without a doctype are handled in compatibility-mode.
*****************************************************************************************************/
/**
 * This is called when it's time to find out
 * what mode the parser/DTD should run for this document.
 * (Each parsercontext can have its own mode).
 *
 * The buffer is scanned for a DOCTYPE declaration (or, failing that, an
 * "?XML" marker). The outcome is stored in mParseMode, and mDocType is
 * updated whenever the doctype identifies the kind of document. If the
 * PARSE_MODE environment variable is set to "strict" it overrides the
 * sniffed mode; if it is not set at all, an undetermined mode becomes quirks.
 *
 * @update gess 02/17/00
 * @param  theBuffer -- text to scan for a DOCTYPE or XML marker
 * @return parsermode (define in nsIParser.h)
 */
eParseMode CParserContext::DetermineParseMode(const nsString& theBuffer) {
  const char* theModeStr= PR_GetEnv("PARSE_MODE");
  mParseMode = eParseMode_unknown;
  PRInt32 theIndex=theBuffer.Find("DOCTYPE",PR_TRUE,0,10);
  if(kNotFound<theIndex) {
    //good, we found "DOCTYPE" -- now go find its end delimiter '>'
    PRInt32 theGTPos=theBuffer.FindChar(kGreaterThan,theIndex+1);
    //note that if we don't find '>', then we just scan the first 512 bytes.
    PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
    PRInt32 theSubIndex=theBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8)); //skip to the type and desc-text...
    PRInt32 theErr=0;
    PRInt32 theMajorVersion=3;
    if(0<=theSubIndex) {
      PRInt32 theStartPos=theSubIndex+5;
      PRInt32 theCount=theEnd-theStartPos;
      if(kNotFound<theSubIndex) {
        theSubIndex=theBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
        if(0<=theSubIndex) {
          //XHTML is always strict; nothing else needs to be examined.
          mDocType=eXHTMLText;
          mParseMode=eParseMode_strict;
          return mParseMode;
        }
        else {
          theSubIndex=theBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
          if(0<=theSubIndex) {
            mDocType=eHTML4Text;
            mParseMode=eParseMode_strict;
            theMajorVersion=4;
            theSubIndex+=15;
          }
          else {
            theSubIndex=theBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
            if(0<=theSubIndex) {
              //provisionally strict; the version/keyword checks below may downgrade this.
              mDocType=eHTML4Text;
              mParseMode=eParseMode_strict;
              theMajorVersion=3;
            }
            else {
              theSubIndex=theBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theStartPos,theCount);
              if(0<=theSubIndex) {
                mDocType=eHTML3Text;
                mParseMode=eParseMode_quirks;
                theSubIndex+=20;
              }
            }
          }
        }
      }
      theStartPos=theSubIndex+5;
      theCount=theEnd-theStartPos;
      nsAutoString theNum;
      //get the next substring from the buffer, which should be a number.
      //now see what the version number is...
      theStartPos=theBuffer.FindCharInSet("123456789",theStartPos);
      if(0<=theStartPos) {
        PRInt32 theTerminal=theBuffer.FindCharInSet(" />",theStartPos+1);
        //A miss is reported as a negative index (the digit search above relies on
        //the same convention), so test for that rather than for non-zero; otherwise
        //the 3-character fallback below could never be taken.
        if(0<=theTerminal) {
          theBuffer.Mid(theNum,theStartPos,theTerminal-theStartPos);
        }
        else theBuffer.Mid(theNum,theStartPos,3);
        theMajorVersion=theNum.ToInteger(&theErr);
      }
      //now see what the desc-text after the version number says...
      theStartPos+=theNum.Length();
      theCount=theEnd-theStartPos;
      if((theBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
         (theBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
         (theBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
         (theBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
        mParseMode=eParseMode_quirks;
      }
      //one last thing: look for a URI that specifies the strict.dtd
      theStartPos+=6;
      theCount=theEnd-theStartPos;
      theSubIndex=theBuffer.Find("STRICT.DTD",PR_TRUE,theStartPos,theCount);
      if(0<theSubIndex) {
        //Since we found it, regardless of what's in the descr-text, kick into strict mode.
        mParseMode=eParseMode_strict;
        mDocType=eHTML4Text;
      }
      if (0==theErr){
        switch(theMajorVersion) {
          case 0: case 1: case 2: case 3:
            if(mDocType!=eXHTMLText){
              mParseMode=eParseMode_quirks; //be as backward compatible as possible
              mDocType=eHTML3Text;
            }
            break;
          default:
            //XXX hack -- someday, the next line of code will be criticized
            //for its lack of vision...
            if(theMajorVersion>20) {
              mParseMode=eParseMode_noquirks;
            }
            break;
        } //switch
      }
    } //if
    else {
      //No "//DTD" section: fall back to looking for HTML plus a public/system identifier.
      PRInt32 thePos=theBuffer.Find("HTML",PR_TRUE,1,50);
      if(kNotFound!=thePos) {
        mDocType=eHTML4Text;
        //NOTE(review): every other Find() call here passes a PR_TRUE/PR_FALSE flag as the
        //second argument; thePos may be landing in that slot instead of acting as the
        //search offset -- confirm against the nsString::Find overloads.
        PRInt32 theIDPos=theBuffer.Find("PublicID",thePos);
        if(kNotFound==theIDPos)
          theIDPos=theBuffer.Find("SystemID",thePos);
        mParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
      }
    }
  }
  else if(kNotFound<(theIndex=theBuffer.Find("?XML",PR_TRUE,0,128))) {
    mParseMode=eParseMode_strict;
  }
  if(theModeStr) {
    //PARSE_MODE is set: only the value "strict" changes the sniffed result.
    if(0==nsCRT::strcasecmp(theModeStr,"strict"))
      mParseMode=eParseMode_strict;
  }
  else mParseMode = (eParseMode_unknown==mParseMode)? eParseMode_quirks : mParseMode;
  return mParseMode;
}

View File

@ -58,8 +58,6 @@ public:
PRBool aCopyUnused=PR_FALSE);
CParserContext( const CParserContext& aContext);
eParseMode DetermineParseMode(const nsString& theBuffer);
~CParserContext();

View File

@ -992,7 +992,7 @@ nsHTMLContentSinkStream::AddDocTypeDecl(const nsIParserNode& aNode, PRInt32 aMod
DebugDump("<",aNode.GetText(),(mNodeStackPos)*2);
#endif
Write("<!");
// Write("<!");
return NS_OK;
}

View File

@ -540,7 +540,7 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
if(NS_SUCCEEDED(result)) {
//XXX - Find a better solution to record content
if(theTag==eHTMLTag_textarea && !mRecordTrailingContent) {
if((theTag==eHTMLTag_textarea || theTag==eHTMLTag_xmp) && !mRecordTrailingContent) {
mRecordTrailingContent=PR_TRUE;
}
@ -589,7 +589,8 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne
nsresult result=NS_OK;
if(aToken) {
if(aToken->GetTypeID()==eHTMLTag_textarea && mRecordTrailingContent) {
eHTMLTags theTag=(eHTMLTags)aToken->GetTypeID();
if((theTag==eHTMLTag_textarea || theTag==eHTMLTag_xmp) && mRecordTrailingContent) {
mRecordTrailingContent=PR_FALSE;
}
result= aToken->Consume(aChar,aScanner,mParseMode); //tell new token to finish consuming text...

View File

@ -282,16 +282,17 @@ void CStartToken::DebugDumpSource(nsOutputStream& out) {
void CStartToken::GetSource(nsString& anOutputString){
anOutputString.AssignWithConversion("<");
/*
* mTextValue used to contain the name of the tag.
* But for the sake of performance we now rely on the tagID
* rather than tag name. This however, caused bug 15204
* to reincarnate. Since, mTextvalue is not being used here..
* I'm just going to comment it out.
*
* Watch out for Bug 15204
*/
// anOutputString+=mTextValue;
if(mTrailingContent.Length()>0)
anOutputString+=mTrailingContent;
anOutputString=mTrailingContent;
else {
if(mTextValue.Length()>0)
anOutputString=mTextValue;
else
anOutputString.AssignWithConversion(GetTagName(mTypeID));
anOutputString+='>';
}
}
/*
@ -304,16 +305,17 @@ void CStartToken::GetSource(nsString& anOutputString){
// Appends the source form of this start token to anOutputString:
// a "<", followed either by the recorded trailing content (when there is
// any) or by the tag text and a closing '>'.
void CStartToken::AppendSource(nsString& anOutputString){
anOutputString.AppendWithConversion("<");
/*
* mTextValue used to contain the name of the tag.
* But for the sake of performance we now rely on the tagID
* rather than tag name. This however, caused bug 15204
* to reincarnate.
*
* NOTE(review): an earlier remark here said mTextValue was not used and
* had been commented out; the code below does append mTextValue whenever
* it is non-empty, so that remark no longer matches the code.
*
* Watch out for Bug 15204
*/
// anOutputString+=mTextValue;
// Recorded trailing content, when present, is emitted instead of the tag text.
if(mTrailingContent.Length()>0)
anOutputString+=mTrailingContent;
else {
// Prefer the stored tag text; fall back to the name looked up from the tag ID.
if(mTextValue.Length()>0)
anOutputString+=mTextValue;
else
anOutputString.AppendWithConversion(GetTagName(mTypeID));
anOutputString+='>';
}
}
/*
@ -655,10 +657,16 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
else disaster=PR_TRUE;
if(disaster) {
if((!aScanner.IsIncremental()) && (theAltTermStrPos>kNotFound)) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset=theAltTermStrPos;
theLastIteration=PR_TRUE;
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos>kNotFound) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset=theAltTermStrPos;
theLastIteration=PR_TRUE;
}
else {
aTerminalString.Cut(0,2); // Do this to fix Bug. 35456
done=PR_TRUE;
}
}
else
result=kEOF;

View File

@ -42,15 +42,24 @@
#include "CRtfDTD.h"
#include "CNavDTD.h"
#include "COtherDTD.h"
//#define rickgdebug
#include "prenv.h"
#include "nsParserCIID.h"
//#define rickgdebug
#define TEST_DOCTYPES 1
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
static NS_DEFINE_IID(kClassIID, NS_PARSER_IID);
static NS_DEFINE_IID(kIParserIID, NS_IPARSER_IID);
static NS_DEFINE_IID(kIStreamListenerIID, NS_ISTREAMLISTENER_IID);
static NS_DEFINE_CID(kWellFormedDTDCID, NS_WELLFORMEDDTD_CID);
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
static NS_DEFINE_CID(kCOtherDTDCID, NS_COTHER_DTD_CID);
static NS_DEFINE_CID(kViewSourceDTDCID, NS_VIEWSOURCE_DTD_CID);
static NS_DEFINE_CID(kRtfDTDCID, NS_CRTF_DTD_CID);
static const char* kNullURL = "Error: Null URL given";
static const char* kOnStartNotCalled = "Error: OnStartRequest() must be called before OnDataAvailable()";
static const char* kBadListenerInit = "Error: Parser's IStreamListener API was not setup correctly in constructor.";
@ -427,6 +436,194 @@ eParseMode nsParser::GetParseMode(void){
}
/*************************************************************************************************
First, let's define our modalities:
1. compatibility-mode: behave as much like nav4 as possible (unless it's too broken to bother)
2. standard-mode: do html as well as you can per spec, and throw out navigator quirks
3. strict-mode: adhere to the strict DTD specification to the highest degree possible
Assume the doctype is in the following form:
<!DOCTYPE [Top Level Element] [Availability] "[Registration]// [Owner-ID] // [Type] [desc-text] // [Language]" "URI|text-identifier">
[HTML] [PUBLIC|...] [+|-] [W3C|IETF|...] [DTD] "..." [EN]|...] "..."
Here are the new rules for DTD handling; comments welcome:
XHTML and XML documents are always strict-mode:
example: <!DOCTYPE \"-//W3C//DTD XHTML 1.0 Strict//EN\">
HTML strict dtd's enable strict-mode:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN">
example: <!DOCTYPE \"ISO/IEC 15445:1999//DTD HTML//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), without URI enables compatibility-mode:
example: <!DOCTYPE \"-//W3C//DTD HTML 4.01 Transitional//EN\">
HTML 4.0 (or greater) transitional, frameset, (etc), with a URI that points to the strict.dtd will become strict:
example: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/strict.dtd">
doctypes with systemID's or internal subset are handled in strict-mode:
example: <!DOCTYPE HTML PUBLIC PublicID SystemID>
example: <!DOCTYPE HTML SYSTEM SystemID>
example: <!DOCTYPE HTML (PUBLIC PublicID SystemID? | SYSTEM SystemID) [ Internal-SS ]>
All other doctypes (<4.0), and documents without a doctype are handled in compatibility-mode.
*****************************************************************************************************/
/**
* This is called when it's time to find out
* what mode the parser/DTD should run for this document.
* (Each parsercontext can have it's own mode).
*
* @update gess 02/17/00
* @return parsermode (define in nsIParser.h)
*/
static
void DetermineParseMode(nsString& aBuffer,eParseMode& aParseMode,eParserDocType aDocType) {
const char* theModeStr= PR_GetEnv("PARSE_MODE");
aParseMode = eParseMode_unknown;
PRInt32 theIndex=aBuffer.Find("DOCTYPE",PR_TRUE,0,10);
if(kNotFound<theIndex) {
//good, we found "DOCTYPE" -- now go find it's end delimiter '>'
PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan,theIndex+1);
PRInt32 theEnd=(kNotFound==theGTPos) ? 512 : MinInt(512,theGTPos);
PRInt32 theSubIndex=aBuffer.Find("//DTD",PR_TRUE,theIndex+8,theEnd-(theIndex+8)); //skip to the type and desc-text...
PRInt32 theErr=0;
PRInt32 theMajorVersion=3;
//note that if we don't find '>', then we just scan the first 512 bytes.
if(0<=theSubIndex) {
PRInt32 theStartPos=theSubIndex+5;
PRInt32 theCount=theEnd-theStartPos;
if(kNotFound<theSubIndex) {
theSubIndex=aBuffer.Find("XHTML",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eXHTMLText;
aParseMode=eParseMode_strict;
}
else {
theSubIndex=aBuffer.Find("ISO/IEC 15445:",PR_TRUE,theIndex+8,theEnd-(theIndex+8));
if(0<=theSubIndex) {
aDocType=eHTML4Text;
aParseMode=eParseMode_strict;
theMajorVersion=4;
theSubIndex+=15;
}
else {
theSubIndex=aBuffer.Find("HTML",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eHTML4Text;
aParseMode=eParseMode_strict;
theMajorVersion=3;
}
else {
theSubIndex=aBuffer.Find("HYPERTEXT MARKUP",PR_TRUE,theStartPos,theCount);
if(0<=theSubIndex) {
aDocType=eHTML3Text;
aParseMode=eParseMode_quirks;
theSubIndex+=20;
}
}
}
}
}
theStartPos=theSubIndex+5;
theCount=theEnd-theStartPos;
nsAutoString theNum;
//get the next substring from the buffer, which should be a number.
//now see what the version number is...
theStartPos=aBuffer.FindCharInSet("123456789",theStartPos);
if(0<=theStartPos) {
PRInt32 theTerminal=aBuffer.FindCharInSet(" />",theStartPos+1);
if(theTerminal) {
aBuffer.Mid(theNum,theStartPos,theTerminal-theStartPos);
}
else aBuffer.Mid(theNum,theStartPos,3);
theMajorVersion=theNum.ToInteger(&theErr);
}
//now see what the
theStartPos+=theNum.Length();
theCount=theEnd-theStartPos;
if((aBuffer.Find("TRANSITIONAL",PR_TRUE,theStartPos,theCount)>kNotFound)||
(aBuffer.Find("LOOSE",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(aBuffer.Find("FRAMESET",PR_TRUE,theStartPos,theCount)>kNotFound) ||
(aBuffer.Find("LATIN1", PR_TRUE,theStartPos,theCount) >kNotFound) ||
(aBuffer.Find("SYMBOLS",PR_TRUE,theStartPos,theCount) >kNotFound) ||
(aBuffer.Find("SPECIAL",PR_TRUE,theStartPos,theCount) >kNotFound)) {
aParseMode=eParseMode_quirks;
}
//one last thing: look for a URI that specifies the strict.dtd
theStartPos+=6;
theCount=theEnd-theStartPos;
theSubIndex=aBuffer.Find("STRICT.DTD",PR_TRUE,theStartPos,theCount);
if(0<theSubIndex) {
//Since we found it, regardless of what's in the descr-text, kick into strict mode.
aParseMode=eParseMode_strict;
aDocType=eHTML4Text;
}
if (0==theErr){
switch(theMajorVersion) {
case 0: case 1: case 2: case 3:
if(aDocType!=eXHTMLText){
aParseMode=eParseMode_quirks; //be as backward compatible as possible
aDocType=eHTML3Text;
}
break;
default:
//XXX hack -- someday, the next line of code will be criticized
//for it's lack of vision...
if(theMajorVersion>20) {
aParseMode=eParseMode_noquirks;
}
break;
} //switch
}
} //if
else {
PRInt32 thePos=aBuffer.Find("HTML",PR_TRUE,1,50);
if(kNotFound!=thePos) {
aDocType=eHTML4Text;
PRInt32 theIDPos=aBuffer.Find("PublicID",thePos);
if(kNotFound==theIDPos)
theIDPos=aBuffer.Find("SystemID",thePos);
aParseMode=(kNotFound==theIDPos) ? eParseMode_quirks : eParseMode_strict;
}
}
}
else if(kNotFound<(theIndex=aBuffer.Find("?XML",PR_TRUE,0,128))) {
aDocType=eXMLText;
aParseMode=eParseMode_strict;
}
if(theModeStr) {
if(0==nsCRT::strcasecmp(theModeStr,"strict"))
aParseMode=eParseMode_strict;
}
else {
if(eParseMode_unknown==aParseMode) {
aBuffer.InsertWithConversion("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\n",0);
aDocType=eHTML3Text;
aParseMode=eParseMode_quirks;
}
}
}
/**
*
@ -496,6 +693,131 @@ PRBool FindSuitableDTD( CParserContext& aParserContext,nsString& aBuffer) {
return PR_FALSE;
}
/**
 *  Call this method to create a DTD suited to a given DOCTYPE.
 *
 *  The doctype string is handed to DetermineParseMode(); the document type
 *  and parse mode left behind by that call select the DTD class:
 *    - XHTML or XML document type          -> well-formed DTD
 *    - HTML4 document type in strict mode  -> "other" DTD
 *    - anything else                       -> navigator DTD
 *
 *  @update  harishd 05/01/00
 *  @param   aDTD -- handed to CreateInstance() to receive the new DTD.
 *  @param   aDocTypeStr -- A doctype for which a DTD is to be selected.
 *                          When null, no DTD is created. The string is
 *                          passed to DetermineParseMode() by non-const
 *                          reference.
 *  @return  NS_OK when nothing had to be created, otherwise whatever
 *           CreateInstance() returned.
 */
nsresult nsParser::CreateCompatibleDTDForDocType(nsIDTD** aDTD, nsString* aDocTypeStr)
{
  const nsCID* theClassID=0;

  if(aDocTypeStr) {
    eParseMode     theMode=eParseMode_unknown;
    eParserDocType theKind=ePlainText;

    DetermineParseMode(*aDocTypeStr,theMode,theKind);

    if((eXHTMLText==theKind) || (eXMLText==theKind)) {
      theClassID=&kWellFormedDTDCID;
    }
    else if((eHTML4Text==theKind) && (eParseMode_strict==theMode)) {
      theClassID=&kCOtherDTDCID;
    }
    else {
      // non-strict HTML4, HTML3 and every other document type
      theClassID=&kNavDTDCID;
    }
  }

  if(!theClassID) {
    return NS_OK;
  }
  return nsComponentManager::CreateInstance(*theClassID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD);
}
/**
 *  Call this method to create a DTD suited to a given mime type.
 *
 *  Mapping applied:
 *    - HTML text, strict parse mode   -> "other" DTD
 *    - HTML text, any other mode      -> navigator DTD
 *    - plain text                     -> navigator DTD
 *    - XML, XUL or RDF text           -> well-formed DTD
 *    - XIF text                       -> RTF DTD class ID
 *    - anything else                  -> navigator DTD
 *
 *  NOTE(review): XIF content is mapped to the RTF DTD class ID -- confirm
 *  that this is intended.
 *  NOTE(review): nsParser.h declares a static member of nsParser with this
 *  name and signature, but this definition is not qualified with
 *  nsParser:: -- confirm which one is meant.
 *
 *  @update  harishd 05/01/00
 *  @param   aDTD -- handed to CreateInstance() to receive the new DTD.
 *  @param   aMimeType -- A mimetype for which a DTD is to be selected.
 *                        When null, no DTD is created.
 *  @param   aParseMode -- Used with aMimeType to choose the correct DTD;
 *                         an assertion fires when it is eParseMode_unknown.
 *  @return  NS_OK when aMimeType is null, otherwise whatever
 *           CreateInstance() returned.
 */
nsresult CreateCompatibleDTDForMimeType(nsIDTD** aDTD, const nsString* aMimeType=nsnull,
                                        eParseMode aParseMode=eParseMode_unknown)
{
  if(!aMimeType) {
    return NS_OK;
  }

  NS_ASSERTION(aParseMode!=eParseMode_unknown,"DTD selection might require a parsemode");

  // The navigator DTD serves plain text and every unrecognized mime type.
  const nsCID* theClassID=&kNavDTDCID;

  if(aMimeType->EqualsWithConversion(kHTMLTextContentType)) {
    theClassID=(eParseMode_strict==aParseMode) ? &kCOtherDTDCID : &kNavDTDCID;
  }
  else if(!aMimeType->EqualsWithConversion(kPlainTextContentType)) {
    const PRBool isWellFormedKind=aMimeType->EqualsWithConversion(kXMLTextContentType) ||
                                  aMimeType->EqualsWithConversion(kXULTextContentType) ||
                                  aMimeType->EqualsWithConversion(kRDFTextContentType);
    if(isWellFormedKind) {
      theClassID=&kWellFormedDTDCID;
    }
    else if(aMimeType->EqualsWithConversion(kXIFTextContentType)) {
      theClassID=&kRtfDTDCID;
    }
  }

  return nsComponentManager::CreateInstance(*theClassID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD);
}
/**
 *  Call this method to create a DTD suited to a given parser command.
 *
 *  eViewSource selects the view-source DTD and eViewNormal the navigator
 *  DTD. For any other command (eViewErrors included) no DTD is created.
 *
 *  NOTE(review): nsParser.h declares a static member of nsParser with this
 *  name and signature, but this definition is not qualified with
 *  nsParser:: -- confirm which one is meant.
 *
 *  @update  harishd 05/01/00
 *  @param   aDTD -- handed to CreateInstance() to receive the new DTD.
 *  @param   aCommand -- A command for which a DTD is to be selected.
 *  @return  NS_OK when no DTD was selected, otherwise whatever
 *           CreateInstance() returned.
 */
nsresult CreateCompatibleDTDForCommand(nsIDTD** aDTD, eParserCommands aCommand=eViewNormal)
{
  const nsCID* theClassID=0;

  if(eViewSource==aCommand) {
    theClassID=&kViewSourceDTDCID;
  }
  else if(eViewNormal==aCommand) {
    theClassID=&kNavDTDCID;
  }

  if(!theClassID) {
    return NS_OK;
  }
  return nsComponentManager::CreateInstance(*theClassID, nsnull, NS_GET_IID(nsIDTD),(void**)aDTD);
}
#ifdef TEST_DOCTYPES
static const char* doctypes[] = {
@ -608,7 +930,7 @@ static const char* doctypes[] = {
*/
nsresult nsParser::WillBuildModel(nsString& aFilename){
nsresult result=NS_OK;
nsresult result=NS_OK;
#if TEST_DOCTYPES
@ -617,9 +939,13 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
if(!tested) {
tested=PR_TRUE;
eParseMode theParseMode=eParseMode_unknown;
eParserDocType theDocumentType=ePlainText;
while(*theDocType) {
nsAutoString theType(*theDocType);
eParseMode result=mParserContext->DetermineParseMode(theType);
nsAutoString theType;
theType.AssignWithConversion(*theDocType);
DetermineParseMode(theType,theParseMode,theDocumentType);
theDocType++;
}
}
@ -631,7 +957,7 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){
mMinorIteration=-1;
nsString& theBuffer=mParserContext->mScanner->GetBuffer();
mParserContext->DetermineParseMode(theBuffer);
DetermineParseMode(theBuffer,mParserContext->mParseMode,mParserContext->mDocType);
if(PR_TRUE==FindSuitableDTD(*mParserContext,theBuffer)) {
mParserContext->mDTD->WillBuildModel( *mParserContext,mSink);

View File

@ -360,7 +360,40 @@ private:
*/
PRBool DidTokenize(PRBool aIsFinalChunk = PR_FALSE);
/**
* Call this method to determine a DTD for a DOCTYPE
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aDocTypeStr -- A doctype for which a DTD is to be selected.
* Note: selection by mime type or by command is provided separately by
* CreateCompatibleDTDForMimeType() and CreateCompatibleDTDForCommand().
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForDocType(nsIDTD** aDTD, nsString* aDocTypeStr);
/**
* Call this method to determine a DTD for a given mime type.
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aMimeType -- A mimetype for which a DTD is to be selected.
Note: aParseMode might be required.
* @param aParseMode -- Used with aMimeType to choose the correct DTD.
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForMimeType(nsIDTD** aDTD, const nsString* aMimeType=nsnull,
eParseMode aParseMode=eParseMode_unknown);
/**
* Call this method to determine a DTD for a given command
*
* @update harishd 05/01/00
* @param aDTD -- Carries the deduced ( from DOCTYPE ) DTD.
* @param aCommand -- A command for which a DTD is to be selected.
* @return NS_OK if succeeded else ERROR.
*/
static nsresult CreateCompatibleDTDForCommand(nsIDTD** aDTD, eParserCommands aCommand=eViewNormal);
protected:
//*********************************************
// And now, some data members...

View File

@ -54,6 +54,19 @@
#define NS_XIF_DTD_CID \
{ 0xa6cf910e, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }
// Class IDs for the additional DTDs. Each macro expands to a bare brace
// initializer with no trailing semicolon, so it can be used wherever an
// initializer is expected (the statement using it supplies its own ';').

// {CCF5BED0-1AF8-11d4-812B-0010A4E0C706}
#define NS_COTHER_DTD_CID \
{ 0xccf5bed0, 0x1af8, 0x11d4, { 0x81, 0x2b, 0x0, 0x10, 0xa4, 0xe0, 0xc7, 0x6 } }
// {8323FAD0-2102-11d4-8142-000064657374}
#define NS_VIEWSOURCE_DTD_CID \
{ 0x8323fad0, 0x2102, 0x11d4, { 0x81, 0x42, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
// {8323FAD1-2102-11d4-8142-000064657374}
#define NS_CRTF_DTD_CID \
{ 0x8323fad1, 0x2102, 0x11d4, { 0x81, 0x42, 0x0, 0x0, 0x64, 0x65, 0x73, 0x74 } }
// {a6cf910f-15b3-11d2-932e-00805f8add32}
#define NS_HTMLCONTENTSINKSTREAM_CID \
{ 0xa6cf910f, 0x15b3, 0x11d2, { 0x93, 0x2e, 0x0, 0x80, 0x5f, 0x8a, 0xdd, 0x32 } }

View File

@ -31,6 +31,9 @@
#include "nsWellFormedDTD.h"
#include "CNavDTD.h"
#include "nsXIFDTD.h"
#include "COtherDTD.h"
#include "CRtfDTD.h"
#include "nsViewSourceHTML.h"
#include "nsHTMLContentSinkStream.h"
#include "nsHTMLToTXTSinkStream.h"
#include "nsHTMLEntities.h"
@ -98,6 +101,9 @@ static NS_DEFINE_CID(kLoggingSinkCID, NS_LOGGING_SINK_CID);
static NS_DEFINE_CID(kWellFormedDTDCID, NS_WELLFORMEDDTD_CID);
static NS_DEFINE_CID(kNavDTDCID, NS_CNAVDTD_CID);
static NS_DEFINE_CID(kXIFDTDCID, NS_XIF_DTD_CID);
static NS_DEFINE_CID(kCOtherDTDCID, NS_COTHER_DTD_CID);
static NS_DEFINE_CID(kViewSourceDTDCID, NS_VIEWSOURCE_DTD_CID);
static NS_DEFINE_CID(kRtfDTDCID, NS_CRTF_DTD_CID);
static NS_DEFINE_CID(kHTMLContentSinkStreamCID, NS_HTMLCONTENTSINKSTREAM_CID);
static NS_DEFINE_CID(kHTMLToTXTSinkStreamCID, NS_HTMLTOTXTSINKSTREAM_CID);
static NS_DEFINE_CID(kParserServiceCID, NS_PARSERSERVICE_CID);
@ -114,6 +120,9 @@ static Components gComponents[] = {
{ "Well formed DTD", &kWellFormedDTDCID },
{ "Navigator HTML DTD", &kNavDTDCID },
{ "XIF DTD", &kXIFDTDCID },
{ "OTHER DTD", &kCOtherDTDCID },
{ "ViewSource DTD", &kViewSourceDTDCID },
{ "Rtf DTD", &kRtfDTDCID },
{ "HTML Content Sink Stream", &kHTMLContentSinkStreamCID },
{ "HTML To Text Sink Stream", &kHTMLToTXTSinkStreamCID },
{ "ParserService", &kParserServiceCID },
@ -127,6 +136,9 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsLoggingSink)
NS_GENERIC_FACTORY_CONSTRUCTOR(CWellFormedDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(CNavDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsXIFDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(COtherDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(CViewSourceHTML)
NS_GENERIC_FACTORY_CONSTRUCTOR(CRtfDTD)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsHTMLContentSinkStream)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsHTMLToTXTSinkStream)
NS_GENERIC_FACTORY_CONSTRUCTOR(nsParserService)
@ -154,6 +166,9 @@ protected:
nsCOMPtr<nsIGenericFactory> mWellFormedDTDFactory;
nsCOMPtr<nsIGenericFactory> mNavHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mXIFDTDFactory;
nsCOMPtr<nsIGenericFactory> mOtherHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mViewSourceHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mRtfHTMLDTDFactory;
nsCOMPtr<nsIGenericFactory> mHTMLContentSinkStreamFactory;
nsCOMPtr<nsIGenericFactory> mHTMLToTXTSinkStreamFactory;
nsCOMPtr<nsIGenericFactory> mParserServiceFactory;
@ -260,6 +275,27 @@ nsParserModule::GetClassObject(nsIComponentManager *aCompMgr,
&nsXIFDTDConstructor);
}
fact = mXIFDTDFactory;
}
else if (aClass.Equals(kCOtherDTDCID)) {
if (!mOtherHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mOtherHTMLDTDFactory),
&COtherDTDConstructor);
}
fact = mOtherHTMLDTDFactory;
}
else if (aClass.Equals(kViewSourceDTDCID)) {
if (!mViewSourceHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mViewSourceHTMLDTDFactory),
&CViewSourceHTMLConstructor);
}
fact = mViewSourceHTMLDTDFactory;
}
else if (aClass.Equals(kRtfDTDCID)) {
if (!mRtfHTMLDTDFactory) {
rv = NS_NewGenericFactory(getter_AddRefs(mRtfHTMLDTDFactory),
&CRtfDTDConstructor);
}
fact = mRtfHTMLDTDFactory;
}
else if (aClass.Equals(kHTMLContentSinkStreamCID)) {
if (!mHTMLContentSinkStreamFactory) {