add SetDocumentCharset method to nsIParser, move nsCharsetSource from nsScanner.h to nsIParser.h

This commit is contained in:
ftang%netscape.com 1999-04-26 17:49:45 +00:00
parent 50e071071c
commit 55c8150ac3
8 changed files with 158 additions and 150 deletions

View File

@ -66,6 +66,16 @@ enum eCRCQuality {
};
/*
 * Identifies where a document's charset value came from.
 *
 * The numeric order is significant: nsParser::Parse compares these values
 * with > / >= to decide whether a newly discovered charset may replace the
 * one already recorded, so a larger value outranks a smaller one.
 * Do not reorder the enumerators.
 */
typedef enum {
  kCharsetUninitialized      = 0,  /* nothing recorded yet (lowest rank) */
  kCharsetFromUserDefault    = 1,
  kCharsetFromDocTypeDefault = 2,
  kCharsetFromParentFrame    = 3,
  kCharsetFromAutoDetection  = 4,
  kCharsetFromMetaTag        = 5,
  kCharsetFromHTTPHeader     = 6   /* highest rank */
} nsCharsetSource;
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
/**
@ -115,6 +125,18 @@ class nsIParser : public nsISupports {
*/
virtual void SetCommand(const char* aCommand)=0;
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @update ftang 4/23/99
* @param aCharset- the charset of a document
* @param aSource- the source of the charset
* @return nada
*/
virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource)=0;
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions

View File

@ -142,7 +142,7 @@ CSharedParserObjects& GetSharedObjects() {
* @param
* @return
*/
nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") {
nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") , mCharset("ISO-8859-1") {
NS_INIT_REFCNT();
mParserFilter = 0;
mObserver = 0;
@ -151,6 +151,7 @@ nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput(""
mTokenObserver=anObserver;
mStreamStatus=0;
mDTDVerification=PR_FALSE;
mCharsetSource=kCharsetUninitialized;
}
@ -245,6 +246,22 @@ void nsParser::SetCommand(const char* aCommand){
mCommand=aCommand;
}
/**
 * Records the charset to use for the document together with the source
 * that charset came from. Both values are stored unconditionally; no
 * priority check against the previously stored source is made here.
 * The Parse() methods later pass the stored values to the nsScanner
 * they create.
 *
 * @update ftang 4/23/99
 * @param aCharset- the charset of the document
 * @param aCharsetSource- the source of the charset
 * @return nada
 */
void nsParser::SetDocumentCharset(nsString& aCharset, nsCharsetSource aCharsetSource){
  // Remember where the charset came from, then the charset itself.
  mCharsetSource = aCharsetSource;
  mCharset = aCharset;
}
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@ -555,25 +572,6 @@ nsParser::IsParserEnabled()
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
NS_PRECONDITION(0!=aURL,kNullURL);
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
// XXX We should really put if doc == html for the following line
charset = "ISO-8859-1";
charsetSource = kCharsetFromDocTypeDefault;
nsresult result=kBadURL;
mDTDVerification=aVerifyEnabled;
if(aURL) {
@ -588,21 +586,26 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
theName.Right(last4, 4);
if(last4.EqualsIgnoreCase(".xul") || last4.EqualsIgnoreCase(".xml") || last4.EqualsIgnoreCase(".rdf") )
{
charset = "UTF-8";
if(kCharsetFromDocTypeDefault >= mCharsetSource) {
mCharset = "UTF-8";
mCharsetSource = kCharsetFromDocTypeDefault;
}
}
// XXX begin of meta tag charset hack
if(theName.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theName;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, mCharset, mCharsetSource),aURL,aListener);
if(pc) {
pc->mMultipart=PR_TRUE;
pc->mContextType=CParserContext::eCTURL;
@ -625,38 +628,24 @@ nsresult nsParser::Parse(nsIInputStream& aStream,PRBool aVerifyEnabled){
mDTDVerification=aVerifyEnabled;
nsresult result=NS_ERROR_OUT_OF_MEMORY;
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
// XXX We should really put if doc == html for the following line
charset = "ISO-8859-1";
charsetSource = kCharsetFromDocTypeDefault;
//ok, time to create our tokenizer and begin the process
nsAutoString theUnknownFilename("unknown");
// XXX begin of meta tag charset hack
if(theUnknownFilename.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theUnknownFilename;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
nsInputStream input(&aStream);
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, charset, charsetSource,PR_FALSE),&aStream,0);
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, mCharset, mCharsetSource,PR_FALSE),&aStream,0);
if(pc) {
PushContext(*pc);
pc->mSourceType=kHTMLTextContentType;
@ -691,33 +680,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
}
#endif
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXX temp hack to make parser use UTF-8 as default charset for XML, RDF, XUL
// XXX This should be removed once we have the SetDefaultCharset in the nsIParser interface
if(aContentType.EqualsIgnoreCase("text/xul") || aContentType.EqualsIgnoreCase("text/xml") || aContentType.EqualsIgnoreCase("text/rdf") )
{
charset = "UTF-8";
} else {
charset = "ISO-8859-1";
if(kCharsetFromDocTypeDefault >= mCharsetSource) {
mCharset = "UTF-8";
mCharsetSource = kCharsetFromDocTypeDefault;
}
}
charsetSource = kCharsetFromDocTypeDefault;
// XXX begin of meta tag charset hack
nsAutoString theFakeURL("fromString");
if(theFakeURL.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theFakeURL;
nsParser::gHackMetaCharset = "";
@ -737,7 +715,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
//only make a new context if we dont have one, OR if we do, but has a different context key...
pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
pc=new CParserContext(new nsScanner(mUnusedInput, mCharset, mCharsetSource),aKey, 0);
if(pc) {
PushContext(*pc);
pc->mStreamListenerState=eOnStart;

View File

@ -126,6 +126,18 @@ friend class CTokenHandler;
*/
virtual void SetCommand(const char* aCommand);
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @update ftang 4/23/99
* @param aCharset- the charset of a document
* @param aSource- the source of the charset
* @return nada
*/
virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource);
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void RegisterDTD(nsIDTD* aDTD);
@ -328,6 +340,8 @@ protected:
PRInt32 mStreamStatus;
nsITokenObserver* mTokenObserver;
nsString mUnusedInput;
nsString mCharset;
nsCharsetSource mCharsetSource;
};

View File

@ -39,16 +39,6 @@
#include "nsFileStream.h"
/*
 * Identifies where a document's charset value came from.
 *
 * The numeric order is significant: nsParser::Parse compares these values
 * with > / >= to decide whether a newly discovered charset may replace the
 * one already recorded, so a larger value outranks a smaller one.
 * Do not reorder the enumerators.
 */
typedef enum {
  kCharsetUninitialized      = 0,  /* nothing recorded yet (lowest rank) */
  kCharsetFromUserDefault    = 1,
  kCharsetFromDocTypeDefault = 2,
  kCharsetFromParentFrame    = 3,
  kCharsetFromAutoDetection  = 4,
  kCharsetFromMetaTag        = 5,
  kCharsetFromHTTPHeader     = 6   /* highest rank */
} nsCharsetSource;
class nsScanner {
public:

View File

@ -66,6 +66,16 @@ enum eCRCQuality {
};
/*
 * Identifies where a document's charset value came from.
 *
 * The numeric order is significant: nsParser::Parse compares these values
 * with > / >= to decide whether a newly discovered charset may replace the
 * one already recorded, so a larger value outranks a smaller one.
 * Do not reorder the enumerators.
 */
typedef enum {
  kCharsetUninitialized      = 0,  /* nothing recorded yet (lowest rank) */
  kCharsetFromUserDefault    = 1,
  kCharsetFromDocTypeDefault = 2,
  kCharsetFromParentFrame    = 3,
  kCharsetFromAutoDetection  = 4,
  kCharsetFromMetaTag        = 5,
  kCharsetFromHTTPHeader     = 6   /* highest rank */
} nsCharsetSource;
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
/**
@ -115,6 +125,18 @@ class nsIParser : public nsISupports {
*/
virtual void SetCommand(const char* aCommand)=0;
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @update ftang 4/23/99
* @param aCharset- the charset of a document
* @param aSource- the source of the charset
* @return nada
*/
virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource)=0;
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions

View File

@ -142,7 +142,7 @@ CSharedParserObjects& GetSharedObjects() {
* @param
* @return
*/
nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") {
nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") , mCharset("ISO-8859-1") {
NS_INIT_REFCNT();
mParserFilter = 0;
mObserver = 0;
@ -151,6 +151,7 @@ nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput(""
mTokenObserver=anObserver;
mStreamStatus=0;
mDTDVerification=PR_FALSE;
mCharsetSource=kCharsetUninitialized;
}
@ -245,6 +246,22 @@ void nsParser::SetCommand(const char* aCommand){
mCommand=aCommand;
}
/**
 * Records the charset to use for the document together with the source
 * that charset came from. Both values are stored unconditionally; no
 * priority check against the previously stored source is made here.
 * The Parse() methods later pass the stored values to the nsScanner
 * they create.
 *
 * @update ftang 4/23/99
 * @param aCharset- the charset of the document
 * @param aCharsetSource- the source of the charset
 * @return nada
 */
void nsParser::SetDocumentCharset(nsString& aCharset, nsCharsetSource aCharsetSource){
  // Remember where the charset came from, then the charset itself.
  mCharsetSource = aCharsetSource;
  mCharset = aCharset;
}
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@ -555,25 +572,6 @@ nsParser::IsParserEnabled()
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
NS_PRECONDITION(0!=aURL,kNullURL);
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
// XXX We should really put if doc == html for the following line
charset = "ISO-8859-1";
charsetSource = kCharsetFromDocTypeDefault;
nsresult result=kBadURL;
mDTDVerification=aVerifyEnabled;
if(aURL) {
@ -588,21 +586,26 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
theName.Right(last4, 4);
if(last4.EqualsIgnoreCase(".xul") || last4.EqualsIgnoreCase(".xml") || last4.EqualsIgnoreCase(".rdf") )
{
charset = "UTF-8";
if(kCharsetFromDocTypeDefault >= mCharsetSource) {
mCharset = "UTF-8";
mCharsetSource = kCharsetFromDocTypeDefault;
}
}
// XXX begin of meta tag charset hack
if(theName.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theName;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, mCharset, mCharsetSource),aURL,aListener);
if(pc) {
pc->mMultipart=PR_TRUE;
pc->mContextType=CParserContext::eCTURL;
@ -625,38 +628,24 @@ nsresult nsParser::Parse(nsIInputStream& aStream,PRBool aVerifyEnabled){
mDTDVerification=aVerifyEnabled;
nsresult result=NS_ERROR_OUT_OF_MEMORY;
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
// XXX We should really put if doc == html for the following line
charset = "ISO-8859-1";
charsetSource = kCharsetFromDocTypeDefault;
//ok, time to create our tokenizer and begin the process
nsAutoString theUnknownFilename("unknown");
// XXX begin of meta tag charset hack
if(theUnknownFilename.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theUnknownFilename;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
nsInputStream input(&aStream);
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, charset, charsetSource,PR_FALSE),&aStream,0);
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, mCharset, mCharsetSource,PR_FALSE),&aStream,0);
if(pc) {
PushContext(*pc);
pc->mSourceType=kHTMLTextContentType;
@ -691,33 +680,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
}
#endif
nsAutoString charset;
nsCharsetSource charsetSource;
// XXXX get HTTP charset here
// charset =
// charsetSource = kCharsetFromHTTPHeader;
// XXXX get User Prefernce charset here
// charset =
// charsetSource = kCharsetFromUserDefault;
// XXX temp hack to make parser use UTF-8 as default charset for XML, RDF, XUL
// XXX This should be removed once we have the SetDefaultCharset in the nsIParser interface
if(aContentType.EqualsIgnoreCase("text/xul") || aContentType.EqualsIgnoreCase("text/xml") || aContentType.EqualsIgnoreCase("text/rdf") )
{
charset = "UTF-8";
} else {
charset = "ISO-8859-1";
if(kCharsetFromDocTypeDefault >= mCharsetSource) {
mCharset = "UTF-8";
mCharsetSource = kCharsetFromDocTypeDefault;
}
}
charsetSource = kCharsetFromDocTypeDefault;
// XXX begin of meta tag charset hack
nsAutoString theFakeURL("fromString");
if(theFakeURL.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
charset = nsParser::gHackMetaCharset;
charsetSource = kCharsetFromMetaTag;
if(kCharsetFromMetaTag > mCharsetSource) {
mCharset = nsParser::gHackMetaCharset;
mCharsetSource = kCharsetFromMetaTag;
}
}
nsParser::gHackMetaCharsetURL = theFakeURL;
nsParser::gHackMetaCharset = "";
@ -737,7 +715,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
//only make a new context if we dont have one, OR if we do, but has a different context key...
pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
pc=new CParserContext(new nsScanner(mUnusedInput, mCharset, mCharsetSource),aKey, 0);
if(pc) {
PushContext(*pc);
pc->mStreamListenerState=eOnStart;

View File

@ -126,6 +126,18 @@ friend class CTokenHandler;
*/
virtual void SetCommand(const char* aCommand);
/**
* Call this method once you've created a parser, and want to instruct it
* about what charset to load
*
* @update ftang 4/23/99
* @param aCharset- the charset of a document
* @param aSource- the source of the charset
* @return nada
*/
virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource);
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void RegisterDTD(nsIDTD* aDTD);
@ -328,6 +340,8 @@ protected:
PRInt32 mStreamStatus;
nsITokenObserver* mTokenObserver;
nsString mUnusedInput;
nsString mCharset;
nsCharsetSource mCharsetSource;
};

View File

@ -39,16 +39,6 @@
#include "nsFileStream.h"
/*
 * Identifies where a document's charset value came from.
 *
 * The numeric order is significant: nsParser::Parse compares these values
 * with > / >= to decide whether a newly discovered charset may replace the
 * one already recorded, so a larger value outranks a smaller one.
 * Do not reorder the enumerators.
 */
typedef enum {
  kCharsetUninitialized      = 0,  /* nothing recorded yet (lowest rank) */
  kCharsetFromUserDefault    = 1,
  kCharsetFromDocTypeDefault = 2,
  kCharsetFromParentFrame    = 3,
  kCharsetFromAutoDetection  = 4,
  kCharsetFromMetaTag        = 5,
  kCharsetFromHTTPHeader     = 6   /* highest rank */
} nsCharsetSource;
class nsScanner {
public: