diff --git a/htmlparser/src/nsIParser.h b/htmlparser/src/nsIParser.h
index 5456b01afb5c..d06c29427cb2 100644
--- a/htmlparser/src/nsIParser.h
+++ b/htmlparser/src/nsIParser.h
@@ -66,6 +66,16 @@ enum eCRCQuality {
};
+typedef enum { // identifies where a document's charset came from
+ kCharsetUninitialized = 0,
+ kCharsetFromUserDefault ,
+ kCharsetFromDocTypeDefault,
+ kCharsetFromParentFrame,
+ kCharsetFromAutoDetection,
+ kCharsetFromMetaTag,
+ kCharsetFromHTTPHeader
+} nsCharsetSource; // nsParser::Parse compares these values numerically, so declaration order matters
+
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
/**
@@ -115,6 +125,18 @@ class nsIParser : public nsISupports {
*/
virtual void SetCommand(const char* aCommand)=0;
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document
+ * @param aSource- the source of the charset (an nsCharsetSource value)
+ * @return nada
+ */
+ virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource)=0;
+
+
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions
diff --git a/htmlparser/src/nsParser.cpp b/htmlparser/src/nsParser.cpp
index 7e841c85d831..0c1a4c74ffff 100644
--- a/htmlparser/src/nsParser.cpp
+++ b/htmlparser/src/nsParser.cpp
@@ -142,7 +142,7 @@ CSharedParserObjects& GetSharedObjects() {
* @param
* @return
*/
-nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") {
+nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") , mCharset("ISO-8859-1") {
NS_INIT_REFCNT();
mParserFilter = 0;
mObserver = 0;
@@ -151,6 +151,7 @@ nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput(""
mTokenObserver=anObserver;
mStreamStatus=0;
mDTDVerification=PR_FALSE;
+ mCharsetSource=kCharsetUninitialized;
}
@@ -245,6 +246,22 @@ void nsParser::SetCommand(const char* aCommand){
mCommand=aCommand;
}
+
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load (both values are stored unconditionally)
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document; stored in mCharset
+ * @param aCharsetSource- the source of the charset; stored in mCharsetSource
+ * @return nada
+ */
+void nsParser::SetDocumentCharset(nsString& aCharset, nsCharsetSource aCharsetSource){
+ mCharset = aCharset;
+ mCharsetSource = aCharsetSource;
+}
+
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@@ -555,25 +572,6 @@ nsParser::IsParserEnabled()
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
NS_PRECONDITION(0!=aURL,kNullURL);
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXXX get Doc Type Default (e.g. UTF-8 for XML)
-
- // XXX We should really put if doc == html for the following line
- charset = "ISO-8859-1";
- charsetSource = kCharsetFromDocTypeDefault;
-
-
-
nsresult result=kBadURL;
mDTDVerification=aVerifyEnabled;
if(aURL) {
@@ -588,21 +586,26 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
theName.Right(last4, 4);
if(last4.EqualsIgnoreCase(".xul") || last4.EqualsIgnoreCase(".xml") || last4.EqualsIgnoreCase(".rdf") )
{
- charset = "UTF-8";
+ if(kCharsetFromDocTypeDefault >= mCharsetSource) {
+ mCharset = "UTF-8";
+ mCharsetSource = kCharsetFromDocTypeDefault;
+ }
}
// XXX begin of meta tag charset hack
if(theName.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theName;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
- CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
+ CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, mCharset, mCharsetSource),aURL,aListener);
if(pc) {
pc->mMultipart=PR_TRUE;
pc->mContextType=CParserContext::eCTURL;
@@ -625,38 +628,24 @@ nsresult nsParser::Parse(nsIInputStream& aStream,PRBool aVerifyEnabled){
mDTDVerification=aVerifyEnabled;
nsresult result=NS_ERROR_OUT_OF_MEMORY;
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXXX get Doc Type Default (e.g. UTF-8 for XML)
-
- // XXX We should really put if doc == html for the following line
- charset = "ISO-8859-1";
- charsetSource = kCharsetFromDocTypeDefault;
-
//ok, time to create our tokenizer and begin the process
nsAutoString theUnknownFilename("unknown");
// XXX begin of meta tag charset hack
if(theUnknownFilename.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theUnknownFilename;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
nsInputStream input(&aStream);
- CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, charset, charsetSource,PR_FALSE),&aStream,0);
+ CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, mCharset, mCharsetSource,PR_FALSE),&aStream,0);
if(pc) {
PushContext(*pc);
pc->mSourceType=kHTMLTextContentType;
@@ -691,33 +680,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
}
#endif
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXX temp hack to make parser use UTF-8 as default charset for XML, RDF, XUL
- // XXX This should be removed once we have the SetDefaultCharset in the nsIParser interface
if(aContentType.EqualsIgnoreCase("text/xul") || aContentType.EqualsIgnoreCase("text/xml") || aContentType.EqualsIgnoreCase("text/rdf") )
{
- charset = "UTF-8";
- } else {
- charset = "ISO-8859-1";
- }
- charsetSource = kCharsetFromDocTypeDefault;
+ if(kCharsetFromDocTypeDefault >= mCharsetSource) {
+ mCharset = "UTF-8";
+ mCharsetSource = kCharsetFromDocTypeDefault;
+ }
+ }
// XXX begin of meta tag charset hack
nsAutoString theFakeURL("fromString");
if(theFakeURL.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theFakeURL;
nsParser::gHackMetaCharset = "";
@@ -737,7 +715,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
//only make a new context if we dont have one, OR if we do, but has a different context key...
- pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
+ pc=new CParserContext(new nsScanner(mUnusedInput, mCharset, mCharsetSource),aKey, 0);
if(pc) {
PushContext(*pc);
pc->mStreamListenerState=eOnStart;
diff --git a/htmlparser/src/nsParser.h b/htmlparser/src/nsParser.h
index f962994c3b72..9a203f087f28 100644
--- a/htmlparser/src/nsParser.h
+++ b/htmlparser/src/nsParser.h
@@ -126,6 +126,18 @@ friend class CTokenHandler;
*/
virtual void SetCommand(const char* aCommand);
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document
+ * @param aSource- the source of the charset (an nsCharsetSource value)
+ * @return nada
+ */
+ virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource);
+
+
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void RegisterDTD(nsIDTD* aDTD);
@@ -328,6 +340,8 @@ protected:
PRInt32 mStreamStatus;
nsITokenObserver* mTokenObserver;
nsString mUnusedInput;
+ nsString mCharset;
+ nsCharsetSource mCharsetSource;
};
diff --git a/htmlparser/src/nsScanner.h b/htmlparser/src/nsScanner.h
index 993bd33393fa..025db1883d2a 100644
--- a/htmlparser/src/nsScanner.h
+++ b/htmlparser/src/nsScanner.h
@@ -39,16 +39,6 @@
#include "nsFileStream.h"
-typedef enum {
- kCharsetUninitialized = 0,
- kCharsetFromUserDefault ,
- kCharsetFromDocTypeDefault,
- kCharsetFromParentFrame,
- kCharsetFromAutoDetection,
- kCharsetFromMetaTag,
- kCharsetFromHTTPHeader
-} nsCharsetSource;
-
class nsScanner {
public:
diff --git a/parser/htmlparser/src/nsIParser.h b/parser/htmlparser/src/nsIParser.h
index 5456b01afb5c..d06c29427cb2 100644
--- a/parser/htmlparser/src/nsIParser.h
+++ b/parser/htmlparser/src/nsIParser.h
@@ -66,6 +66,16 @@ enum eCRCQuality {
};
+typedef enum { // identifies where a document's charset came from
+ kCharsetUninitialized = 0,
+ kCharsetFromUserDefault ,
+ kCharsetFromDocTypeDefault,
+ kCharsetFromParentFrame,
+ kCharsetFromAutoDetection,
+ kCharsetFromMetaTag,
+ kCharsetFromHTTPHeader
+} nsCharsetSource; // nsParser::Parse compares these values numerically, so declaration order matters
+
enum eStreamState {eNone,eOnStart,eOnDataAvail,eOnStop};
/**
@@ -115,6 +125,18 @@ class nsIParser : public nsISupports {
*/
virtual void SetCommand(const char* aCommand)=0;
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document
+ * @param aSource- the source of the charset (an nsCharsetSource value)
+ * @return nada
+ */
+ virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource)=0;
+
+
/******************************************************************************************
* Parse methods always begin with an input source, and perform conversions
diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp
index 7e841c85d831..0c1a4c74ffff 100644
--- a/parser/htmlparser/src/nsParser.cpp
+++ b/parser/htmlparser/src/nsParser.cpp
@@ -142,7 +142,7 @@ CSharedParserObjects& GetSharedObjects() {
* @param
* @return
*/
-nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") {
+nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput("") , mCharset("ISO-8859-1") {
NS_INIT_REFCNT();
mParserFilter = 0;
mObserver = 0;
@@ -151,6 +151,7 @@ nsParser::nsParser(nsITokenObserver* anObserver) : mCommand(""), mUnusedInput(""
mTokenObserver=anObserver;
mStreamStatus=0;
mDTDVerification=PR_FALSE;
+ mCharsetSource=kCharsetUninitialized;
}
@@ -245,6 +246,22 @@ void nsParser::SetCommand(const char* aCommand){
mCommand=aCommand;
}
+
+
+/**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load (both values are stored unconditionally)
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document; stored in mCharset
+ * @param aCharsetSource- the source of the charset; stored in mCharsetSource
+ * @return nada
+ */
+void nsParser::SetDocumentCharset(nsString& aCharset, nsCharsetSource aCharsetSource){
+ mCharset = aCharset;
+ mCharsetSource = aCharsetSource;
+}
+
/**
* This method gets called in order to set the content
* sink for this parser to dump nodes to.
@@ -555,25 +572,6 @@ nsParser::IsParserEnabled()
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
NS_PRECONDITION(0!=aURL,kNullURL);
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXXX get Doc Type Default (e.g. UTF-8 for XML)
-
- // XXX We should really put if doc == html for the following line
- charset = "ISO-8859-1";
- charsetSource = kCharsetFromDocTypeDefault;
-
-
-
nsresult result=kBadURL;
mDTDVerification=aVerifyEnabled;
if(aURL) {
@@ -588,21 +586,26 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
theName.Right(last4, 4);
if(last4.EqualsIgnoreCase(".xul") || last4.EqualsIgnoreCase(".xml") || last4.EqualsIgnoreCase(".rdf") )
{
- charset = "UTF-8";
+ if(kCharsetFromDocTypeDefault >= mCharsetSource) {
+ mCharset = "UTF-8";
+ mCharsetSource = kCharsetFromDocTypeDefault;
+ }
}
// XXX begin of meta tag charset hack
if(theName.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theName;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
- CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
+ CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, mCharset, mCharsetSource),aURL,aListener);
if(pc) {
pc->mMultipart=PR_TRUE;
pc->mContextType=CParserContext::eCTURL;
@@ -625,38 +628,24 @@ nsresult nsParser::Parse(nsIInputStream& aStream,PRBool aVerifyEnabled){
mDTDVerification=aVerifyEnabled;
nsresult result=NS_ERROR_OUT_OF_MEMORY;
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXXX get Doc Type Default (e.g. UTF-8 for XML)
-
- // XXX We should really put if doc == html for the following line
- charset = "ISO-8859-1";
- charsetSource = kCharsetFromDocTypeDefault;
-
//ok, time to create our tokenizer and begin the process
nsAutoString theUnknownFilename("unknown");
// XXX begin of meta tag charset hack
if(theUnknownFilename.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theUnknownFilename;
nsParser::gHackMetaCharset = "";
// XXX end of meta tag charset hack
nsInputStream input(&aStream);
- CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, charset, charsetSource,PR_FALSE),&aStream,0);
+ CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename, input, mCharset, mCharsetSource,PR_FALSE),&aStream,0);
if(pc) {
PushContext(*pc);
pc->mSourceType=kHTMLTextContentType;
@@ -691,33 +680,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
}
#endif
- nsAutoString charset;
- nsCharsetSource charsetSource;
-
- // XXXX get HTTP charset here
- // charset =
- // charsetSource = kCharsetFromHTTPHeader;
-
- // XXXX get User Prefernce charset here
- // charset =
- // charsetSource = kCharsetFromUserDefault;
-
- // XXX temp hack to make parser use UTF-8 as default charset for XML, RDF, XUL
- // XXX This should be removed once we have the SetDefaultCharset in the nsIParser interface
if(aContentType.EqualsIgnoreCase("text/xul") || aContentType.EqualsIgnoreCase("text/xml") || aContentType.EqualsIgnoreCase("text/rdf") )
{
- charset = "UTF-8";
- } else {
- charset = "ISO-8859-1";
- }
- charsetSource = kCharsetFromDocTypeDefault;
+ if(kCharsetFromDocTypeDefault >= mCharsetSource) {
+ mCharset = "UTF-8";
+ mCharsetSource = kCharsetFromDocTypeDefault;
+ }
+ }
// XXX begin of meta tag charset hack
nsAutoString theFakeURL("fromString");
if(theFakeURL.Equals(nsParser::gHackMetaCharsetURL) && (! nsParser::gHackMetaCharset.Equals("")))
{
- charset = nsParser::gHackMetaCharset;
- charsetSource = kCharsetFromMetaTag;
+ if(kCharsetFromMetaTag > mCharsetSource) {
+ mCharset = nsParser::gHackMetaCharset;
+ mCharsetSource = kCharsetFromMetaTag;
+ }
}
nsParser::gHackMetaCharsetURL = theFakeURL;
nsParser::gHackMetaCharset = "";
@@ -737,7 +715,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
//only make a new context if we dont have one, OR if we do, but has a different context key...
- pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
+ pc=new CParserContext(new nsScanner(mUnusedInput, mCharset, mCharsetSource),aKey, 0);
if(pc) {
PushContext(*pc);
pc->mStreamListenerState=eOnStart;
diff --git a/parser/htmlparser/src/nsParser.h b/parser/htmlparser/src/nsParser.h
index f962994c3b72..9a203f087f28 100644
--- a/parser/htmlparser/src/nsParser.h
+++ b/parser/htmlparser/src/nsParser.h
@@ -126,6 +126,18 @@ friend class CTokenHandler;
*/
virtual void SetCommand(const char* aCommand);
+ /**
+ * Call this method once you've created a parser, and want to instruct it
+ * about what charset to load
+ *
+ * @update ftang 4/23/99
+ * @param aCharset- the charset of the document
+ * @param aSource- the source of the charset (an nsCharsetSource value)
+ * @return nada
+ */
+ virtual void SetDocumentCharset(nsString& aCharset, nsCharsetSource aSource);
+
+
virtual nsIParserFilter* SetParserFilter(nsIParserFilter* aFilter);
virtual void RegisterDTD(nsIDTD* aDTD);
@@ -328,6 +340,8 @@ protected:
PRInt32 mStreamStatus;
nsITokenObserver* mTokenObserver;
nsString mUnusedInput;
+ nsString mCharset;
+ nsCharsetSource mCharsetSource;
};
diff --git a/parser/htmlparser/src/nsScanner.h b/parser/htmlparser/src/nsScanner.h
index 993bd33393fa..025db1883d2a 100644
--- a/parser/htmlparser/src/nsScanner.h
+++ b/parser/htmlparser/src/nsScanner.h
@@ -39,16 +39,6 @@
#include "nsFileStream.h"
-typedef enum {
- kCharsetUninitialized = 0,
- kCharsetFromUserDefault ,
- kCharsetFromDocTypeDefault,
- kCharsetFromParentFrame,
- kCharsetFromAutoDetection,
- kCharsetFromMetaTag,
- kCharsetFromHTTPHeader
-} nsCharsetSource;
-
class nsScanner {
public: