reviewed by rickg. Pick up meta tag and change converter

2025-03-05 00:02:37 +00:00 · 1999-03-08 20:00:23 +00:00 · 1999-03-08 20:00:23 +00:00 · a4489898ad
commit a4489898ad
parent 3df38a1d80
8 changed files with 286 additions and 66 deletions
--- a/htmlparser/src/CNavDTD.cpp
+++ b/htmlparser/src/CNavDTD.cpp
@ -942,10 +942,19 @@ nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsCParserNode
      if(theKey.EqualsIgnoreCase("HTTP-EQUIV")) {
        const nsString& theKey2=aNode.GetKeyAt(1);
        if(theKey2.EqualsIgnoreCase("CONTENT")) {
-          nsScanner* theScanner=mParser->GetScanner();
-          if(theScanner) {
-            const nsString& theValue=aNode.GetValueAt(1);
-            theScanner->SetDocumentCharset(theValue);
+            nsScanner* theScanner=mParser->GetScanner();
+            if(theScanner) {
+              const nsString& theValue=aNode.GetValueAt(1);
+              PRInt32 charsetValueStart = theValue.RFind("charset=", PR_TRUE ) ;
+              if(kNotFound != charsetValueStart) {	
+                 charsetValueStart += 8; // 8 = "charset=".length 
+                 PRInt32 charsetValueEnd = theValue.FindCharInSet("\'\";", charsetValueStart  );
+                 if(kNotFound == charsetValueEnd ) 
+                    charsetValueEnd = theValue.Length();
+                 nsAutoString theCharset;
+                 theValue.Mid(theCharset, charsetValueStart, charsetValueEnd - charsetValueStart);
+                 theScanner->SetDocumentCharset(theCharset, kCharsetFromMetaTag);
+              } //if
          } //if
        }
      } //if
--- a/htmlparser/src/nsParser.cpp
+++ b/htmlparser/src/nsParser.cpp
@ -225,6 +225,7 @@ nsIParserFilter * nsParser::SetParserFilter(nsIParserFilter * aFilter)
  return old;
 }

+
 /**
 *  Call this method once you've created a parser, and want to instruct it
 *  about the command which caused the parser to be constructed. For example,
@ -526,6 +527,24 @@ PRBool nsParser::EnableParser(PRBool aState){
 nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
  NS_PRECONDITION(0!=aURL,kNullURL);

+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
+
+
  nsresult result=kBadURL;
  mDTDVerification=aVerifyEnabled;
  if(aURL) {
@ -533,7 +552,7 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
    nsresult rv = aURL->GetSpec(&spec);
    if (rv != NS_OK) return rv;
    nsAutoString theName(spec);
-    CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE),aURL,aListener);
+    CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
    if(pc) {
      pc->mMultipart=PR_TRUE;
      pc->mContextType=CParserContext::eCTURL;
@ -555,10 +574,27 @@ nsresult nsParser::Parse(fstream& aStream,PRBool aVerifyEnabled){

  mDTDVerification=aVerifyEnabled;
  nsresult  result=NS_ERROR_OUT_OF_MEMORY;
+
+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
  
  //ok, time to create our tokenizer and begin the process
  nsAutoString theUnknownFilename("unknown");
-  CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream,PR_FALSE),&aStream,0);
+  CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream, charset, charsetSource,PR_FALSE),&aStream,0);
  if(pc) {
    PushContext(*pc);
    pc->mSourceType=kHTMLTextContentType;
@ -593,6 +629,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
  }
 #endif

+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
  //NOTE: Make sure that updates to this method don't cause 
  //      bug #2361 to break again!

@ -607,7 +659,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon

    if((!mParserContext) || (mParserContext->mKey!=aKey))  {
      //only make a new context if we dont have one, OR if we do, but has a different context key...
-      pc=new CParserContext(new nsScanner(mUnusedInput),aKey,0);
+      pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
      if(pc) {
        PushContext(*pc);
        pc->mStreamListenerState=eOnStart;  
--- a/htmlparser/src/nsScanner.cpp
+++ b/htmlparser/src/nsScanner.cpp
@ -23,6 +23,7 @@
 #include "nsDebug.h"
 #include "nsIServiceManager.h"
 #include "nsICharsetConverterManager.h"
+#include "nsICharsetAlias.h"


 const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
@ -34,8 +35,6 @@ const int   kBufsize=1;
 const int   kBufsize=64;
 #endif

-// #define DEFAULTCHARSET "Shift_JIS"
-#define DEFAULTCHARSET "ISO-8859-1"

 /**
 *  Use this constructor if you want i/o to be based on 
@ -46,8 +45,8 @@ const int   kBufsize=64;
 *  @param   aMode represents the parser mode (nav, other)
 *  @return  
 */
-nsScanner::nsScanner(nsString& anHTMLString) : 
-  mBuffer(anHTMLString), mFilename("") , mCharset("")
+nsScanner::nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource) : 
+  mBuffer(anHTMLString), mFilename("")
 {
  mTotalRead=mBuffer.Length();
  mIncremental=PR_TRUE;
@ -55,9 +54,10 @@ nsScanner::nsScanner(nsString& anHTMLString) :
  mOffset=0;
  mMarkPos=-1;
  mFileStream=0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
  mUnicodeDecoder = 0;
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);
 }

 /**
@ -69,8 +69,8 @@ nsScanner::nsScanner(nsString& anHTMLString) :
 *  @param   aFilename --
 *  @return  
 */
-nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) : 
-    mBuffer(""), mFilename(aFilename) , mCharset("")
+nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource) : 
+    mBuffer(""), mFilename(aFilename)
 {
  mIncremental=PR_TRUE;
  mOffset=0;
@ -91,8 +91,9 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
    #endif
  } //if
  mUnicodeDecoder = 0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);

 }

@ -105,8 +106,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
 *  @param   aFilename --
 *  @return  
 */
-nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership) :
-    mBuffer(""), mFilename(aFilename) , mCharset("")
+nsScanner::nsScanner(nsString& aFilename,fstream& aStream,const nsString& aCharset, nsCharsetSource aSource, PRBool assumeOwnership) :
+    mBuffer(""), mFilename(aFilename) 
 {    
  mIncremental=PR_TRUE;
  mOffset=0;
@ -115,15 +116,46 @@ nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership
  mOwnsStream=assumeOwnership;
  mFileStream=&aStream;
  mUnicodeDecoder = 0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);
 }

-nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
+nsresult nsScanner::SetDocumentCharset(const nsString& aCharset , nsCharsetSource aSource)
 {
+
  nsresult res = NS_OK;
-  if(! mCharset.EqualsIgnoreCase(aCharset)) // see do we need to change a converter.
+
+  if( aSource < mCharsetSource) // priority is lower than the current one, just ignore the request
+    return res;
+
+  nsICharsetAlias* calias = nsnull;
+  res = nsServiceManager::GetService(kCharsetAliasCID,
+                                       kICharsetAliasIID,
+                                       (nsISupports**)&calias);
+
+  NS_ASSERTION( nsnull != calias, "cannot find charet alias");
+  nsAutoString charsetName = aCharset;
+  if( NS_SUCCEEDED(res) && (nsnull != calias))
  {
+    PRBool same = PR_FALSE;
+    res = calias->Equals(aCharset, mCharset, &same);
+    if(NS_SUCCEEDED(res) && same)
+    {
+      return NS_OK; // no difference, don't change it
+    }
+    // different, need to change it
+    res = calias->GetPreferred(aCharset, charsetName);
+    nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+
+    if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
+    {
+       // failed - unknown alias , fallback to ISO-8859-1
+      charsetName = "ISO-8859-1";
+    }
+    mCharset = charsetName;
+    mCharsetSource = aSource;
+
    nsICharsetConverterManager * ccm = nsnull;
    res = nsServiceManager::GetService(kCharsetConverterManagerCID, 
                                       kICharsetConverterManagerIID, 
@ -131,13 +163,12 @@ nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
    if(NS_SUCCEEDED(res) && (nsnull != ccm))
    {
      nsIUnicodeDecoder * decoder = nsnull;
-      res = ccm->GetUnicodeDecoder(&aCharset, &decoder);
+      res = ccm->GetUnicodeDecoder(&mCharset, &decoder);
      if(NS_SUCCEEDED(res) && (nsnull != decoder))
      {
         NS_IF_RELEASE(mUnicodeDecoder);

         mUnicodeDecoder = decoder;
-         mCharset = aCharset;
      }    
      nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
    }
--- a/htmlparser/src/nsScanner.h
+++ b/htmlparser/src/nsScanner.h
@ -39,6 +39,16 @@
 #include <fstream.h>


+typedef enum {  // Origin of a charset. Order matters: nsScanner::SetDocumentCharset ignores a request whose source compares lower than the current one.
+   kCharsetUninitialized = 0,  // set by the nsScanner constructors before their first SetDocumentCharset call
+   kCharsetFromUserDefault ,  // not yet produced: nsParser::Parse only has an XXXX placeholder for it
+   kCharsetFromDocTypeDefault,  // passed by nsParser::Parse together with "ISO-8859-1"
+   kCharsetFromParentFrame,
+   kCharsetFromAutoDetection,
+   kCharsetFromMetaTag,  // passed by CNavDTD::WillHandleStartTag for a "charset=" value found in the CONTENT attribute
+   kCharsetFromHTTPHeader  // not yet produced: nsParser::Parse only has an XXXX placeholder for it
+} nsCharsetSource;
+
 class nsScanner {
  public:

@ -47,30 +57,36 @@ class nsScanner {
       *  a single string you hand in during construction.
       *  This short cut was added for Javascript.
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& anHTMLString);
+      nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource);

      /**
       *  Use this constructor if you want i/o to be based on 
       *  a file (therefore a stream) or just data you provide via Append().
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& aFilename,PRBool aCreateStream);
+      nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource);

      /**
       *  Use this constructor if you want i/o to be stream based.
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership=PR_TRUE);
+      nsScanner(nsString& aFilename,fstream& aStream, const nsString& aCharset, nsCharsetSource aSource,PRBool assumeOwnership=PR_TRUE);


      ~nsScanner();
@ -269,11 +285,12 @@ class nsScanner {
      /**
       *  Use this setter to change the scanner's unicode decoder
       *
-       *  @update  ftang 2/12/99
+       *  @update  ftang 3/02/99
       *  @param   aCharset a normalized (alias resolved) charset name
+       *  @param   aSource - where the charset info came from
       *  @return  
       */
-      nsresult SetDocumentCharset(const nsString& aCharset);
+      nsresult SetDocumentCharset(const nsString& aCharset, nsCharsetSource aSource);

  protected:

@ -295,6 +312,7 @@ class nsScanner {
      PRUint32        mTotalRead;
      PRBool          mOwnsStream;
      PRBool          mIncremental;
+      nsCharsetSource mCharsetSource;
      nsString        mCharset;
      nsIUnicodeDecoder *mUnicodeDecoder;
 };
--- a/parser/htmlparser/src/CNavDTD.cpp
+++ b/parser/htmlparser/src/CNavDTD.cpp
@ -942,10 +942,19 @@ nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsCParserNode
      if(theKey.EqualsIgnoreCase("HTTP-EQUIV")) {
        const nsString& theKey2=aNode.GetKeyAt(1);
        if(theKey2.EqualsIgnoreCase("CONTENT")) {
-          nsScanner* theScanner=mParser->GetScanner();
-          if(theScanner) {
-            const nsString& theValue=aNode.GetValueAt(1);
-            theScanner->SetDocumentCharset(theValue);
+            nsScanner* theScanner=mParser->GetScanner();
+            if(theScanner) {
+              const nsString& theValue=aNode.GetValueAt(1);
+              PRInt32 charsetValueStart = theValue.RFind("charset=", PR_TRUE ) ;
+              if(kNotFound != charsetValueStart) {	
+                 charsetValueStart += 8; // 8 = "charset=".length 
+                 PRInt32 charsetValueEnd = theValue.FindCharInSet("\'\";", charsetValueStart  );
+                 if(kNotFound == charsetValueEnd ) 
+                    charsetValueEnd = theValue.Length();
+                 nsAutoString theCharset;
+                 theValue.Mid(theCharset, charsetValueStart, charsetValueEnd - charsetValueStart);
+                 theScanner->SetDocumentCharset(theCharset, kCharsetFromMetaTag);
+              } //if
          } //if
        }
      } //if
--- a/parser/htmlparser/src/nsParser.cpp
+++ b/parser/htmlparser/src/nsParser.cpp
@ -225,6 +225,7 @@ nsIParserFilter * nsParser::SetParserFilter(nsIParserFilter * aFilter)
  return old;
 }

+
 /**
 *  Call this method once you've created a parser, and want to instruct it
 *  about the command which caused the parser to be constructed. For example,
@ -526,6 +527,24 @@ PRBool nsParser::EnableParser(PRBool aState){
 nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
  NS_PRECONDITION(0!=aURL,kNullURL);

+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
+
+
  nsresult result=kBadURL;
  mDTDVerification=aVerifyEnabled;
  if(aURL) {
@ -533,7 +552,7 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
    nsresult rv = aURL->GetSpec(&spec);
    if (rv != NS_OK) return rv;
    nsAutoString theName(spec);
-    CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE),aURL,aListener);
+    CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
    if(pc) {
      pc->mMultipart=PR_TRUE;
      pc->mContextType=CParserContext::eCTURL;
@ -555,10 +574,27 @@ nsresult nsParser::Parse(fstream& aStream,PRBool aVerifyEnabled){

  mDTDVerification=aVerifyEnabled;
  nsresult  result=NS_ERROR_OUT_OF_MEMORY;
+
+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
  
  //ok, time to create our tokenizer and begin the process
  nsAutoString theUnknownFilename("unknown");
-  CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream,PR_FALSE),&aStream,0);
+  CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream, charset, charsetSource,PR_FALSE),&aStream,0);
  if(pc) {
    PushContext(*pc);
    pc->mSourceType=kHTMLTextContentType;
@ -593,6 +629,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
  }
 #endif

+  nsAutoString charset;
+  nsCharsetSource charsetSource;
+
+  // XXXX get HTTP charset here
+  // charset =
+  // charsetSource = kCharsetFromHTTPHeader;
+
+  // XXXX get User Preference charset here
+  // charset =
+  // charsetSource = kCharsetFromUserDefault;
+
+  // XXXX get Doc Type Default (e.g. UTF-8 for XML)
+
+  // XXX We should really put if doc == html for the following line
+  charset = "ISO-8859-1";
+  charsetSource = kCharsetFromDocTypeDefault;
  //NOTE: Make sure that updates to this method don't cause 
  //      bug #2361 to break again!

@ -607,7 +659,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon

    if((!mParserContext) || (mParserContext->mKey!=aKey))  {
      //only make a new context if we dont have one, OR if we do, but has a different context key...
-      pc=new CParserContext(new nsScanner(mUnusedInput),aKey,0);
+      pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
      if(pc) {
        PushContext(*pc);
        pc->mStreamListenerState=eOnStart;  
--- a/parser/htmlparser/src/nsScanner.cpp
+++ b/parser/htmlparser/src/nsScanner.cpp
@ -23,6 +23,7 @@
 #include "nsDebug.h"
 #include "nsIServiceManager.h"
 #include "nsICharsetConverterManager.h"
+#include "nsICharsetAlias.h"


 const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
@ -34,8 +35,6 @@ const int   kBufsize=1;
 const int   kBufsize=64;
 #endif

-// #define DEFAULTCHARSET "Shift_JIS"
-#define DEFAULTCHARSET "ISO-8859-1"

 /**
 *  Use this constructor if you want i/o to be based on 
@ -46,8 +45,8 @@ const int   kBufsize=64;
 *  @param   aMode represents the parser mode (nav, other)
 *  @return  
 */
-nsScanner::nsScanner(nsString& anHTMLString) : 
-  mBuffer(anHTMLString), mFilename("") , mCharset("")
+nsScanner::nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource) : 
+  mBuffer(anHTMLString), mFilename("")
 {
  mTotalRead=mBuffer.Length();
  mIncremental=PR_TRUE;
@ -55,9 +54,10 @@ nsScanner::nsScanner(nsString& anHTMLString) :
  mOffset=0;
  mMarkPos=-1;
  mFileStream=0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
  mUnicodeDecoder = 0;
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);
 }

 /**
@ -69,8 +69,8 @@ nsScanner::nsScanner(nsString& anHTMLString) :
 *  @param   aFilename --
 *  @return  
 */
-nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) : 
-    mBuffer(""), mFilename(aFilename) , mCharset("")
+nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource) : 
+    mBuffer(""), mFilename(aFilename)
 {
  mIncremental=PR_TRUE;
  mOffset=0;
@ -91,8 +91,9 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
    #endif
  } //if
  mUnicodeDecoder = 0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);

 }

@ -105,8 +106,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
 *  @param   aFilename --
 *  @return  
 */
-nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership) :
-    mBuffer(""), mFilename(aFilename) , mCharset("")
+nsScanner::nsScanner(nsString& aFilename,fstream& aStream,const nsString& aCharset, nsCharsetSource aSource, PRBool assumeOwnership) :
+    mBuffer(""), mFilename(aFilename) 
 {    
  mIncremental=PR_TRUE;
  mOffset=0;
@ -115,15 +116,46 @@ nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership
  mOwnsStream=assumeOwnership;
  mFileStream=&aStream;
  mUnicodeDecoder = 0;
-  nsAutoString defaultCharset(DEFAULTCHARSET);
-  SetDocumentCharset(defaultCharset);
+  mCharset = "";
+  mCharsetSource = kCharsetUninitialized;
+  SetDocumentCharset(aCharset, aSource);
 }

-nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
+nsresult nsScanner::SetDocumentCharset(const nsString& aCharset , nsCharsetSource aSource)
 {
+
  nsresult res = NS_OK;
-  if(! mCharset.EqualsIgnoreCase(aCharset)) // see do we need to change a converter.
+
+  if( aSource < mCharsetSource) // priority is lower than the current one, just ignore the request
+    return res;
+
+  nsICharsetAlias* calias = nsnull;
+  res = nsServiceManager::GetService(kCharsetAliasCID,
+                                       kICharsetAliasIID,
+                                       (nsISupports**)&calias);
+
+  NS_ASSERTION( nsnull != calias, "cannot find charet alias");
+  nsAutoString charsetName = aCharset;
+  if( NS_SUCCEEDED(res) && (nsnull != calias))
  {
+    PRBool same = PR_FALSE;
+    res = calias->Equals(aCharset, mCharset, &same);
+    if(NS_SUCCEEDED(res) && same)
+    {
+      return NS_OK; // no difference, don't change it
+    }
+    // different, need to change it
+    res = calias->GetPreferred(aCharset, charsetName);
+    nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
+
+    if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
+    {
+       // failed - unknown alias , fallback to ISO-8859-1
+      charsetName = "ISO-8859-1";
+    }
+    mCharset = charsetName;
+    mCharsetSource = aSource;
+
    nsICharsetConverterManager * ccm = nsnull;
    res = nsServiceManager::GetService(kCharsetConverterManagerCID, 
                                       kICharsetConverterManagerIID, 
@ -131,13 +163,12 @@ nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
    if(NS_SUCCEEDED(res) && (nsnull != ccm))
    {
      nsIUnicodeDecoder * decoder = nsnull;
-      res = ccm->GetUnicodeDecoder(&aCharset, &decoder);
+      res = ccm->GetUnicodeDecoder(&mCharset, &decoder);
      if(NS_SUCCEEDED(res) && (nsnull != decoder))
      {
         NS_IF_RELEASE(mUnicodeDecoder);

         mUnicodeDecoder = decoder;
-         mCharset = aCharset;
      }    
      nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
    }
--- a/parser/htmlparser/src/nsScanner.h
+++ b/parser/htmlparser/src/nsScanner.h
@ -39,6 +39,16 @@
 #include <fstream.h>


+typedef enum {  // Origin of a charset. Order matters: nsScanner::SetDocumentCharset ignores a request whose source compares lower than the current one.
+   kCharsetUninitialized = 0,  // set by the nsScanner constructors before their first SetDocumentCharset call
+   kCharsetFromUserDefault ,  // not yet produced: nsParser::Parse only has an XXXX placeholder for it
+   kCharsetFromDocTypeDefault,  // passed by nsParser::Parse together with "ISO-8859-1"
+   kCharsetFromParentFrame,
+   kCharsetFromAutoDetection,
+   kCharsetFromMetaTag,  // passed by CNavDTD::WillHandleStartTag for a "charset=" value found in the CONTENT attribute
+   kCharsetFromHTTPHeader  // not yet produced: nsParser::Parse only has an XXXX placeholder for it
+} nsCharsetSource;
+
 class nsScanner {
  public:

@ -47,30 +57,36 @@ class nsScanner {
       *  a single string you hand in during construction.
       *  This short cut was added for Javascript.
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& anHTMLString);
+      nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource);

      /**
       *  Use this constructor if you want i/o to be based on 
       *  a file (therefore a stream) or just data you provide via Append().
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& aFilename,PRBool aCreateStream);
+      nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource);

      /**
       *  Use this constructor if you want i/o to be stream based.
       *
-       *  @update  gess 5/12/98
+       *  @update  ftang 3/02/99
+       *  @param   aCharset charset
+       *  @param   aSource - where the charset info came from
       *  @param   aMode represents the parser mode (nav, other)
       *  @return  
       */
-      nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership=PR_TRUE);
+      nsScanner(nsString& aFilename,fstream& aStream, const nsString& aCharset, nsCharsetSource aSource,PRBool assumeOwnership=PR_TRUE);


      ~nsScanner();
@ -269,11 +285,12 @@ class nsScanner {
      /**
       *  Use this setter to change the scanner's unicode decoder
       *
-       *  @update  ftang 2/12/99
+       *  @update  ftang 3/02/99
       *  @param   aCharset a normalized (alias resolved) charset name
+       *  @param   aSource - where the charset info came from
       *  @return  
       */
-      nsresult SetDocumentCharset(const nsString& aCharset);
+      nsresult SetDocumentCharset(const nsString& aCharset, nsCharsetSource aSource);

  protected:

@ -295,6 +312,7 @@ class nsScanner {
      PRUint32        mTotalRead;
      PRBool          mOwnsStream;
      PRBool          mIncremental;
+      nsCharsetSource mCharsetSource;
      nsString        mCharset;
      nsIUnicodeDecoder *mUnicodeDecoder;
 };