mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-03-05 00:02:37 +00:00
reviewed by rickg. Pick up meta tag and change converter
This commit is contained in:
parent
3df38a1d80
commit
a4489898ad
@ -942,10 +942,19 @@ nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsCParserNode
|
||||
if(theKey.EqualsIgnoreCase("HTTP-EQUIV")) {
|
||||
const nsString& theKey2=aNode.GetKeyAt(1);
|
||||
if(theKey2.EqualsIgnoreCase("CONTENT")) {
|
||||
nsScanner* theScanner=mParser->GetScanner();
|
||||
if(theScanner) {
|
||||
const nsString& theValue=aNode.GetValueAt(1);
|
||||
theScanner->SetDocumentCharset(theValue);
|
||||
nsScanner* theScanner=mParser->GetScanner();
|
||||
if(theScanner) {
|
||||
const nsString& theValue=aNode.GetValueAt(1);
|
||||
PRInt32 charsetValueStart = theValue.RFind("charset=", PR_TRUE ) ;
|
||||
if(kNotFound != charsetValueStart) {
|
||||
charsetValueStart += 8; // 8 = "charset=".length
|
||||
PRInt32 charsetValueEnd = theValue.FindCharInSet("\'\";", charsetValueStart );
|
||||
if(kNotFound == charsetValueEnd )
|
||||
charsetValueEnd = theValue.Length();
|
||||
nsAutoString theCharset;
|
||||
theValue.Mid(theCharset, charsetValueStart, charsetValueEnd - charsetValueStart);
|
||||
theScanner->SetDocumentCharset(theCharset, kCharsetFromMetaTag);
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
} //if
|
||||
|
@ -225,6 +225,7 @@ nsIParserFilter * nsParser::SetParserFilter(nsIParserFilter * aFilter)
|
||||
return old;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Call this method once you've created a parser, and want to instruct it
|
||||
* about the command which caused the parser to be constructed. For example,
|
||||
@ -526,6 +527,24 @@ PRBool nsParser::EnableParser(PRBool aState){
|
||||
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
|
||||
|
||||
nsresult result=kBadURL;
|
||||
mDTDVerification=aVerifyEnabled;
|
||||
if(aURL) {
|
||||
@ -533,7 +552,7 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
|
||||
nsresult rv = aURL->GetSpec(&spec);
|
||||
if (rv != NS_OK) return rv;
|
||||
nsAutoString theName(spec);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE),aURL,aListener);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
|
||||
if(pc) {
|
||||
pc->mMultipart=PR_TRUE;
|
||||
pc->mContextType=CParserContext::eCTURL;
|
||||
@ -555,10 +574,27 @@ nsresult nsParser::Parse(fstream& aStream,PRBool aVerifyEnabled){
|
||||
|
||||
mDTDVerification=aVerifyEnabled;
|
||||
nsresult result=NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
nsAutoString theUnknownFilename("unknown");
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream,PR_FALSE),&aStream,0);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream, charset, charsetSource,PR_FALSE),&aStream,0);
|
||||
if(pc) {
|
||||
PushContext(*pc);
|
||||
pc->mSourceType=kHTMLTextContentType;
|
||||
@ -593,6 +629,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
|
||||
}
|
||||
#endif
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
//NOTE: Make sure that updates to this method don't cause
|
||||
// bug #2361 to break again!
|
||||
|
||||
@ -607,7 +659,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
|
||||
|
||||
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
|
||||
//only make a new context if we dont have one, OR if we do, but has a different context key...
|
||||
pc=new CParserContext(new nsScanner(mUnusedInput),aKey,0);
|
||||
pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
|
||||
if(pc) {
|
||||
PushContext(*pc);
|
||||
pc->mStreamListenerState=eOnStart;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "nsDebug.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
|
||||
|
||||
const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
@ -34,8 +35,6 @@ const int kBufsize=1;
|
||||
const int kBufsize=64;
|
||||
#endif
|
||||
|
||||
// #define DEFAULTCHARSET "Shift_JIS"
|
||||
#define DEFAULTCHARSET "ISO-8859-1"
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on
|
||||
@ -46,8 +45,8 @@ const int kBufsize=64;
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
mBuffer(anHTMLString), mFilename("") , mCharset("")
|
||||
nsScanner::nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource) :
|
||||
mBuffer(anHTMLString), mFilename("")
|
||||
{
|
||||
mTotalRead=mBuffer.Length();
|
||||
mIncremental=PR_TRUE;
|
||||
@ -55,9 +54,10 @@ nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
mOffset=0;
|
||||
mMarkPos=-1;
|
||||
mFileStream=0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
mUnicodeDecoder = 0;
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -69,8 +69,8 @@ nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
* @param aFilename --
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
mBuffer(""), mFilename(aFilename) , mCharset("")
|
||||
nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource) :
|
||||
mBuffer(""), mFilename(aFilename)
|
||||
{
|
||||
mIncremental=PR_TRUE;
|
||||
mOffset=0;
|
||||
@ -91,8 +91,9 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
#endif
|
||||
} //if
|
||||
mUnicodeDecoder = 0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
|
||||
}
|
||||
|
||||
@ -105,8 +106,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
* @param aFilename --
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership) :
|
||||
mBuffer(""), mFilename(aFilename) , mCharset("")
|
||||
nsScanner::nsScanner(nsString& aFilename,fstream& aStream,const nsString& aCharset, nsCharsetSource aSource, PRBool assumeOwnership) :
|
||||
mBuffer(""), mFilename(aFilename)
|
||||
{
|
||||
mIncremental=PR_TRUE;
|
||||
mOffset=0;
|
||||
@ -115,15 +116,46 @@ nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership
|
||||
mOwnsStream=assumeOwnership;
|
||||
mFileStream=&aStream;
|
||||
mUnicodeDecoder = 0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
}
|
||||
|
||||
nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
|
||||
nsresult nsScanner::SetDocumentCharset(const nsString& aCharset , nsCharsetSource aSource)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
if(! mCharset.EqualsIgnoreCase(aCharset)) // see do we need to change a converter.
|
||||
|
||||
if( aSource < mCharsetSource) // priority is lower than the current one, so just keep the current charset
|
||||
return res;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
PRBool same = PR_FALSE;
|
||||
res = calias->Equals(aCharset, mCharset, &same);
|
||||
if(NS_SUCCEEDED(res) && same)
|
||||
{
|
||||
return NS_OK; // no difference, don't change it
|
||||
}
|
||||
// different, need to change it
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
mCharset = charsetName;
|
||||
mCharsetSource = aSource;
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
@ -131,13 +163,12 @@ nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeDecoder * decoder = nsnull;
|
||||
res = ccm->GetUnicodeDecoder(&aCharset, &decoder);
|
||||
res = ccm->GetUnicodeDecoder(&mCharset, &decoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != decoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeDecoder);
|
||||
|
||||
mUnicodeDecoder = decoder;
|
||||
mCharset = aCharset;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
|
@ -39,6 +39,16 @@
|
||||
#include <fstream.h>
|
||||
|
||||
|
||||
// Identifies where a document's charset information came from.
// The enumerators are listed in ascending numeric order, and
// nsScanner::SetDocumentCharset (as shown in this change) returns early
// when the incoming source compares lower than the one already recorded,
// so a later enumerator appears to take precedence over an earlier one.
typedef enum {
|
||||
// Value the nsScanner constructors assign to mCharsetSource before their
// first SetDocumentCharset call.
kCharsetUninitialized = 0,
|
||||
kCharsetFromUserDefault ,
|
||||
kCharsetFromDocTypeDefault,
|
||||
kCharsetFromParentFrame,
|
||||
kCharsetFromAutoDetection,
|
||||
// Passed by CNavDTD when a META HTTP-EQUIV/CONTENT value contains "charset=".
kCharsetFromMetaTag,
|
||||
kCharsetFromHTTPHeader
|
||||
} nsCharsetSource;
|
||||
|
||||
class nsScanner {
|
||||
public:
|
||||
|
||||
@ -47,30 +57,36 @@ class nsScanner {
|
||||
* a single string you hand in during construction.
|
||||
* This short cut was added for Javascript.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& anHTMLString);
|
||||
nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on
|
||||
* a file (therefore a stream) or just data you provide via Append().
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& aFilename,PRBool aCreateStream);
|
||||
nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be stream based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership=PR_TRUE);
|
||||
nsScanner(nsString& aFilename,fstream& aStream, const nsString& aCharset, nsCharsetSource aSource,PRBool assumeOwnership=PR_TRUE);
|
||||
|
||||
|
||||
~nsScanner();
|
||||
@ -269,11 +285,12 @@ class nsScanner {
|
||||
/**
|
||||
* Use this setter to change the scanner's unicode decoder
|
||||
*
|
||||
* @update ftang 2/12/99
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset a charset name or alias (the preferred name is looked up through nsICharsetAlias)
|
||||
* @param aSource - where the charset info came from
|
||||
* @return
|
||||
*/
|
||||
nsresult SetDocumentCharset(const nsString& aCharset);
|
||||
nsresult SetDocumentCharset(const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
protected:
|
||||
|
||||
@ -295,6 +312,7 @@ class nsScanner {
|
||||
PRUint32 mTotalRead;
|
||||
PRBool mOwnsStream;
|
||||
PRBool mIncremental;
|
||||
nsCharsetSource mCharsetSource;
|
||||
nsString mCharset;
|
||||
nsIUnicodeDecoder *mUnicodeDecoder;
|
||||
};
|
||||
|
@ -942,10 +942,19 @@ nsresult CNavDTD::WillHandleStartTag(CToken* aToken,eHTMLTags aTag,nsCParserNode
|
||||
if(theKey.EqualsIgnoreCase("HTTP-EQUIV")) {
|
||||
const nsString& theKey2=aNode.GetKeyAt(1);
|
||||
if(theKey2.EqualsIgnoreCase("CONTENT")) {
|
||||
nsScanner* theScanner=mParser->GetScanner();
|
||||
if(theScanner) {
|
||||
const nsString& theValue=aNode.GetValueAt(1);
|
||||
theScanner->SetDocumentCharset(theValue);
|
||||
nsScanner* theScanner=mParser->GetScanner();
|
||||
if(theScanner) {
|
||||
const nsString& theValue=aNode.GetValueAt(1);
|
||||
PRInt32 charsetValueStart = theValue.RFind("charset=", PR_TRUE ) ;
|
||||
if(kNotFound != charsetValueStart) {
|
||||
charsetValueStart += 8; // 8 = "charset=".length
|
||||
PRInt32 charsetValueEnd = theValue.FindCharInSet("\'\";", charsetValueStart );
|
||||
if(kNotFound == charsetValueEnd )
|
||||
charsetValueEnd = theValue.Length();
|
||||
nsAutoString theCharset;
|
||||
theValue.Mid(theCharset, charsetValueStart, charsetValueEnd - charsetValueStart);
|
||||
theScanner->SetDocumentCharset(theCharset, kCharsetFromMetaTag);
|
||||
} //if
|
||||
} //if
|
||||
}
|
||||
} //if
|
||||
|
@ -225,6 +225,7 @@ nsIParserFilter * nsParser::SetParserFilter(nsIParserFilter * aFilter)
|
||||
return old;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Call this method once you've created a parser, and want to instruct it
|
||||
* about the command which caused the parser to be constructed. For example,
|
||||
@ -526,6 +527,24 @@ PRBool nsParser::EnableParser(PRBool aState){
|
||||
nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerifyEnabled) {
|
||||
NS_PRECONDITION(0!=aURL,kNullURL);
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
|
||||
|
||||
nsresult result=kBadURL;
|
||||
mDTDVerification=aVerifyEnabled;
|
||||
if(aURL) {
|
||||
@ -533,7 +552,7 @@ nsresult nsParser::Parse(nsIURL* aURL,nsIStreamObserver* aListener,PRBool aVerif
|
||||
nsresult rv = aURL->GetSpec(&spec);
|
||||
if (rv != NS_OK) return rv;
|
||||
nsAutoString theName(spec);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE),aURL,aListener);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theName,PR_FALSE, charset, charsetSource),aURL,aListener);
|
||||
if(pc) {
|
||||
pc->mMultipart=PR_TRUE;
|
||||
pc->mContextType=CParserContext::eCTURL;
|
||||
@ -555,10 +574,27 @@ nsresult nsParser::Parse(fstream& aStream,PRBool aVerifyEnabled){
|
||||
|
||||
mDTDVerification=aVerifyEnabled;
|
||||
nsresult result=NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
|
||||
//ok, time to create our tokenizer and begin the process
|
||||
nsAutoString theUnknownFilename("unknown");
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream,PR_FALSE),&aStream,0);
|
||||
CParserContext* pc=new CParserContext(new nsScanner(theUnknownFilename,aStream, charset, charsetSource,PR_FALSE),&aStream,0);
|
||||
if(pc) {
|
||||
PushContext(*pc);
|
||||
pc->mSourceType=kHTMLTextContentType;
|
||||
@ -593,6 +629,22 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
|
||||
}
|
||||
#endif
|
||||
|
||||
nsAutoString charset;
|
||||
nsCharsetSource charsetSource;
|
||||
|
||||
// XXXX get HTTP charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromHTTPHeader;
|
||||
|
||||
// XXXX get User Preference charset here
|
||||
// charset =
|
||||
// charsetSource = kCharsetFromUserDefault;
|
||||
|
||||
// XXXX get Doc Type Default (e.g. UTF-8 for XML)
|
||||
|
||||
// XXX We should really put if doc == html for the following line
|
||||
charset = "ISO-8859-1";
|
||||
charsetSource = kCharsetFromDocTypeDefault;
|
||||
//NOTE: Make sure that updates to this method don't cause
|
||||
// bug #2361 to break again!
|
||||
|
||||
@ -607,7 +659,7 @@ nsresult nsParser::Parse(nsString& aSourceBuffer,void* aKey,const nsString& aCon
|
||||
|
||||
if((!mParserContext) || (mParserContext->mKey!=aKey)) {
|
||||
//only make a new context if we dont have one, OR if we do, but has a different context key...
|
||||
pc=new CParserContext(new nsScanner(mUnusedInput),aKey,0);
|
||||
pc=new CParserContext(new nsScanner(mUnusedInput, charset, charsetSource),aKey, 0);
|
||||
if(pc) {
|
||||
PushContext(*pc);
|
||||
pc->mStreamListenerState=eOnStart;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "nsDebug.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
|
||||
|
||||
const char* kBadHTMLText="<H3>Oops...</H3>You just tried to read a non-existent document: <BR>";
|
||||
@ -34,8 +35,6 @@ const int kBufsize=1;
|
||||
const int kBufsize=64;
|
||||
#endif
|
||||
|
||||
// #define DEFAULTCHARSET "Shift_JIS"
|
||||
#define DEFAULTCHARSET "ISO-8859-1"
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on
|
||||
@ -46,8 +45,8 @@ const int kBufsize=64;
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
mBuffer(anHTMLString), mFilename("") , mCharset("")
|
||||
nsScanner::nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource) :
|
||||
mBuffer(anHTMLString), mFilename("")
|
||||
{
|
||||
mTotalRead=mBuffer.Length();
|
||||
mIncremental=PR_TRUE;
|
||||
@ -55,9 +54,10 @@ nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
mOffset=0;
|
||||
mMarkPos=-1;
|
||||
mFileStream=0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
mUnicodeDecoder = 0;
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -69,8 +69,8 @@ nsScanner::nsScanner(nsString& anHTMLString) :
|
||||
* @param aFilename --
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
mBuffer(""), mFilename(aFilename) , mCharset("")
|
||||
nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource) :
|
||||
mBuffer(""), mFilename(aFilename)
|
||||
{
|
||||
mIncremental=PR_TRUE;
|
||||
mOffset=0;
|
||||
@ -91,8 +91,9 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
#endif
|
||||
} //if
|
||||
mUnicodeDecoder = 0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
|
||||
}
|
||||
|
||||
@ -105,8 +106,8 @@ nsScanner::nsScanner(nsString& aFilename,PRBool aCreateStream) :
|
||||
* @param aFilename --
|
||||
* @return
|
||||
*/
|
||||
nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership) :
|
||||
mBuffer(""), mFilename(aFilename) , mCharset("")
|
||||
nsScanner::nsScanner(nsString& aFilename,fstream& aStream,const nsString& aCharset, nsCharsetSource aSource, PRBool assumeOwnership) :
|
||||
mBuffer(""), mFilename(aFilename)
|
||||
{
|
||||
mIncremental=PR_TRUE;
|
||||
mOffset=0;
|
||||
@ -115,15 +116,46 @@ nsScanner::nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership
|
||||
mOwnsStream=assumeOwnership;
|
||||
mFileStream=&aStream;
|
||||
mUnicodeDecoder = 0;
|
||||
nsAutoString defaultCharset(DEFAULTCHARSET);
|
||||
SetDocumentCharset(defaultCharset);
|
||||
mCharset = "";
|
||||
mCharsetSource = kCharsetUninitialized;
|
||||
SetDocumentCharset(aCharset, aSource);
|
||||
}
|
||||
|
||||
nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
|
||||
nsresult nsScanner::SetDocumentCharset(const nsString& aCharset , nsCharsetSource aSource)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
if(! mCharset.EqualsIgnoreCase(aCharset)) // see do we need to change a converter.
|
||||
|
||||
if( aSource < mCharsetSource) // priority is lower than the current one, so just keep the current charset
|
||||
return res;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
PRBool same = PR_FALSE;
|
||||
res = calias->Equals(aCharset, mCharset, &same);
|
||||
if(NS_SUCCEEDED(res) && same)
|
||||
{
|
||||
return NS_OK; // no difference, don't change it
|
||||
}
|
||||
// different, need to change it
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res) && (kCharsetUninitialized == mCharsetSource) )
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
mCharset = charsetName;
|
||||
mCharsetSource = aSource;
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
@ -131,13 +163,12 @@ nsresult nsScanner::SetDocumentCharset(const nsString& aCharset )
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeDecoder * decoder = nsnull;
|
||||
res = ccm->GetUnicodeDecoder(&aCharset, &decoder);
|
||||
res = ccm->GetUnicodeDecoder(&mCharset, &decoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != decoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeDecoder);
|
||||
|
||||
mUnicodeDecoder = decoder;
|
||||
mCharset = aCharset;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
|
@ -39,6 +39,16 @@
|
||||
#include <fstream.h>
|
||||
|
||||
|
||||
// Identifies where a document's charset information came from.
// The enumerators are listed in ascending numeric order, and
// nsScanner::SetDocumentCharset (as shown in this change) returns early
// when the incoming source compares lower than the one already recorded,
// so a later enumerator appears to take precedence over an earlier one.
typedef enum {
|
||||
// Value the nsScanner constructors assign to mCharsetSource before their
// first SetDocumentCharset call.
kCharsetUninitialized = 0,
|
||||
kCharsetFromUserDefault ,
|
||||
kCharsetFromDocTypeDefault,
|
||||
kCharsetFromParentFrame,
|
||||
kCharsetFromAutoDetection,
|
||||
// Passed by CNavDTD when a META HTTP-EQUIV/CONTENT value contains "charset=".
kCharsetFromMetaTag,
|
||||
kCharsetFromHTTPHeader
|
||||
} nsCharsetSource;
|
||||
|
||||
class nsScanner {
|
||||
public:
|
||||
|
||||
@ -47,30 +57,36 @@ class nsScanner {
|
||||
* a single string you hand in during construction.
|
||||
* This short cut was added for Javascript.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& anHTMLString);
|
||||
nsScanner(nsString& anHTMLString, const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be based on
|
||||
* a file (therefore a stream) or just data you provide via Append().
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& aFilename,PRBool aCreateStream);
|
||||
nsScanner(nsString& aFilename,PRBool aCreateStream, const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
/**
|
||||
* Use this constructor if you want i/o to be stream based.
|
||||
*
|
||||
* @update gess 5/12/98
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset charset
|
||||
* @param aSource - where the charset info came from
|
||||
* @param aMode represents the parser mode (nav, other)
|
||||
* @return
|
||||
*/
|
||||
nsScanner(nsString& aFilename,fstream& aStream,PRBool assumeOwnership=PR_TRUE);
|
||||
nsScanner(nsString& aFilename,fstream& aStream, const nsString& aCharset, nsCharsetSource aSource,PRBool assumeOwnership=PR_TRUE);
|
||||
|
||||
|
||||
~nsScanner();
|
||||
@ -269,11 +285,12 @@ class nsScanner {
|
||||
/**
|
||||
* Use this setter to change the scanner's unicode decoder
|
||||
*
|
||||
* @update ftang 2/12/99
|
||||
* @update ftang 3/02/99
|
||||
* @param aCharset a charset name or alias (the preferred name is looked up through nsICharsetAlias)
|
||||
* @param aSource - where the charset info came from
|
||||
* @return
|
||||
*/
|
||||
nsresult SetDocumentCharset(const nsString& aCharset);
|
||||
nsresult SetDocumentCharset(const nsString& aCharset, nsCharsetSource aSource);
|
||||
|
||||
protected:
|
||||
|
||||
@ -295,6 +312,7 @@ class nsScanner {
|
||||
PRUint32 mTotalRead;
|
||||
PRBool mOwnsStream;
|
||||
PRBool mIncremental;
|
||||
nsCharsetSource mCharsetSource;
|
||||
nsString mCharset;
|
||||
nsIUnicodeDecoder *mUnicodeDecoder;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user