#111340 Autodetect All detect SJIS as Greek

r=yokoyama, sr=brendan
This commit is contained in:
shanjian%netscape.com 2005-11-02 16:57:16 +00:00
parent 16ccb6e2bf
commit af6d628bba
5 changed files with 3 additions and 76 deletions

View File

@ -46,11 +46,6 @@
void nsEUCJPProber::Reset(void)
{
mCodingSM->Reset();
mNumOfRoman = 0;
mNumOfHankaku = 0;
mNumOfKana = 0;
mNumOfKanji = 0;
mNumOfMisc = 0;
mState = eDetecting;
mContextAnalyser.Reset();
mDistributionAnalyser.Reset();
@ -80,12 +75,10 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
if (i == 0)
{
mLastChar[1] = aBuf[0];
GetDistribution(charLen, mLastChar);
mContextAnalyser.HandleOneChar(mLastChar, charLen);
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
}
else
GetDistribution(charLen, aBuf+i-1);
mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
}
@ -100,28 +93,6 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
return mState;
}
// Tallies one character into the prober's per-category counters
// (mNumOfKana, mNumOfKanji, mNumOfHankaku, mNumOfMisc, mNumOfRoman).
//
// aCharLen - byte length of the character being classified.
// aStr     - pointer whose first byte is examined as the lead byte of a
//            two-byte character; it is not dereferenced for other lengths.
//
// For two-byte characters the category is decided by the lead byte alone:
//   0xa4 / 0xa5 -> kana (hiragana / katakana rows of EUC-JP)
//   >= 0xa6     -> kanji
//   0x8e        -> hankaku (half-width) kana
//   otherwise   -> misc
// Characters longer than two bytes are counted as kanji; single-byte
// characters are counted as roman.
void nsEUCJPProber::GetDistribution(PRUint32 aCharLen, const char* aStr)
{
  if (aCharLen == 2)
  {
    const unsigned char lead = (unsigned char)*aStr;

    // Both kana rows are identified by the lead byte. The previous code
    // tested the trail byte against 0xa5, which misfiled any character
    // whose second byte happened to be 0xa5 and missed real katakana.
    if (lead == 0xa4 || lead == 0xa5)
      mNumOfKana++;
    else if (lead >= 0xa6)
      mNumOfKanji++;
    else if (lead == 0x8e)
      mNumOfHankaku++;
    else
      mNumOfMisc++;
  }
  else if (aCharLen > 2)
    mNumOfKanji++;
  else
  {
    mNumOfRoman++;
  }
}
float nsEUCJPProber::GetConfidence(void)
{
float contxtCf = mContextAnalyser.GetConfidence();

View File

@ -62,15 +62,8 @@ public:
// No-op: the body is empty, so calling this has no effect.
void SetOpion() {};
protected:
void GetDistribution(PRUint32 aCharLen, const char* aStr);
nsCodingStateMachine* mCodingSM;
nsProbingState mState;
PRUint32 mNumOfRoman;
PRUint32 mNumOfHankaku;
PRUint32 mNumOfKana;
PRUint32 mNumOfKanji;
PRUint32 mNumOfMisc;
EUCJPContextAnalysis mContextAnalyser;
EUCJPDistributionAnalysis mDistributionAnalyser;

View File

@ -391,7 +391,9 @@ PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
PCK4BITS(4,2,2,2,2,2,2,2), // a0 - a7
//0xa0 is illegal in the SJIS encoding, but some pages do
//contain this byte. We need to be more forgiving of errors.
PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf

View File

@ -46,11 +46,6 @@
void nsSJISProber::Reset(void)
{
mCodingSM->Reset();
mNumOfRoman = 0;
mNumOfHankaku = 0;
mNumOfKana = 0;
mNumOfKanji = 0;
mNumOfMisc = 0;
mState = eDetecting;
mContextAnalyser.Reset();
mDistributionAnalyser.Reset();
@ -79,13 +74,11 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
if (i == 0)
{
mLastChar[1] = aBuf[0];
GetDistribution(mCodingSM->GetCurrentCharLen(), mLastChar);
mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
}
else
{
GetDistribution(mCodingSM->GetCurrentCharLen(), aBuf+i-1);
mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
}
@ -101,31 +94,6 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
return mState;
}
// Tallies one character into the prober's per-category counters
// (mNumOfKana, mNumOfKanji, mNumOfMisc, mNumOfHankaku, mNumOfRoman).
//
// aCharLen - byte length of the character being classified.
// aStr     - pointer to two consecutive bytes. For multi-byte characters
//            aStr[0] is treated as the lead byte and aStr[1] as the trail
//            byte. For single-byte characters only aStr[1] is examined
//            (presumably because the caller passes a pointer one byte
//            before the current position -- verify against HandleData).
void nsSJISProber::GetDistribution(PRUint32 aCharLen, const char* aStr)
{
  const unsigned char* bytes = (const unsigned char*)aStr;

  // Single-byte character: hankaku when the byte is 0xa1 or above,
  // roman otherwise.
  if (aCharLen < 2)
  {
    if (bytes[1] >= 0xa1)
      mNumOfHankaku++;
    else
      mNumOfRoman++;
    return;
  }

  const unsigned char lead = bytes[0];

  // Kana occupies a trail-byte range under each of two lead bytes.
  // The trail byte is read only when the lead byte is one of those two.
  bool isKana = false;
  if (lead == 0x82)
    isKana = (bytes[1] >= 0x9f && bytes[1] <= 0xf1);
  else if (lead == 0x83)
    isKana = (bytes[1] >= 0x40 && bytes[1] <= 0x96);

  if (isKana)
    mNumOfKana++;
  else if (lead >= 0x88)
    mNumOfKanji++;
  else
    mNumOfMisc++;
}
float nsSJISProber::GetConfidence(void)
{
float contxtCf = mContextAnalyser.GetConfidence();

View File

@ -63,15 +63,8 @@ public:
// No-op: the body is empty, so calling this has no effect.
void SetOpion() {};
protected:
void GetDistribution(PRUint32 aCharLen, const char* aStr);
nsCodingStateMachine* mCodingSM;
nsProbingState mState;
PRUint32 mNumOfRoman;
PRUint32 mNumOfHankaku;
PRUint32 mNumOfKana;
PRUint32 mNumOfKanji;
PRUint32 mNumOfMisc;
SJISContextAnalysis mContextAnalyser;
SJISDistributionAnalysis mDistributionAnalyser;