mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-01-26 14:46:02 +00:00
parent
16ccb6e2bf
commit
af6d628bba
@ -46,11 +46,6 @@
|
||||
void nsEUCJPProber::Reset(void)
|
||||
{
|
||||
mCodingSM->Reset();
|
||||
mNumOfRoman = 0;
|
||||
mNumOfHankaku = 0;
|
||||
mNumOfKana = 0;
|
||||
mNumOfKanji = 0;
|
||||
mNumOfMisc = 0;
|
||||
mState = eDetecting;
|
||||
mContextAnalyser.Reset();
|
||||
mDistributionAnalyser.Reset();
|
||||
@ -80,12 +75,10 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
if (i == 0)
|
||||
{
|
||||
mLastChar[1] = aBuf[0];
|
||||
GetDistribution(charLen, mLastChar);
|
||||
mContextAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
}
|
||||
else
|
||||
GetDistribution(charLen, aBuf+i-1);
|
||||
mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
|
||||
}
|
||||
@ -100,28 +93,6 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
return mState;
|
||||
}
|
||||
|
||||
// Classifies one decoded EUC-JP character and increments the matching
// per-category counter (mNumOfKana, mNumOfKanji, mNumOfHankaku, mNumOfMisc
// or mNumOfRoman).
//
// aCharLen - byte length of the character being classified.
// aStr     - pointer supplied by HandleData (mLastChar or aBuf+i-1); for a
//            two-byte character aStr[0] is treated as the lead byte.
//
// Classification rules:
//   * more than two bytes -> kanji (presumably the three-byte 0x8f-prefixed
//     forms; confirm against the coding state machine)
//   * exactly two bytes   -> decided by the lead byte alone, see below
//   * anything shorter    -> roman
void nsEUCJPProber::GetDistribution(PRUint32 aCharLen, const char* aStr)
{
  if (aCharLen > 2)
  {
    mNumOfKanji++;
    return;
  }

  if (aCharLen != 2)
  {
    mNumOfRoman++;
    return;
  }

  const unsigned char lead = static_cast<unsigned char>(aStr[0]);

  // In EUC-JP the hiragana row has lead byte 0xa4 and the katakana row has
  // lead byte 0xa5. The row is selected by the lead byte, so both tests must
  // look at aStr[0]. Testing the trail byte against 0xa5 would misfile
  // katakana as "misc" and count unrelated characters as kana.
  if (lead == 0xa4 || lead == 0xa5)
    mNumOfKana++;
  else if (lead >= 0xa6)
    mNumOfKanji++;
  else if (lead == 0x8e)
    // 0x8e introduces a half-width katakana character in EUC-JP.
    mNumOfHankaku++;
  else
    mNumOfMisc++;
}
|
||||
|
||||
float nsEUCJPProber::GetConfidence(void)
|
||||
{
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
|
@ -62,15 +62,8 @@ public:
|
||||
// Empty implementation: calling it has no effect.
void SetOpion() {}
|
||||
|
||||
protected:
|
||||
void GetDistribution(PRUint32 aCharLen, const char* aStr);
|
||||
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
PRUint32 mNumOfRoman;
|
||||
PRUint32 mNumOfHankaku;
|
||||
PRUint32 mNumOfKana;
|
||||
PRUint32 mNumOfKanji;
|
||||
PRUint32 mNumOfMisc;
|
||||
|
||||
EUCJPContextAnalysis mContextAnalyser;
|
||||
EUCJPDistributionAnalysis mDistributionAnalyser;
|
||||
|
@ -391,7 +391,9 @@ PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f
|
||||
PCK4BITS(4,2,2,2,2,2,2,2), // a0 - a7
|
||||
//0xa0 is illegal in the SJIS encoding, but some pages do
|
||||
//contain such a byte. We need to be more error-forgiving.
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
|
||||
|
@ -46,11 +46,6 @@
|
||||
void nsSJISProber::Reset(void)
|
||||
{
|
||||
mCodingSM->Reset();
|
||||
mNumOfRoman = 0;
|
||||
mNumOfHankaku = 0;
|
||||
mNumOfKana = 0;
|
||||
mNumOfKanji = 0;
|
||||
mNumOfMisc = 0;
|
||||
mState = eDetecting;
|
||||
mContextAnalyser.Reset();
|
||||
mDistributionAnalyser.Reset();
|
||||
@ -79,13 +74,11 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
if (i == 0)
|
||||
{
|
||||
mLastChar[1] = aBuf[0];
|
||||
GetDistribution(mCodingSM->GetCurrentCharLen(), mLastChar);
|
||||
mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
GetDistribution(mCodingSM->GetCurrentCharLen(), aBuf+i-1);
|
||||
mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
|
||||
mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
|
||||
}
|
||||
@ -101,31 +94,6 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
return mState;
|
||||
}
|
||||
|
||||
// Classifies one decoded Shift_JIS character and increments the matching
// per-category counter (mNumOfKana, mNumOfKanji, mNumOfMisc, mNumOfHankaku
// or mNumOfRoman).
//
// aCharLen - byte length of the character being classified.
// aStr     - pointer supplied by HandleData (mLastChar or aBuf+i-1). For a
//            multi-byte character aStr[0] is the lead byte and aStr[1] the
//            trail byte; for a single-byte character the byte examined is
//            aStr[1], not aStr[0].
void nsSJISProber::GetDistribution(PRUint32 aCharLen, const char* aStr)
{
  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(aStr);

  if (aCharLen < 2)
  {
    // Single-byte character: values from 0xa1 upward are counted as
    // half-width (hankaku), everything below as roman.
    if (bytes[1] >= 0xa1)
      mNumOfHankaku++;
    else
      mNumOfRoman++;
    return;
  }

  const unsigned char lead = bytes[0];

  // Kana occupy two windows: lead 0x82 with trail 0x9f..0xf1, and lead 0x83
  // with trail 0x40..0x96. The trail byte is only read once the lead byte
  // has matched one of those two rows.
  bool isKana = false;
  if (lead == 0x82)
    isKana = (bytes[1] >= 0x9f) && (bytes[1] <= 0xf1);
  else if (lead == 0x83)
    isKana = (bytes[1] >= 0x40) && (bytes[1] <= 0x96);

  if (isKana)
    mNumOfKana++;
  else if (lead >= 0x88)
    mNumOfKanji++;
  else
    mNumOfMisc++;
}
|
||||
|
||||
float nsSJISProber::GetConfidence(void)
|
||||
{
|
||||
float contxtCf = mContextAnalyser.GetConfidence();
|
||||
|
@ -63,15 +63,8 @@ public:
|
||||
// Empty implementation: calling it has no effect.
void SetOpion() {}
|
||||
|
||||
protected:
|
||||
void GetDistribution(PRUint32 aCharLen, const char* aStr);
|
||||
|
||||
nsCodingStateMachine* mCodingSM;
|
||||
nsProbingState mState;
|
||||
PRUint32 mNumOfRoman;
|
||||
PRUint32 mNumOfHankaku;
|
||||
PRUint32 mNumOfKana;
|
||||
PRUint32 mNumOfKanji;
|
||||
PRUint32 mNumOfMisc;
|
||||
|
||||
SJISContextAnalysis mContextAnalyser;
|
||||
SJISDistributionAnalysis mDistributionAnalyser;
|
||||
|
Loading…
x
Reference in New Issue
Block a user