#125628 Auto-detect can not detect the gb18030 page above properly

This patch adds a state machine for gb18030 verification. The data for
the state machine is based on the GB18030 standard, and the code was
generated by a Perl script that is almost identical to the Perl scripts
that generate the other verifier header files. The rest of the change
just adds this state machine to some of the charset detectors.
r=ftang, sr=shaver, a=asa
This commit is contained in:
shanjian%netscape.com 2002-03-14 00:47:05 +00:00
parent 5faf1177d5
commit 2059c806a2
4 changed files with 156 additions and 3 deletions

View File

@ -0,0 +1,109 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Netscape Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the NPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/*
* DO NOT EDIT THIS DOCUMENT MANUALLY !!!
* THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER
* mozilla/intl/chardet/tools/
* Please contact ftang@netscape.com or mozilla-i18n@mozilla.org
* if you have any question. Thanks
*/
#include "nsVerifier.h"
// Byte-to-class table for the gb18030 verifier: one class number per byte
// value 0x00-0xff, eight class numbers per array element via PCK4BITS (the
// macro is defined outside this file). The trailing comment on each row is
// the range of byte values that row covers.
// Classes as assigned by the rows below:
//   0 - 0x0e, 0x0f, 0x1b and 0xff
//   1 - the remaining bytes of 0x00-0x3f (everything except class 0 and 3)
//   2 - 0x40-0x7e
//   3 - 0x30-0x39
//   4 - 0x7f
//   5 - 0x80
//   6 - 0x81-0xfe
static PRUint32 gb18030_cls [ 256 / 8 ] = {
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37
PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f
PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47
PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f
PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57
PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f
PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67
PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f
PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77
PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f
PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87
PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f
PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97
PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f
PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7
PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af
PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7
PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf
PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7
PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf
PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7
PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df
PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7
PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef
PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7
PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff
};
// State-transition table for the gb18030 verifier, flattened state by state.
// Each state occupies seven consecutive entries, one per character class
// 0..6, and there are six states in this order: eStart, eError, eItsMe, 3, 4,
// 5. That is 42 real entries; the final six eStart entries only pad the last
// array element. The trailing //xx-yy comments are hexadecimal entry indexes
// into the flattened table, not byte values.
// Transitions encoded below:
//   eStart : class 0 -> eError, classes 1-5 -> eStart, class 6 -> state 3
//   eError : stays eError for every class
//   eItsMe : stays eItsMe for every class
//   state 3: classes 2, 5, 6 -> eStart, class 3 -> state 4, others -> eError
//   state 4: class 6 -> state 5, others -> eError
//   state 5: class 3 -> eItsMe, others -> eError
// NOTE(review): because a state is seven entries wide, the stride used to
// index this table must be 7 -- confirm against the lookup in nsVerifier.h.
static PRUint32 gb18030_st [ 6] = {
PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07
PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17
PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f
PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27
PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
};
// Verifier descriptor for gb18030: charset name, the packed byte-to-class
// table, the state-table stride, and the packed state-transition table.
static nsVerifier nsGB18030Verifier = {
"gb18030",
{
eIdxSft4bits,
eSftMsk4bits,
eBitSft4bits,
eUnitMsk4bits,
gb18030_cls
},
// Stride of gb18030_st, i.e. the number of character classes. gb18030_cls
// assigns classes 0..6 and gb18030_st is laid out with seven entries per
// state, so this must be 7. The previous value of 4 made every lookup for
// states other than eStart land on the wrong entry (for example state 3 with
// class 3 read entry 15 instead of entry 24).
// NOTE(review): assumes the lookup is state * stride + class as in
// nsVerifier.h -- confirm there.
7,
{
eIdxSft4bits,
eSftMsk4bits,
eBitSft4bits,
eUnitMsk4bits,
gb18030_st
}
};

View File

@ -114,6 +114,7 @@ nsVerifier *gKoVerifierSet[KO_DETECTOR_NUM_VERIFIERS] = {
nsVerifier *gZhCnVerifierSet[ZHCN_DETECTOR_NUM_VERIFIERS] = {
&nsUTF8Verifier,
&nsGB2312Verifier,
&nsGB18030Verifier,
&nsISO2022CNVerifier,
&nsHZVerifier,
&nsCP1252Verifier,
@ -138,6 +139,7 @@ nsVerifier *gJaVerifierSet[JA_DETECTOR_NUM_VERIFIERS] = {
nsVerifier *gZhVerifierSet[ZH_DETECTOR_NUM_VERIFIERS] = {
&nsUTF8Verifier,
&nsGB2312Verifier,
&nsGB18030Verifier,
&nsBIG5Verifier,
&nsISO2022CNVerifier,
&nsHZVerifier,
@ -171,6 +173,7 @@ nsVerifier *gCJKVerifierSet[CJK_DETECTOR_NUM_VERIFIERS] = {
&nsBIG5Verifier,
&nsEUCTWVerifier,
&nsGB2312Verifier,
&nsGB18030Verifier,
&nsISO2022CNVerifier,
&nsHZVerifier,
&nsCP1252Verifier,

View File

@ -54,6 +54,7 @@
#include "nsUCS2LEVerifier.h"
#include "nsBIG5Verifier.h"
#include "nsGB2312Verifier.h"
#include "nsGB18030Verifier.h"
#include "nsEUCTWVerifier.h"
#include "nsEUCKRVerifier.h"
//---- end verifiers
@ -162,17 +163,17 @@ extern nsEUCStatistics *gZhTwStatisticsSet[];
#define KO_DETECTOR_NUM_VERIFIERS 6
extern nsVerifier *gKoVerifierSet[];
#define ZHCN_DETECTOR_NUM_VERIFIERS 7
#define ZHCN_DETECTOR_NUM_VERIFIERS 8
extern nsVerifier *gZhCnVerifierSet[];
#define JA_DETECTOR_NUM_VERIFIERS 7
extern nsVerifier *gJaVerifierSet[];
#define ZH_DETECTOR_NUM_VERIFIERS 9
#define ZH_DETECTOR_NUM_VERIFIERS 10
extern nsVerifier *gZhVerifierSet[];
extern nsEUCStatistics *gZhStatisticsSet[];
#define CJK_DETECTOR_NUM_VERIFIERS 14
#define CJK_DETECTOR_NUM_VERIFIERS 15
extern nsVerifier *gCJKVerifierSet[];
extern nsEUCStatistics *gCJKStatisticsSet[];

View File

@ -0,0 +1,40 @@
#!/usr/local/bin/perl
use strict;
require "genverifier.pm";
use genverifier;
# Generator input for the gb18030 verifier header: a byte-to-class map and a
# state-transition table, handed to genverifier::GenVerifier, whose returned
# text is printed to STDOUT.
my(@gb18030_cls);
my(@gb18030_st);
my($gb18030_ver);

# Number of character classes used below (0 .. 6). This is also the width of
# one row of @gb18030_st. It was previously passed as 4, which does not match
# the seven-column state table.
# NOTE(review): GenVerifier appears to emit this value as the stride field of
# the generated nsVerifier -- confirm against genverifier.pm.
my($gb18030_numcls) = 7;

# Each entry is [ first_byte, last_byte, class ].
# Some ranges overlap (0x0e-0x0f, 0x1b and 0x30-0x39 all lie inside
# 0x00-0x3f); the narrower ranges are listed first.
# NOTE(review): this presumes earlier entries win over later overlapping
# ones -- confirm against genverifier.pm before reordering.
@gb18030_cls = (
[ 0x0e , 0x0f , 0 ],
[ 0x1b , 0x1b , 0 ],
[ 0x30 , 0x39 , 3 ],
[ 0x00 , 0x3f , 1 ],
[ 0x40 , 0x7e , 2 ],
[ 0x7f , 0x7f , 4 ],
[ 0x80 , 0x80 , 5 ],
[ 0x81 , 0xfe , 6 ],
[ 0xff , 0xff , 0 ],
);
# NOTE(review): the package switch does not affect the lexical variables
# declared above; it is kept as-is because its purpose is not visible here.
package genverifier;
# One row per state, one column per class; the value is the next state
# (0 = start, 1 = error, 2 = ItsMe, 3-5 = intermediate multibyte states).
@gb18030_st = (
# 0 1 2 3 4 5 6
1, 0, 0, 0, 0, 0, 3, # state 0
1, 1, 1, 1, 1, 1, 1, # Error State - 1
2, 2, 2, 2, 2, 2, 2, # ItsMe State - 2
1, 1, 0, 4, 1, 0, 0, # state 3, multibytes, 1st byte identified
1, 1, 1, 1, 1, 1, 5, # state 4, multibytes, 2nd byte identified
1, 1, 1, 2, 1, 1, 1, # state 5, multibytes, 3rd byte identified
);
# Catch a class count that no longer matches the table width before a wrong
# header gets generated.
die "gb18030_st must hold a whole number of rows of $gb18030_numcls entries\n"
    if scalar(@gb18030_st) % $gb18030_numcls != 0;
$gb18030_ver = genverifier::GenVerifier("gb18030", "gb18030", \@gb18030_cls, $gb18030_numcls, \@gb18030_st);
print $gb18030_ver;