mirror of
https://github.com/go-gitea/chardet.git
synced 2026-07-01 20:24:11 -04:00
Finalize interface for detector
This commit is contained in:
+4
-4
@@ -10,13 +10,13 @@ type chardetTester struct {
|
||||
|
||||
func newChardetTester(r ...recognizer) *chardetTester {
|
||||
if len(r) == 0 {
|
||||
return &chardetTester{NewDetector()}
|
||||
return &chardetTester{NewHtmlDetector()}
|
||||
}
|
||||
return &chardetTester{&Detector{r}}
|
||||
return &chardetTester{&Detector{r, true}}
|
||||
}
|
||||
|
||||
func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *testing.T) bool {
|
||||
r, err := this.d.DetectBest(b, true, "")
|
||||
r, err := this.d.DetectBest(b)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return false
|
||||
@@ -29,7 +29,7 @@ func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *
|
||||
}
|
||||
|
||||
func (this *chardetTester) ExpectUnknown(b []byte, t *testing.T) bool {
|
||||
r, err := this.d.DetectBest(b, true, "")
|
||||
r, err := this.d.DetectBest(b)
|
||||
if err == nil {
|
||||
t.Errorf("Expect unknown, actual %#v", *r)
|
||||
return false
|
||||
|
||||
+11
-6
@@ -13,6 +13,7 @@ type Result struct {
|
||||
|
||||
type Detector struct {
|
||||
recognizers []recognizer
|
||||
stripTag bool
|
||||
}
|
||||
|
||||
// List of charset recognizers
|
||||
@@ -62,24 +63,28 @@ var recognizers = []recognizer{
|
||||
newRecognizer_IBM420_ar_ltr(),
|
||||
}
|
||||
|
||||
func NewDetector() *Detector {
|
||||
return &Detector{recognizers}
|
||||
func NewTextDetector() *Detector {
|
||||
return &Detector{recognizers, false}
|
||||
}
|
||||
|
||||
func NewHtmlDetector() *Detector {
|
||||
return &Detector{recognizers, true}
|
||||
}
|
||||
|
||||
var (
|
||||
NotDetectedError = errors.New("Charset not detected.")
|
||||
)
|
||||
|
||||
func (d *Detector) DetectBest(b []byte, stripTag bool, declaredCharset string) (r *Result, err error) {
|
||||
func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
|
||||
var all []Result
|
||||
if all, err = d.DetectAll(b, stripTag, declaredCharset); err == nil {
|
||||
if all, err = d.DetectAll(b); err == nil {
|
||||
r = &all[0]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (d *Detector) DetectAll(b []byte, stripTag bool, declaredCharset string) ([]Result, error) {
|
||||
input := newRecognizerInput(b, stripTag, declaredCharset)
|
||||
func (d *Detector) DetectAll(b []byte) ([]Result, error) {
|
||||
input := newRecognizerInput(b, d.stripTag)
|
||||
outputChan := make(chan recognizerOutput)
|
||||
for _, r := range d.recognizers {
|
||||
go matchHelper(r, input, outputChan)
|
||||
|
||||
+1
-3
@@ -10,19 +10,17 @@ type recognizerInput struct {
|
||||
raw []byte
|
||||
input []byte
|
||||
tagStripped bool
|
||||
declaredCharset string
|
||||
byteStats []int
|
||||
hasC1Bytes bool
|
||||
}
|
||||
|
||||
func newRecognizerInput(raw []byte, stripTag bool, declaredCharset string) *recognizerInput {
|
||||
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
|
||||
input, stripped := mayStripInput(raw, stripTag)
|
||||
byteStats := computeByteStats(input)
|
||||
return &recognizerInput{
|
||||
raw: raw,
|
||||
input: input,
|
||||
tagStripped: stripped,
|
||||
declaredCharset: declaredCharset,
|
||||
byteStats: byteStats,
|
||||
hasC1Bytes: computeHasC1Bytes(byteStats),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user