Finalize interface for detector

This commit is contained in:
Sheng Yu
2012-08-15 11:46:09 -07:00
parent ae559a90d6
commit eb1e2b7f1a
3 changed files with 16 additions and 13 deletions
+4 -4
View File
@@ -10,13 +10,13 @@ type chardetTester struct {
// newChardetTester wraps a Detector in a chardetTester for use by the tests.
// With no recognizers supplied it wraps a detector obtained from a package
// constructor; otherwise it wraps a Detector built directly from r.
// NOTE(review): this is a rendered diff without +/- markers, so each changed
// return appears twice — presumably the second line of each pair is the
// post-commit form (NewHtmlDetector, and Detector{r, true}).
func newChardetTester(r ...recognizer) *chardetTester {
if len(r) == 0 {
return &chardetTester{NewDetector()}
return &chardetTester{NewHtmlDetector()}
}
return &chardetTester{&Detector{r}}
return &chardetTester{&Detector{r, true}}
}
func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *testing.T) bool {
r, err := this.d.DetectBest(b, true, "")
r, err := this.d.DetectBest(b)
if err != nil {
t.Error(err)
return false
@@ -29,7 +29,7 @@ func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *
}
func (this *chardetTester) ExpectUnknown(b []byte, t *testing.T) bool {
r, err := this.d.DetectBest(b, true, "")
r, err := this.d.DetectBest(b)
if err == nil {
t.Errorf("Expect unknown, actual %#v", *r)
return false
+11 -6
View File
@@ -13,6 +13,7 @@ type Result struct {
// Detector bundles the recognizers to run with the tag-stripping setting that
// is applied when the input is prepared.
type Detector struct {
// recognizers are the charset recognizers that DetectAll launches, one
// goroutine each.
recognizers []recognizer
// stripTag is forwarded to newRecognizerInput by DetectAll.
stripTag bool
}
// List of charset recognizers
@@ -62,24 +63,28 @@ var recognizers = []recognizer{
newRecognizer_IBM420_ar_ltr(),
}
// NOTE(review): rendered diff without +/- markers — the NewDetector header and
// its one-field literal are presumably the pre-commit form that
// NewTextDetector replaces; both share the single closing brace below.
func NewDetector() *Detector {
return &Detector{recognizers}
// NewTextDetector returns a Detector that uses the package-level recognizers
// list with stripTag set to false.
func NewTextDetector() *Detector {
return &Detector{recognizers, false}
}
// NewHtmlDetector returns a Detector that uses the package-level recognizers
// list with stripTag set to true.
func NewHtmlDetector() *Detector {
	return &Detector{
		recognizers: recognizers,
		stripTag:    true,
	}
}
var (
// NotDetectedError is a package-level sentinel error value.
// NOTE(review): presumably returned by DetectAll when no recognizer
// matches — the return site is not visible in this diff; confirm.
NotDetectedError = errors.New("Charset not detected.")
)
// DetectBest runs DetectAll on b and returns a pointer to the first Result of
// the returned slice. If DetectAll reports an error, r stays nil and that
// error is returned unchanged.
// NOTE(review): rendered diff without +/- markers — the three-parameter
// signature and the three-argument DetectAll call are presumably the
// pre-commit lines; the single-argument forms are the post-commit ones.
func (d *Detector) DetectBest(b []byte, stripTag bool, declaredCharset string) (r *Result, err error) {
func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
var all []Result
if all, err = d.DetectAll(b, stripTag, declaredCharset); err == nil {
if all, err = d.DetectAll(b); err == nil {
// Assumes DetectAll never returns an empty slice together with a nil
// error, otherwise this index panics — TODO confirm against DetectAll.
r = &all[0]
}
return
}
func (d *Detector) DetectAll(b []byte, stripTag bool, declaredCharset string) ([]Result, error) {
input := newRecognizerInput(b, stripTag, declaredCharset)
func (d *Detector) DetectAll(b []byte) ([]Result, error) {
input := newRecognizerInput(b, d.stripTag)
outputChan := make(chan recognizerOutput)
for _, r := range d.recognizers {
go matchHelper(r, input, outputChan)
+1 -3
View File
@@ -10,19 +10,17 @@ type recognizerInput struct {
raw []byte
input []byte
tagStripped bool
declaredCharset string
byteStats []int
hasC1Bytes bool
}
func newRecognizerInput(raw []byte, stripTag bool, declaredCharset string) *recognizerInput {
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
input, stripped := mayStripInput(raw, stripTag)
byteStats := computeByteStats(input)
return &recognizerInput{
raw: raw,
input: input,
tagStripped: stripped,
declaredCharset: declaredCharset,
byteStats: byteStats,
hasC1Bytes: computeHasC1Bytes(byteStats),
}