Fix the flushing out of raw buffers on encoding conversions

https://bugzilla.gnome.org/show_bug.cgi?id=692915

The new set of converting functions tried to limit how much of the
raw buffer was converted into the consumption buffer at a time, in
order to work in a more progressive fashion. Unfortunately this was
bad for performance and led to errors on progressive parsing when
a very large chunk was close to the end of the document. Fix
the new internal function and switch back to the old way of
converting. Fix another bug in the process.
This commit is contained in:
Daniel Veillard 2013-02-13 18:19:42 +08:00
parent de0cc20c29
commit bf058dce13
6 changed files with 17 additions and 12 deletions

View File

@ -3561,7 +3561,7 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
*/
processed = ctxt->input->cur - ctxt->input->base;
xmlBufShrink(ctxt->input->buf->buffer, processed);
nbchars = xmlCharEncInput(ctxt->input->buf);
nbchars = xmlCharEncInput(ctxt->input->buf, 1);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"htmlCheckEncoding: encoder error\n",
@ -6057,7 +6057,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"encoder error\n", NULL, NULL);

2
enc.h
View File

@ -21,7 +21,7 @@ extern "C" {
int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in, int len);
int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len);
int xmlCharEncInput(xmlParserInputBufferPtr input);
int xmlCharEncInput(xmlParserInputBufferPtr input, int flush);
int xmlCharEncOutput(xmlOutputBufferPtr output, int init);
#ifdef __cplusplus

View File

@ -2163,6 +2163,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
/**
* xmlCharEncInput:
* @input: a parser input buffer
* @flush: try to flush all the raw buffer
*
* Generic front-end for the encoding handler on parser input
*
@ -2172,7 +2173,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
* the result of transformation can't fit into the encoding we want), or
*/
int
xmlCharEncInput(xmlParserInputBufferPtr input)
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
{
int ret = -2;
size_t written;
@ -2191,7 +2192,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
toconv = xmlBufUse(in);
if (toconv == 0)
return (0);
if (toconv > 64 * 1024)
if ((toconv > 64 * 1024) && (flush == 0))
toconv = 64 * 1024;
written = xmlBufAvail(out);
if (written > 0)
@ -2202,7 +2203,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
if (written > 0)
written--; /* count '\0' */
}
if (written > 128 * 1024)
if ((written > 128 * 1024) && (flush == 0))
written = 128 * 1024;
c_in = toconv;

View File

@ -11122,9 +11122,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
/*
* If we are operating on converted input, try to flush
* remainng chars to avoid them stalling in the non-converted
* buffer.
* buffer. But do not do this in document start where
* encoding="..." may not have been read and we work on a
* guessed encoding.
*/
if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
if ((ctxt->instate != XML_PARSER_START) &&
(ctxt->input->buf->raw != NULL) &&
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
@ -12146,7 +12150,7 @@ xmldecl_done:
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
size_t current = ctxt->input->cur - ctxt->input->base;
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, terminate);
if (nbchars < 0) {
/* TODO 2.6.0 */
xmlGenericError(xmlGenericErrorContext,

View File

@ -1201,7 +1201,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInput(input->buf);
nbchars = xmlCharEncInput(input->buf, 1);
} else {
/*
* convert just enough to get

View File

@ -3238,7 +3238,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
* convert as much as possible to the parser reading buffer.
*/
use = xmlBufUse(in->raw);
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, 1);
if (nbchars < 0) {
xmlIOErr(XML_IO_ENCODER, NULL);
in->error = XML_IO_ENCODER;
@ -3343,7 +3343,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
* convert as much as possible to the parser reading buffer.
*/
use = xmlBufUse(in->raw);
nbchars = xmlCharEncInput(in);
nbchars = xmlCharEncInput(in, 1);
if (nbchars < 0) {
xmlIOErr(XML_IO_ENCODER, NULL);
in->error = XML_IO_ENCODER;