diff --git a/HTMLparser.c b/HTMLparser.c index 6b83654d..dd0c1ead 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -3561,7 +3561,7 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { */ processed = ctxt->input->cur - ctxt->input->base; xmlBufShrink(ctxt->input->buf->buffer, processed); - nbchars = xmlCharEncInput(ctxt->input->buf); + nbchars = xmlCharEncInput(ctxt->input->buf, 1); if (nbchars < 0) { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "htmlCheckEncoding: encoder error\n", @@ -6057,7 +6057,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); size_t current = ctxt->input->cur - ctxt->input->base; - nbchars = xmlCharEncInput(in); + nbchars = xmlCharEncInput(in, terminate); if (nbchars < 0) { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "encoder error\n", NULL, NULL); diff --git a/enc.h b/enc.h index 91977604..057d206d 100644 --- a/enc.h +++ b/enc.h @@ -21,7 +21,7 @@ extern "C" { int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in, int len); int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len); -int xmlCharEncInput(xmlParserInputBufferPtr input); +int xmlCharEncInput(xmlParserInputBufferPtr input, int flush); int xmlCharEncOutput(xmlOutputBufferPtr output, int init); #ifdef __cplusplus diff --git a/encoding.c b/encoding.c index 7275ffdf..7330e908 100644 --- a/encoding.c +++ b/encoding.c @@ -2163,6 +2163,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) /** * xmlCharEncInput: * @input: a parser input buffer + * @flush: try to flush all the raw buffer * * Generic front-end for the encoding handler on parser input * @@ -2172,7 +2173,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) * the result of transformation can't fit into the encoding we want), or */ int -xmlCharEncInput(xmlParserInputBufferPtr input) +xmlCharEncInput(xmlParserInputBufferPtr input, int flush) { int ret = -2; size_t written; @@ -2191,7 +2192,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input) toconv = xmlBufUse(in); if (toconv == 0) return (0); - if (toconv > 64 * 1024) + if ((toconv > 64 * 1024) && (flush == 0)) toconv = 64 * 1024; written = xmlBufAvail(out); if (written > 0) @@ -2202,7 +2203,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input) if (written > 0) written--; /* count '\0' */ } - if (written > 128 * 1024) + if ((written > 128 * 1024) && (flush == 0)) written = 128 * 1024; c_in = toconv; diff --git a/parser.c b/parser.c index 1c99051f..91f8c90c 100644 --- a/parser.c +++ b/parser.c @@ -11122,9 +11122,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { /* * If we are operating on converted input, try to flush * remainng chars to avoid them stalling in the non-converted - * buffer. + * buffer. But do not do this in document start where + * encoding="..." may not have been read and we work on a + * guessed encoding. */ - if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) { + if ((ctxt->instate != XML_PARSER_START) && + (ctxt->input->buf->raw != NULL) && + (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) { size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); size_t current = ctxt->input->cur - ctxt->input->base; @@ -12146,7 +12150,7 @@ xmldecl_done: size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); size_t current = ctxt->input->cur - ctxt->input->base; - nbchars = xmlCharEncInput(in); + nbchars = xmlCharEncInput(in, terminate); if (nbchars < 0) { /* TODO 2.6.0 */ xmlGenericError(xmlGenericErrorContext, diff --git a/parserInternals.c b/parserInternals.c index 46760503..02032d5b 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1201,7 +1201,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, /* * convert as much as possible of the buffer */ - nbchars = xmlCharEncInput(input->buf); + nbchars = xmlCharEncInput(input->buf, 1); } else { /* * convert just enough to get diff --git a/xmlIO.c b/xmlIO.c index 44254e4c..fecdae56 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -3238,7 +3238,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, * convert as much as possible to the parser reading buffer. */ use = xmlBufUse(in->raw); - nbchars = xmlCharEncInput(in); + nbchars = xmlCharEncInput(in, 1); if (nbchars < 0) { xmlIOErr(XML_IO_ENCODER, NULL); in->error = XML_IO_ENCODER; @@ -3343,7 +3343,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { * convert as much as possible to the parser reading buffer. */ use = xmlBufUse(in->raw); - nbchars = xmlCharEncInput(in); + nbchars = xmlCharEncInput(in, 1); if (nbchars < 0) { xmlIOErr(XML_IO_ENCODER, NULL); in->error = XML_IO_ENCODER;