mirror of
https://github.com/darlinghq/darling-libxml2.git
synced 2025-01-31 23:42:01 +00:00
Fix the flushing out of raw buffers on encoding conversions
https://bugzilla.gnome.org/show_bug.cgi?id=692915 The new set of conversion functions tried to limit the encoding conversion of the raw buffer to the amount being consumed, in order to work in a more progressive fashion. Unfortunately, this was bad for performance and led to errors in progressive parsing when a very large chunk was close to the end of the document. Fix the new internal function and switch back to the old way of converting. Fix another bug in the process.
This commit is contained in:
parent
de0cc20c29
commit
bf058dce13
@ -3561,7 +3561,7 @@ htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
xmlBufShrink(ctxt->input->buf->buffer, processed);
|
||||
nbchars = xmlCharEncInput(ctxt->input->buf);
|
||||
nbchars = xmlCharEncInput(ctxt->input->buf, 1);
|
||||
if (nbchars < 0) {
|
||||
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
|
||||
"htmlCheckEncoding: encoder error\n",
|
||||
@ -6057,7 +6057,7 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
|
||||
size_t current = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
nbchars = xmlCharEncInput(in);
|
||||
nbchars = xmlCharEncInput(in, terminate);
|
||||
if (nbchars < 0) {
|
||||
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
|
||||
"encoder error\n", NULL, NULL);
|
||||
|
2
enc.h
2
enc.h
@ -21,7 +21,7 @@ extern "C" {
|
||||
int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
|
||||
xmlBufferPtr in, int len);
|
||||
int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len);
|
||||
int xmlCharEncInput(xmlParserInputBufferPtr input);
|
||||
int xmlCharEncInput(xmlParserInputBufferPtr input, int flush);
|
||||
int xmlCharEncOutput(xmlOutputBufferPtr output, int init);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -2163,6 +2163,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
|
||||
/**
|
||||
* xmlCharEncInput:
|
||||
* @input: a parser input buffer
|
||||
* @flush: try to flush all the raw buffer
|
||||
*
|
||||
* Generic front-end for the encoding handler on parser input
|
||||
*
|
||||
@ -2172,7 +2173,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
|
||||
* the result of transformation can't fit into the encoding we want), or
|
||||
*/
|
||||
int
|
||||
xmlCharEncInput(xmlParserInputBufferPtr input)
|
||||
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
|
||||
{
|
||||
int ret = -2;
|
||||
size_t written;
|
||||
@ -2191,7 +2192,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
|
||||
toconv = xmlBufUse(in);
|
||||
if (toconv == 0)
|
||||
return (0);
|
||||
if (toconv > 64 * 1024)
|
||||
if ((toconv > 64 * 1024) && (flush == 0))
|
||||
toconv = 64 * 1024;
|
||||
written = xmlBufAvail(out);
|
||||
if (written > 0)
|
||||
@ -2202,7 +2203,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input)
|
||||
if (written > 0)
|
||||
written--; /* count '\0' */
|
||||
}
|
||||
if (written > 128 * 1024)
|
||||
if ((written > 128 * 1024) && (flush == 0))
|
||||
written = 128 * 1024;
|
||||
|
||||
c_in = toconv;
|
||||
|
10
parser.c
10
parser.c
@ -11122,9 +11122,13 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
/*
|
||||
* If we are operating on converted input, try to flush
|
||||
* remaining chars to avoid them stalling in the non-converted
|
||||
* buffer.
|
||||
* buffer. But do not do this in document start where
|
||||
* encoding="..." may not have been read and we work on a
|
||||
* guessed encoding.
|
||||
*/
|
||||
if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
|
||||
if ((ctxt->instate != XML_PARSER_START) &&
|
||||
(ctxt->input->buf->raw != NULL) &&
|
||||
(xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
|
||||
size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
|
||||
ctxt->input);
|
||||
size_t current = ctxt->input->cur - ctxt->input->base;
|
||||
@ -12146,7 +12150,7 @@ xmldecl_done:
|
||||
size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
|
||||
size_t current = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
nbchars = xmlCharEncInput(in);
|
||||
nbchars = xmlCharEncInput(in, terminate);
|
||||
if (nbchars < 0) {
|
||||
/* TODO 2.6.0 */
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
|
@ -1201,7 +1201,7 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
/*
|
||||
* convert as much as possible of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInput(input->buf);
|
||||
nbchars = xmlCharEncInput(input->buf, 1);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
|
4
xmlIO.c
4
xmlIO.c
@ -3238,7 +3238,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
|
||||
* convert as much as possible to the parser reading buffer.
|
||||
*/
|
||||
use = xmlBufUse(in->raw);
|
||||
nbchars = xmlCharEncInput(in);
|
||||
nbchars = xmlCharEncInput(in, 1);
|
||||
if (nbchars < 0) {
|
||||
xmlIOErr(XML_IO_ENCODER, NULL);
|
||||
in->error = XML_IO_ENCODER;
|
||||
@ -3343,7 +3343,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
|
||||
* convert as much as possible to the parser reading buffer.
|
||||
*/
|
||||
use = xmlBufUse(in->raw);
|
||||
nbchars = xmlCharEncInput(in);
|
||||
nbchars = xmlCharEncInput(in, 1);
|
||||
if (nbchars < 0) {
|
||||
xmlIOErr(XML_IO_ENCODER, NULL);
|
||||
in->error = XML_IO_ENCODER;
|
||||
|
Loading…
x
Reference in New Issue
Block a user