(libxml) LIBXML_ICU_ENABLED removed

This commit is contained in:
twinaphex 2012-11-17 23:22:49 +01:00
parent 7a0e128a97
commit 9871a42a3d
4 changed files with 0 additions and 602 deletions

View File

@ -148,15 +148,6 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#define LIBXML_ICONV_ENABLED
#endif
/**
* LIBXML_ICU_ENABLED:
*
* Whether icu support is available
*/
#if 1
#define LIBXML_ICU_ENABLED
#endif
/**
* LIBXML_ISO8859X_ENABLED:
*

View File

@ -889,11 +889,7 @@ xmlHasFeature(xmlFeature feature)
return(0);
#endif
case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
return(1);
#else
return(0);
#endif
default:
break;
}

View File

@ -1,559 +0,0 @@
From f1121648d0762cf9bf4e5117bfc1008447fb4080 Mon Sep 17 00:00:00 2001
From: android
Date: Thu, 1 Apr 2010 11:46:35 -0700
Subject: [PATCH] Add ICU support for libxml.
This is derived from Jungshik's patch. The encoding.c is a copy from Chrome's source,
which has one extra modification than Jungshik's patch.
Issue:2557315
Change-Id: I8e4c9e544660f3f943a15042756f7248d5afff8e
---
Android.mk | 4 +-
encoding.c | 248 +++++++++++++++++++++++++++++++++++++++++-
include/libxml/encoding.h | 29 +++++
include/libxml/parser.h | 3 +-
include/libxml/xmlversion.h | 11 ++-
parser.c | 9 ++
xmlregexp.c | 2 +-
7 files changed, 294 insertions(+), 12 deletions(-)
diff --git a/Android.mk b/Android.mk
index 3d0ede8..08bf11f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -57,7 +57,7 @@ common_C_INCLUDES += \
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(common_SRC_FILES)
-LOCAL_C_INCLUDES += $(common_C_INCLUDES)
+LOCAL_C_INCLUDES += $(common_C_INCLUDES) external/icu4c/common
LOCAL_SHARED_LIBRARIES += $(common_SHARED_LIBRARIES)
LOCAL_CFLAGS += -fvisibility=hidden
@@ -71,7 +71,7 @@ include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(common_SRC_FILES)
-LOCAL_C_INCLUDES += $(common_C_INCLUDES)
+LOCAL_C_INCLUDES += $(common_C_INCLUDES) external/icu4c/common
LOCAL_SHARED_LIBRARIES += $(common_SHARED_LIBRARIES)
LOCAL_MODULE:= libxml2
include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/encoding.c b/encoding.c
index e2df797..2abc32e 100644
--- a/encoding.c
+++ b/encoding.c
@@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
-#ifdef LIBXML_ICONV_ENABLED
+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING /* Define this to get encoding traces */
#endif
@@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
+#ifdef LIBXML_ICU_ENABLED
+static uconv_t*
+openIcuConverter(const char* name, int toUnicode)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
+ if (conv == NULL)
+ return NULL;
+
+ conv->uconv = ucnv_open(name, &status);
+ if (U_FAILURE(status))
+ goto error;
+
+ status = U_ZERO_ERROR;
+ if (toUnicode) {
+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
+ NULL, NULL, NULL, &status);
+ }
+ else {
+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
+ NULL, NULL, NULL, &status);
+ }
+ if (U_FAILURE(status))
+ goto error;
+
+ status = U_ZERO_ERROR;
+ conv->utf8 = ucnv_open("UTF-8", &status);
+ if (U_SUCCESS(status))
+ return conv;
+
+error:
+ if (conv->uconv)
+ ucnv_close(conv->uconv);
+ xmlFree(conv);
+ return NULL;
+}
+
+static void
+closeIcuConverter(uconv_t *conv)
+{
+ if (conv != NULL) {
+ ucnv_close(conv->uconv);
+ ucnv_close(conv->utf8);
+ xmlFree(conv);
+ }
+}
+#endif /* LIBXML_ICU_ENABLED */
+
/************************************************************************
* *
* Conversions To/From UTF8 encoding *
@@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name,
#ifdef LIBXML_ICONV_ENABLED
handler->iconv_in = NULL;
handler->iconv_out = NULL;
-#endif /* LIBXML_ICONV_ENABLED */
+#endif
+#ifdef LIBXML_ICU_ENABLED
+ handler->uconv_in = NULL;
+ handler->uconv_out = NULL;
+#endif
/*
* registers and returns the handler.
@@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) {
xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
#endif /* LIBXML_OUTPUT_ENABLED */
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
#ifdef LIBXML_ISO8859X_ENABLED
xmlRegisterCharEncodingHandlersISO8859x ();
#endif
@@ -1576,6 +1628,10 @@ xmlFindCharEncodingHandler(const char *name) {
xmlCharEncodingHandlerPtr enc;
iconv_t icv_in, icv_out;
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ xmlCharEncodingHandlerPtr enc;
+ uconv_t *ucv_in, *ucv_out;
+#endif /* LIBXML_ICU_ENABLED */
char upper[100];
int i;
@@ -1642,6 +1698,35 @@ xmlFindCharEncodingHandler(const char *name) {
"iconv : problems with filters for '%s'\n", name);
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ /* check whether icu can handle this */
+ ucv_in = openIcuConverter(name, 1);
+ ucv_out = openIcuConverter(name, 0);
+ if (ucv_in != NULL && ucv_out != NULL) {
+ enc = (xmlCharEncodingHandlerPtr)
+ xmlMalloc(sizeof(xmlCharEncodingHandler));
+ if (enc == NULL) {
+ closeIcuConverter(ucv_in);
+ closeIcuConverter(ucv_out);
+ return(NULL);
+ }
+ enc->name = xmlMemStrdup(name);
+ enc->input = NULL;
+ enc->output = NULL;
+ enc->uconv_in = ucv_in;
+ enc->uconv_out = ucv_out;
+#ifdef DEBUG_ENCODING
+ xmlGenericError(xmlGenericErrorContext,
+ "Found ICU converter handler for encoding %s\n", name);
+#endif
+ return enc;
+ } else if (ucv_in != NULL || ucv_out != NULL) {
+ closeIcuConverter(ucv_in);
+ closeIcuConverter(ucv_out);
+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
+ "ICU converter : problems with filters for '%s'\n", name);
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
@@ -1732,6 +1817,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
/************************************************************************
* *
+ * ICU based generic conversion functions *
+ * *
+ ************************************************************************/
+
+#ifdef LIBXML_ICU_ENABLED
+/**
+ * xmlUconvWrapper:
+ * @cd: ICU uconverter data structure
+ * @toUnicode : non-zero if toUnicode. 0 otherwise.
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of ISO Latin 1 chars
+ * @inlen: the length of @in
+ *
+ * Returns 0 if success, or
+ * -1 by lack of space, or
+ * -2 if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want), or
+ * -3 if there the last byte can't form a single output char.
+ *
+ * The value of @inlen after return is the number of octets consumed
+ * as the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of ocetes consumed.
+ */
+static int
+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
+ const unsigned char *in, int *inlen) {
+ const char *ucv_in = (const char *) in;
+ char *ucv_out = (char *) out;
+ UErrorCode err = U_ZERO_ERROR;
+
+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
+ if (outlen != NULL) *outlen = 0;
+ return(-1);
+ }
+
+ /*
+ * TODO(jungshik)
+ * 1. is ucnv_convert(To|From)Algorithmic better?
+ * 2. had we better use an explicit pivot buffer?
+ * 3. error returned comes from 'fromUnicode' only even
+ * when toUnicode is true !
+ */
+ if (toUnicode) {
+ /* encoding => UTF-16 => UTF-8 */
+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+ 0, TRUE, &err);
+ } else {
+ /* UTF-8 => UTF-16 => encoding */
+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
+ 0, TRUE, &err);
+ }
+ *inlen = ucv_in - (const char*) in;
+ *outlen = ucv_out - (char *) out;
+ if (U_SUCCESS(err))
+ return 0;
+ if (err == U_BUFFER_OVERFLOW_ERROR)
+ return -1;
+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
+ return -2;
+ /* if (err == U_TRUNCATED_CHAR_FOUND) */
+ return -3;
+}
+#endif /* LIBXML_ICU_ENABLED */
+
+/************************************************************************
+ * *
* The real API used by libxml for on-the-fly conversion *
* *
************************************************************************/
@@ -1794,6 +1948,16 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
if (ret == -1) ret = -3;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1) ret = -3;
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
switch (ret) {
case 0:
@@ -1879,6 +2043,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
ret = -3;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1)
+ ret = -3;
+ }
+#endif /* LIBXML_ICU_ENABLED */
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
@@ -1979,6 +2154,15 @@ retry:
out->content[out->use] = 0;
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0,
+ &out->content[out->use],
+ &written, NULL, &toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ }
+#endif /* LIBXML_ICU_ENABLED */
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
@@ -2003,7 +2187,7 @@ retry:
xmlBufferShrink(in, toconv);
out->use += written;
writtentot += written;
- }
+ }
out->content[out->use] = 0;
}
#ifdef LIBXML_ICONV_ENABLED
@@ -2025,6 +2209,26 @@ retry:
}
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0,
+ &out->content[out->use],
+ &written, in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ if (ret == -1) {
+ if (written > 0) {
+ /*
+ * Can be a limitation of iconv
+ */
+ goto retry;
+ }
+ ret = -3;
+ }
+ }
+#endif /* LIBXML_ICU_ENABLED */
else {
xmlEncodingErr(XML_I18N_NO_OUTPUT,
"xmlCharEncOutFunc: no output function !\n", NULL);
@@ -2137,6 +2341,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
xmlFree(handler);
}
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
+ if (handler->name != NULL)
+ xmlFree(handler->name);
+ handler->name = NULL;
+ if (handler->uconv_out != NULL) {
+ closeIcuConverter(handler->uconv_out);
+ handler->uconv_out = NULL;
+ }
+ if (handler->uconv_in != NULL) {
+ closeIcuConverter(handler->uconv_in);
+ handler->uconv_in = NULL;
+ }
+ xmlFree(handler);
+ }
+#endif
#ifdef DEBUG_ENCODING
if (ret)
xmlGenericError(xmlGenericErrorContext,
@@ -2212,6 +2432,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
cur += toconv;
} while (ret == -2);
#endif
+#ifdef LIBXML_ICU_ENABLED
+ } else if (handler->uconv_out != NULL) {
+ do {
+ toconv = in->end - cur;
+ written = 32000;
+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
+ &written, cur, &toconv);
+ if (ret < 0) {
+ if (written > 0)
+ ret = -2;
+ else
+ return(-1);
+ }
+ unused += written;
+ cur += toconv;
+ } while (ret == -2);
} else {
/* could not find a converter */
return(-1);
@@ -2223,8 +2459,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
}
return(in->consumed + (in->cur - in->base));
}
+#endif
-#ifndef LIBXML_ICONV_ENABLED
+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
#ifdef LIBXML_ISO8859X_ENABLED
/**
@@ -3296,4 +3533,3 @@ xmlRegisterCharEncodingHandlersISO8859x (void) {
#define bottom_encoding
#include "elfgcchack.h"
-
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index c74b25f..c68ec10 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -26,6 +26,24 @@
#ifdef LIBXML_ICONV_ENABLED
#include <iconv.h>
+#else
+#ifdef LIBXML_ICU_ENABLED
+#include <unicode/ucnv.h>
+#if 0
+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
+ * to prevent unwanted ICU symbols being exposed to users of libxml2.
+ * One particular case is Qt4 conflicting on UChar32.
+ */
+#include <stdint.h>
+struct UConverter;
+typedef struct UConverter UConverter;
+#ifdef _MSC_VER
+typedef wchar_t UChar;
+#else
+typedef uint16_t UChar;
+#endif
+#endif
+#endif
#endif
#ifdef __cplusplus
extern "C" {
@@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
* Block defining the handlers for non UTF-8 encodings.
* If iconv is supported, there are two extra fields.
*/
+#ifdef LIBXML_ICU_ENABLED
+struct _uconv_t {
+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */
+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
+};
+typedef struct _uconv_t uconv_t;
+#endif
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
@@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler {
iconv_t iconv_in;
iconv_t iconv_out;
#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ uconv_t *uconv_in;
+ uconv_t *uconv_out;
+#endif /* LIBXML_ICU_ENABLED */
};
#ifdef __cplusplus
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 567addb..bd9de24 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -276,6 +276,7 @@ struct _xmlParserCtxt {
int nsNr; /* the number of inherited namespaces */
int nsMax; /* the size of the arrays */
const xmlChar * *nsTab; /* the array of prefix/namespace name */
+ struct _xmlParserCtxt *nsParent; /* parent context to inherit namespaces from * */
int *attallocs; /* which attribute were allocated */
void * *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
@@ -1213,6 +1214,7 @@ typedef enum {
XML_WITH_DEBUG_MEM = 29,
XML_WITH_DEBUG_RUN = 30,
XML_WITH_ZLIB = 31,
+ XML_WITH_ICU = 32,
XML_WITH_NONE = 99999 /* just to be sure of allocation size */
} xmlFeature;
@@ -1223,4 +1225,3 @@ XMLPUBFUN int XMLCALL
}
#endif
#endif /* __XML_PARSER_H__ */
-
diff --git a/include/libxml/xmlversion.h b/include/libxml/xmlversion.h
index a98e00c..fb2b8ca 100644
--- a/include/libxml/xmlversion.h
+++ b/include/libxml/xmlversion.h
@@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif
/**
+ * LIBXML_ICU_ENABLED:
+ *
+ * Whether icu support is available
+ */
+#if 1
+#define LIBXML_ICU_ENABLED
+#endif
+
+/**
* LIBXML_ISO8859X_ENABLED:
*
* Whether ISO-8859-* support is made available in case iconv is not
@@ -454,5 +463,3 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
}
#endif /* __cplusplus */
#endif
-
-
diff --git a/parser.c b/parser.c
index 9db664f..306b84d 100644
--- a/parser.c
+++ b/parser.c
@@ -937,6 +937,12 @@ xmlHasFeature(xmlFeature feature)
#else
return(0);
#endif
+ case XML_WITH_ICU:
+#ifdef LIBXML_ICU_ENABLED
+ return(1);
+#else
+ return(0);
+#endif
default:
break;
}
@@ -8189,6 +8195,7 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
return(NULL);
return(ctxt->nsTab[i + 1]);
}
+ if (ctxt->nsParent) return xmlGetNamespace(ctxt->nsParent, prefix);
return(NULL);
}
@@ -12538,6 +12545,8 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
+ ctxt->nsParent = oldctxt;
+
oldsax = ctxt->sax;
ctxt->sax = oldctxt->sax;
xmlDetectSAX2(ctxt);
diff --git a/xmlregexp.c b/xmlregexp.c
index 73598a5..4258a08 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -6401,7 +6401,7 @@ xmlExpHashNameComputeKey(const xmlChar *name) {
if (name != NULL) {
value += 30 * (*name);
while ((ch = *name++) != 0) {
- value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
+ value = value ^ ((value << 5) + (value >> 3) + (unsigned short)ch);
}
}
return (value);
--
1.7.0.1

View File

@ -1,30 +0,0 @@
This patch fixes security problems described in issue 5533654. Since the original fixes in libxml2 includes multiple amendments and are somewhat larger in scope, we limit the fix to just this particular issue to play it safe.
The patch does what Chrome does to fix it.
Eventually, when we upgrade libxml2 library, the patch will be unnecessary.
--- a/xpath.c 2011-10-31 14:31:20.201049035 -0700
+++ b/xpath.c 2011-11-01 13:50:00.751736494 -0700
@@ -11736,11 +11736,16 @@
if ((ctxt->error != XPATH_EXPRESSION_OK) || (res == -1)) {
xmlXPathObjectPtr tmp;
- /* pop the result */
- tmp = valuePop(ctxt);
- xmlXPathReleaseObject(xpctxt, tmp);
- /* then pop off contextObj, which will be freed later */
- valuePop(ctxt);
+ /* pop the result if any */
+ tmp = valuePop(ctxt);
+ while (tmp != contextObj) {
+ /*
+ * Free up the result
+ * then pop off contextObj, which will be freed later
+ */
+ xmlXPathReleaseObject(xpctxt, tmp);
+ tmp = valuePop(ctxt);
+ }
goto evaluation_error;
}