mirror of
https://github.com/radareorg/radare2.git
synced 2024-12-01 00:51:19 +00:00
d8ceecbaa2
The R_XML_IS_*() macros assume that `ch` has more than 8 bits, so they can do clever subtraction and only perform a single comparison. While we could add casts to those macros, the 0..255 clamping logic also seems to assume there being more than 8 bits. So these things considered it seems to make the most sense to do what the original yxml code does, and use `unsigned`. It is also consistent with r_xml_refend().
1005 lines
24 KiB
C
1005 lines
24 KiB
C
/* r_xml is based on yxml from Yoran Heling (2013-2014) */
|
|
/* License: BSD */
|
|
/* $ git clone https://g.blicky.net/yxml.git */
|
|
/* https://dev.yorhel.nl/yxml */
|
|
|
|
#include <r_util.h>
|
|
#include <r_util/r_xml.h>
|
|
|
|
/* Character-class predicates used by the parser.
 *
 * The range checks (ALPHA, NUM, Hex) subtract the lower bound and perform a
 * single comparison, which only works when the subtraction is done in unsigned
 * arithmetic: a value below the bound must wrap around to a huge number.
 * The operand is therefore cast to unsigned inside the macro, so the result is
 * also correct when the argument has a narrow type (char, ut8) that the
 * compiler would otherwise promote to signed int. */
#define R_XML_IS_CHAR(c) true
/* 0xd should be part of SP, too, but r_xml_parse() already normalizes that into 0xa */
#define R_XML_IS_SP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0a)
#define R_XML_IS_ALPHA(c) ((((unsigned)(c))|32)-'a' < 26)
#define R_XML_IS_NUM(c) (((unsigned)(c)) - '0' < 10)
#define r_xml_isHex(c) (R_XML_IS_NUM(c) || (((unsigned)(c))|32)-'a' < 6)
#define r_xml_isEncName(c) (R_XML_IS_ALPHA(c) || R_XML_IS_NUM(c) || (c) == '.' || (c) == '_' || (c) == '-')
#define R_XML_IS_NAME_START(c) (R_XML_IS_ALPHA(c) || (c) == ':' || (c) == '_' || (c) >= 128)
#define R_XML_IS_NAME(x) (R_XML_IS_NAME_START (x) || R_XML_IS_NUM(x) || (x) == '-' || (x) == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
#define r_xml_isAttValue(c) (R_XML_IS_CHAR(c) && (c) != x->quote && (c) != '<' && (c) != '&')
/* Anything between '&' and ';', the r_xml_ref* functions will do further
 * validation. Strictly speaking, this is "R_XML_IS_NAME(c) || c == '#'", but
 * this parser doesn't understand entities with '.', ':', etc, anyway. */
#define R_XML_IS_REF(c) (R_XML_IS_NUM(c) || R_XML_IS_ALPHA (c) || (c) == '#')

/* Pack five byte values into one 64-bit integer, `a` in the highest position. */
#define INTFROM5CHARS(a, b, c, d, e) ((((ut64)(a))<<32) | (((ut64)(b))<<24) | (((ut64)(c))<<16) | (((ut64)(d))<<8) | (ut64)(e))
|
|
|
|
|
|
/* Set the given char value to ch (0<=ch<=255). */
|
|
static inline void r_xml_setchar(char *dest, unsigned ch) {
|
|
*(ut8 *)dest = ch;
|
|
}
|
|
|
|
/* Encode the code point ch as UTF-8 into dest and terminate it with '\0'.
 * One to four bytes are written before the terminator, so dest must have
 * room for at least 5 bytes. */
static void r_xml_setutf8(char *dest, unsigned ch) {
	char *out = dest;
	if (ch > 0xFFFF) {
		/* four-byte sequence */
		r_xml_setchar (out++, 0xF0 | (ch >> 18));
		r_xml_setchar (out++, 0x80 | ((ch >> 12) & 0x3F));
		r_xml_setchar (out++, 0x80 | ((ch >> 6) & 0x3F));
		r_xml_setchar (out++, 0x80 | (ch & 0x3F));
	} else if (ch > 0x07FF) {
		/* three-byte sequence */
		r_xml_setchar (out++, 0xE0 | (ch >> 12));
		r_xml_setchar (out++, 0x80 | ((ch >> 6) & 0x3F));
		r_xml_setchar (out++, 0x80 | (ch & 0x3F));
	} else if (ch > 0x007F) {
		/* two-byte sequence */
		r_xml_setchar (out++, 0xC0 | (ch >> 6));
		r_xml_setchar (out++, 0x80 | (ch & 0x3F));
	} else {
		/* plain ASCII */
		r_xml_setchar (out++, ch);
	}
	*out = 0;
}
|
|
|
|
/* Put the single byte ch into x->data as a NUL-terminated string and report
 * it as element content. */
static inline RXmlRet r_xml_datacontent(RXml *x, unsigned ch) {
	char *out = x->data;
	r_xml_setchar (out, ch);
	out[1] = 0;
	return R_XML_CONTENT;
}
|
|
|
|
/* Put the single byte ch into x->data as a NUL-terminated string and report
 * it as processing-instruction content. */
static inline RXmlRet r_xml_datapi1(RXml *x, unsigned ch) {
	char *out = x->data;
	r_xml_setchar (out, ch);
	out[1] = 0;
	return R_XML_PICONTENT;
}
|
|
|
|
/* Emit "?" followed by ch as processing-instruction content. r_xml_parse()
 * uses this when a '?' was seen but the next byte was not '>'. */
static inline RXmlRet r_xml_datapi2(RXml *x, unsigned ch) {
	char *out = x->data;
	out[0] = '?';
	r_xml_setchar (out + 1, ch);
	out[2] = 0;
	return R_XML_PICONTENT;
}
|
|
|
|
/* Emit "]" followed by ch as content. r_xml_parse() uses this when a single
 * ']' inside CDATA turned out not to start the closing sequence. */
static inline RXmlRet r_xml_datacd1(RXml *x, unsigned ch) {
	char *out = x->data;
	out[0] = ']';
	r_xml_setchar (out + 1, ch);
	out[2] = 0;
	return R_XML_CONTENT;
}
|
|
|
|
/* Emit "]]" followed by ch as content. r_xml_parse() uses this when two ']'
 * inside CDATA were not followed by '>'. */
static inline RXmlRet r_xml_datacd2(RXml *x, unsigned ch) {
	char *out = x->data;
	out[0] = ']';
	out[1] = ']';
	r_xml_setchar (out + 2, ch);
	out[3] = 0;
	return R_XML_CONTENT;
}
|
|
|
|
/* Emit one byte of an attribute value through x->data. Tab (0x9) and newline
 * (0xa) are replaced by a space (0x20), following the attribute-value
 * normalization of XML spec section 3.3.3. */
static inline RXmlRet r_xml_dataattr(RXml *x, unsigned ch) {
	unsigned out = ch;
	if (ch == 0x9 || ch == 0xa) {
		out = 0x20;
	}
	r_xml_setchar (x->data, out);
	x->data[1] = 0;
	return R_XML_ATTRVAL;
}
|
|
|
|
/* Start a new NUL-terminated name on the stack with ch as its first byte.
 * The byte at the current stacklen is left untouched (it separates the new
 * name from the previous one), *res is pointed at the new name, and stacklen
 * ends up on the new terminator. Returns R_XML_ESTACK when the two extra bytes
 * do not fit, R_XML_OK otherwise. */
static RXmlRet r_xml_pushstack(RXml *x, char **res, unsigned ch) {
	if (x->stacklen + 2 >= x->stacksize) {
		return R_XML_ESTACK;
	}
	ut8 *slot = x->stack + x->stacklen + 1;
	*res = (char *)slot;
	slot[0] = ch;
	slot[1] = 0;
	x->stacklen += 2;
	return R_XML_OK;
}
|
|
|
|
/* Append ch to the name currently on top of the stack, overwriting its
 * terminator and writing a new one after it. Returns R_XML_ESTACK when there
 * is no room for the extra byte, R_XML_OK otherwise. */
static RXmlRet r_xml_pushstackc(RXml *x, unsigned ch) {
	if (x->stacklen + 1 >= x->stacksize) {
		return R_XML_ESTACK;
	}
	x->stack[x->stacklen++] = ch;
	x->stack[x->stacklen] = 0;
	return R_XML_OK;
}
|
|
|
|
/* Drop the topmost name: step stacklen back at least once and keep stepping
 * until it indexes a zero byte. */
static void r_xml_popstack(RXml *x) {
	x->stacklen--;
	while (x->stack[x->stacklen]) {
		x->stacklen--;
	}
}
|
|
|
|
/* Begin a new element name on the stack; x->elem is set to point at it. */
static inline RXmlRet xml_elemstart(RXml *x, unsigned ch) {
	char **target = &x->elem;
	return r_xml_pushstack (x, target, ch);
}
|
|
/* Append one more byte to the element name being collected on the stack. */
static inline RXmlRet xml_elemname(RXml *x, unsigned ch) {
	RXmlRet status = r_xml_pushstackc (x, ch);
	return status;
}
|
|
/* The element name is complete: report R_XML_ELEMSTART. Neither argument is used. */
static inline RXmlRet xml_elemnameend(RXml *x, unsigned ch) {
	return R_XML_ELEMSTART;
}
|
|
|
|
/* Also used in xml_elemcloseend (), since this function just removes the last
|
|
* element from the stack and returns ELEMEND. */
|
|
static RXmlRet r_xml_selfclose(RXml *x, unsigned ch) {
|
|
r_xml_popstack (x);
|
|
if (x->stacklen) {
|
|
x->elem = (char *)x->stack+x->stacklen-1;
|
|
while (*(x->elem-1)) {
|
|
x->elem--;
|
|
}
|
|
return R_XML_ELEMEND;
|
|
}
|
|
x->elem = (char *)x->stack;
|
|
x->state = R_XML_STATE_MISC3;
|
|
return R_XML_ELEMEND;
|
|
}
|
|
|
|
/* Compare one byte of a closing tag with the byte x->elem points at.
 * On a match x->elem advances and R_XML_OK is returned; otherwise
 * R_XML_ECLOSE is returned. */
static inline RXmlRet xml_elemclose(RXml *x, unsigned ch) {
	ut8 expected = *(ut8 *)x->elem;
	if (expected == ch) {
		x->elem++;
		return R_XML_OK;
	}
	return R_XML_ECLOSE;
}
|
|
|
|
/* End of a closing tag's name: if x->elem is not at a terminator, the closing
 * name was shorter than the open element's name and R_XML_ECLOSE is returned;
 * otherwise the element is popped via r_xml_selfclose (). */
static inline RXmlRet xml_elemcloseend (RXml *x, unsigned ch) {
	return *x->elem ? R_XML_ECLOSE : r_xml_selfclose (x, ch);
}
|
|
|
|
/* Begin a new attribute name on the stack; x->attr is set to point at it. */
static inline RXmlRet r_xml_attrstart(RXml *x, unsigned ch) {
	char **target = &x->attr;
	return r_xml_pushstack (x, target, ch);
}
|
|
/* Append one more byte to the attribute name being collected on the stack. */
static inline RXmlRet r_xml_attrname(RXml *x, unsigned ch) {
	RXmlRet status = r_xml_pushstackc (x, ch);
	return status;
}
|
|
/* The attribute name is complete: report R_XML_ATTRSTART. Neither argument is used. */
static inline RXmlRet r_xml_attrnameend(RXml *x, unsigned ch) {
	return R_XML_ATTRSTART;
}
|
|
/* The attribute value is complete: drop the attribute name from the stack and
 * report R_XML_ATTREND. */
static inline RXmlRet r_xml_attrvalend(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	return R_XML_ATTREND;
}
|
|
|
|
|
|
/* Begin a new processing-instruction target name on the stack; x->pi is set
 * to point at it. */
static inline RXmlRet r_xml_pistart(RXml *x, unsigned ch) {
	char **target = &x->pi;
	return r_xml_pushstack (x, target, ch);
}
|
|
/* Append one more byte to the processing-instruction target name on the stack. */
static inline RXmlRet r_xml_piname(RXml *x, unsigned ch) {
	RXmlRet status = r_xml_pushstackc (x, ch);
	return status;
}
|
|
/* Discard the name collected so far from the stack without emitting a token. */
static inline RXmlRet r_xml_piabort(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	return R_XML_OK;
}
|
|
/* End of a processing-instruction target name. A target that is exactly "xml"
 * in any letter case yields R_XML_ESYN; every other name yields R_XML_PISTART. */
static inline RXmlRet r_xml_pinameend(RXml *x, unsigned ch) {
	const char *name = x->pi;
	if ((name[0] | 32) != 'x' || (name[1] | 32) != 'm' || (name[2] | 32) != 'l' || name[3]) {
		return R_XML_PISTART;
	}
	return R_XML_ESYN;
}
|
|
/* End of a processing instruction: drop its target name from the stack, reset
 * x->pi to the stack base and report R_XML_PIEND. */
static inline RXmlRet r_xml_pivalend(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	x->pi = (char *)x->stack;
	return R_XML_PIEND;
}
|
|
|
|
/* Prepare for collecting a reference: reset the length counter and zero the
 * whole x->data buffer. */
static inline RXmlRet r_xml_refstart(RXml *x, unsigned ch) {
	x->reflen = 0;
	memset (x->data, 0, sizeof (x->data));
	return R_XML_OK;
}
|
|
|
|
/* Append one byte of a reference to x->data. The last byte of the buffer is
 * never written here; once only that byte is left, R_XML_EREF is returned. */
static RXmlRet r_xml_ref(RXml *x, unsigned ch) {
	if (x->reflen < sizeof (x->data) - 1) {
		r_xml_setchar (x->data + x->reflen, ch);
		x->reflen++;
		return R_XML_OK;
	}
	return R_XML_EREF;
}
|
|
|
|
/* Resolve the reference collected in x->data (the text between '&' and ';').
 *
 * "#x<hex>" and "#<decimal>" are numeric character references; "lt", "gt",
 * "amp", "apos" and "quot" are the predefined entities. On success x->data is
 * overwritten with the UTF-8 encoding of the resulting code point and `ret` is
 * returned. Unknown names, malformed numbers and code points that are not
 * valid XML characters yield R_XML_EREF. */
static RXmlRet r_xml_refend (RXml *x, RXmlRet ret) {
	ut8 *r = (ut8 *)x->data;
	unsigned ch = 0;
	if (*r == '#') {
		/* The byte is widened to unsigned (not ut8) before being handed to the
		 * range macros: they rely on unsigned wraparound, and with a narrow
		 * operand the NUL terminator and '#' would be accepted as digits,
		 * letting the loops run past the end of x->data. */
		if (r[1] == 'x') {
			for (r += 2; r_xml_isHex ((unsigned)*r); r++) {
				ch = (ch << 4) + (*r <= '9' ? *r - '0' : (*r | 32) - 'a' + 10);
			}
		} else {
			for (r++; R_XML_IS_NUM ((unsigned)*r); r++) {
				ch = (ch * 10) + (*r - '0');
			}
		}
		/* Anything left over after the digits makes the reference invalid. */
		if (*r) {
			ch = 0;
		}
	} else {
		/* The buffer was zeroed by r_xml_refstart (), so the first five bytes
		 * identify a predefined entity name including its terminator. */
		ut64 i = INTFROM5CHARS (r[0], r[1], r[2], r[3], r[4]);
		ch =
			i == INTFROM5CHARS ('l','t', 0, 0, 0) ? '<' :
			i == INTFROM5CHARS ('g','t', 0, 0, 0) ? '>' :
			i == INTFROM5CHARS ('a','m','p', 0, 0) ? '&' :
			i == INTFROM5CHARS ('a','p','o','s',0) ? '\'':
			i == INTFROM5CHARS ('q','u','o','t',0) ? '"' : 0;
	}

	/* Codepoints not allowed in the XML 1.1 definition of a Char:
	 * zero, anything above U+10FFFF, U+FFFE, U+FFFF and the surrogate
	 * block U+D800..U+DFFF. */
	if (!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
		return R_XML_EREF;
	}
	r_xml_setutf8 (x->data, ch);
	return ret;
}
|
|
|
|
/* Finish a reference that appeared in element content. ch is unused; it is
 * declared unsigned to match the other per-character handlers. */
static inline RXmlRet r_xml_refcontent(RXml *x, unsigned ch) {
	return r_xml_refend (x, R_XML_CONTENT);
}
|
|
/* Finish a reference that appeared in an attribute value. ch is unused; it is
 * declared unsigned to match the other per-character handlers. */
static inline RXmlRet r_xml_refattrval(RXml *x, unsigned ch) {
	return r_xml_refend (x, R_XML_ATTRVAL);
}
|
|
|
|
/* Initialize a parser state object.
 *
 * x         - state to initialize; all of its fields are overwritten.
 * stack     - caller-provided buffer used for element/attribute/PI names.
 * stacksize - size of that buffer in bytes.
 *
 * The call is rejected (nothing is touched) when x or stack is NULL or when
 * stacksize is 0, because the first stack byte is written below. */
R_API void r_xml_init(RXml *x, void *stack, size_t stacksize) {
	r_return_if_fail (x && stack && stacksize > 0);
	/* Zeroes every field that is not assigned explicitly below. */
	memset (x, 0, sizeof (*x));
	x->line = 1;
	x->stack = (ut8*)stack;
	x->stacksize = stacksize;
	/* stack[0] is the zero byte in front of the first pushed name. */
	*x->stack = 0;
	x->elem = x->pi = x->attr = (char *)x->stack;
	x->state = R_XML_STATE_INIT;
}
|
|
|
|
/* Allocate a parser together with a name stack of `stacksize` bytes.
 *
 * Returns NULL when stacksize is not positive or when either allocation
 * fails. Release the result with r_xml_free (). */
R_API RXml *r_xml_new(int stacksize) {
	if (stacksize < 1) {
		return NULL;
	}
	void *stack = malloc ((size_t)stacksize);
	if (!stack) {
		return NULL;
	}
	RXml *x = R_NEW (RXml);
	if (!x) {
		free (stack);
		return NULL;
	}
	r_xml_init (x, stack, stacksize);
	return x;
}
|
|
|
|
/* Release a parser and the stack buffer it points at. NULL is accepted and
 * ignored. */
R_API void r_xml_free(RXml *x) {
	if (!x) {
		return;
	}
	free (x->stack);
	free (x);
}
|
|
|
|
/* Feed one input byte to the parser.
 *
 * _ch is reduced to the range 0..255; a zero byte yields R_XML_ESYN. The byte
 * then goes through end-of-line normalization and is dispatched on x->state.
 * Every case either returns a token / R_XML_OK (usually after updating
 * x->state) or breaks out of the switch, in which case R_XML_ESYN is returned. */
R_API RXmlRet r_xml_parse(RXml *x, int _ch) {
	/* Ensure that characters are in the range of 0..255 rather than -128..127.
	 * All character comparisons are done with positive integers. */
	ut32 ch = (ut32)(_ch + 256) & 0xff;
	if (!ch) {
		return R_XML_ESYN;
	}
	x->total++;

	/* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and
	 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds
	 * some non-ASCII character sequences to this list, but we can only handle
	 * ASCII here without making assumptions about the input encoding. */
	if (x->ignore == ch) {
		x->ignore = 0;
		return R_XML_OK;
	}
	/* After a '\r', a directly following '\n' is swallowed by the check above. */
	x->ignore = (ch == 0xd) * 0xa;
	if (ch == 0xa || ch == 0xd) {
		ch = 0xa;
		x->line++;
		x->byte = 0;
	}
	x->byte++;

	switch (x->state) {
	/* Match the literal x->string byte by byte; when it is exhausted,
	 * continue in x->nextstate. */
	case R_XML_STATE_STRING:
		if (ch == *x->string) {
			x->string++;
			if (!*x->string) {
				x->state = x->nextstate;
			}
			return R_XML_OK;
		}
		break;
	/* ATTR0: collecting an attribute name until whitespace or '='. */
	case R_XML_STATE_ATTR0:
		if (R_XML_IS_NAME (ch)) {
			return r_xml_attrname (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ATTR1;
			return r_xml_attrnameend (x, ch);
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ATTR2;
			return r_xml_attrnameend (x, ch);
		}
		break;
	/* ATTR1: whitespace after the attribute name, waiting for '='. */
	case R_XML_STATE_ATTR1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ATTR2;
			return R_XML_OK;
		}
		break;
	/* ATTR2: after '=', waiting for the opening quote, which is remembered. */
	case R_XML_STATE_ATTR2:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_ATTR3;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	/* ATTR3: attribute value; '&' starts a reference, the matching quote ends it. */
	case R_XML_STATE_ATTR3:
		if (r_xml_isAttValue(ch)) {
			return r_xml_dataattr (x, ch);
		}
		if (ch == (ut8)'&') {
			x->state = R_XML_STATE_ATTR4;
			return r_xml_refstart (x, ch);
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_ELEM2;
			return r_xml_attrvalend (x, ch);
		}
		break;
	/* ATTR4: reference inside an attribute value, terminated by ';' (0x3b). */
	case R_XML_STATE_ATTR4:
		if (R_XML_IS_REF (ch)) {
			return r_xml_ref (x, ch);
		}
		if (ch == (ut8)'\x3b') {
			x->state = R_XML_STATE_ATTR3;
			return r_xml_refattrval (x, ch);
		}
		break;
	/* CD0..CD2: CDATA body; CD1 and CD2 mean one resp. two ']' were just seen. */
	case R_XML_STATE_CD0:
		if (ch == (ut8)']') {
			x->state = R_XML_STATE_CD1;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datacontent(x, ch);
		}
		break;
	case R_XML_STATE_CD1:
		if (ch == (ut8)']') {
			x->state = R_XML_STATE_CD2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_CD0;
			return r_xml_datacd1 (x, ch);
		}
		break;
	case R_XML_STATE_CD2:
		/* A third ']' emits one ']' and stays in CD2. */
		if (ch == (ut8)']') {
			return r_xml_datacontent (x, ch);
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_CD0;
			return r_xml_datacd2 (x, ch);
		}
		break;
	/* COMMENT0/1: the two '-' that open a comment. COMMENT2: comment body.
	 * COMMENT3: one '-' seen in the body. COMMENT4: two '-' seen, only '>' is accepted. */
	case R_XML_STATE_COMMENT0:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_COMMENT1:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT2;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_COMMENT2:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT3;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_COMMENT3:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT4;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR(ch)) {
			x->state = R_XML_STATE_COMMENT2;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_COMMENT4:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return R_XML_OK;
		}
		break;
	/* DT0..DT4: entered after the "DOCTYPE" literal (see R_XML_STATE_LEE1).
	 * The content is skipped without emitting tokens; DT1 skips a quoted string. */
	case R_XML_STATE_DT0:
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_DT1;
			x->quote = ch;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_DT2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_DT1:
		if (x->quote == ch) {
			x->state = x->nextstate;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_DT2:
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_DT3;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_DT3:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_DT4;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_DT4:
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_DT1;
			x->quote = ch;
			x->nextstate = R_XML_STATE_DT4;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	/* ELEM0: collecting an element name. ELEM1: inside the tag after whitespace,
	 * an attribute may start. ELEM2: directly after an attribute value.
	 * ELEM3: '/' seen, only '>' is accepted. */
	case R_XML_STATE_ELEM0:
		if (R_XML_IS_NAME (ch)) {
			return xml_elemname (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ELEM1;
			return xml_elemnameend (x, ch);
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return xml_elemnameend (x, ch);
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return xml_elemnameend (x, ch);
		}
		break;
	case R_XML_STATE_ELEM1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ATTR0;
			return r_xml_attrstart (x, ch);
		}
		break;
	case R_XML_STATE_ELEM2:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ELEM1;
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ELEM3:
		if (ch == (ut8)'>') {
			/* r_xml_selfclose () may override this with MISC3 when the stack empties. */
			x->state = R_XML_STATE_MISC2;
			return r_xml_selfclose(x, ch);
		}
		break;
	/* ENC0..ENC3: entered after the "encoding" literal of the XML declaration:
	 * '=', opening quote, then a name starting with a letter. */
	case R_XML_STATE_ENC0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ENC1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_ENC2;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC2:
		if (R_XML_IS_ALPHA (ch)) {
			x->state = R_XML_STATE_ENC3;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC3:
		if (r_xml_isEncName (ch)) {
			return R_XML_OK;
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL6;
			return R_XML_OK;
		}
		break;
	/* ETAG0..ETAG2: closing tag; each name byte is compared against the
	 * currently open element through xml_elemclose (). */
	case R_XML_STATE_ETAG0:
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ETAG1;
			return xml_elemclose(x, ch);
		}
		break;
	case R_XML_STATE_ETAG1:
		if (R_XML_IS_NAME (ch)) {
			return xml_elemclose(x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ETAG2;
			return xml_elemcloseend (x, ch);
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return xml_elemcloseend (x, ch);
		}
		break;
	case R_XML_STATE_ETAG2:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		break;
	/* INIT: very first byte; 0xEF must be followed by 0xBB 0xBF (UTF-8 BOM). */
	case R_XML_STATE_INIT:
		if (ch == (ut8)'\xef') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_MISC0;
			x->string = (ut8 *)"\xbb\xbf";
			return R_XML_OK;
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_MISC0;
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le0;
			return R_XML_OK;
		}
		break;
	/* le0..le3: a '<' was seen in MISC0..MISC3 respectively. */
	case R_XML_STATE_le0:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE1;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_LEQ0;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	case R_XML_STATE_le1:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE1;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	case R_XML_STATE_le2:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE2;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ETAG0;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	case R_XML_STATE_le3:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_COMMENT0;
			x->nextstate = R_XML_STATE_MISC3;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC3;
			return R_XML_OK;
		}
		break;
	/* LEE1: "<!" seen before the root element: a comment or "DOCTYPE". */
	case R_XML_STATE_LEE1:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (ch == (ut8)'D') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_DT0;
			x->string = (ut8 *)"OCTYPE";
			return R_XML_OK;
		}
		break;
	/* LEE2: "<!" seen inside element content: a comment or "[CDATA[". */
	case R_XML_STATE_LEE2:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (ch == (ut8)'[') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_CD0;
			x->string = (ut8 *)"CDATA[";
			return R_XML_OK;
		}
		break;
	/* LEQ0: "<?" seen in the prolog; an 'x' may start the XML declaration. */
	case R_XML_STATE_LEQ0:
		if (ch == (ut8)'x') {
			x->state = R_XML_STATE_XMLDECL0;
			x->nextstate = R_XML_STATE_MISC1;
			return r_xml_pistart (x, ch);
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_PI1;
			x->nextstate = R_XML_STATE_MISC1;
			return r_xml_pistart (x, ch);
		}
		break;
	/* MISC0/MISC1: whitespace or '<' before the root element.
	 * MISC2: element content. MISC3: after the root element was closed. */
	case R_XML_STATE_MISC0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le0;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_MISC1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_MISC2:
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le2;
			return R_XML_OK;
		}
		if (ch == (ut8)'&') {
			x->state = R_XML_STATE_MISC2a;
			return r_xml_refstart (x, ch);
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datacontent(x, ch);
		}
		break;
	/* MISC2a: reference inside element content, terminated by ';' (0x3b). */
	case R_XML_STATE_MISC2a:
		if (R_XML_IS_REF (ch)) {
			return r_xml_ref(x, ch);
		}
		if (ch == (ut8)'\x3b') {
			x->state = R_XML_STATE_MISC2;
			return r_xml_refcontent(x, ch);
		}
		break;
	case R_XML_STATE_MISC3:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le3;
			return R_XML_OK;
		}
		break;
	/* PI0/PI1: processing-instruction target name. PI2: PI content.
	 * PI3: '?' seen inside the content. PI4: '?' directly after the name. */
	case R_XML_STATE_PI0:
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_pistart (x, ch);
		}
		break;
	case R_XML_STATE_PI1:
		if (R_XML_IS_NAME (ch)) {
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	case R_XML_STATE_PI2:
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI3;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datapi1 (x, ch);
		}
		break;
	case R_XML_STATE_PI3:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return r_xml_pivalend (x, ch);
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_datapi2(x, ch);
		}
		break;
	case R_XML_STATE_PI4:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return r_xml_pivalend (x, ch);
		}
		break;
	/* STD0..STD3: entered after the "standalone" literal of the XML
	 * declaration: '=', opening quote, "yes" or "no", closing quote. */
	case R_XML_STATE_STD0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_STD1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_STD2;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD2:
		if (ch == (ut8)'y') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD3;
			x->string = (ut8 *)"es";
			return R_XML_OK;
		}
		if (ch == (ut8)'n') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD3;
			x->string = (ut8 *)"o";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD3:
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL8;
			return R_XML_OK;
		}
		break;
	/* VER0..VER3: entered after the "version" literal of the XML declaration:
	 * '=', opening quote, the literal "1.", at least one digit, closing quote. */
	case R_XML_STATE_VER0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_VER1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_STRING;
			x->quote = ch;
			x->nextstate = R_XML_STATE_VER2;
			x->string = (ut8 *)"1.";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER2:
		if (R_XML_IS_NUM(ch)) {
			x->state = R_XML_STATE_VER3;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER3:
		if (R_XML_IS_NUM (ch)) {
			return R_XML_OK;
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL4;
			return R_XML_OK;
		}
		break;
	/* XMLDECL0..XMLDECL2: "<?x", "<?xm", "<?xml" seen. Any other name byte
	 * turns the construct into an ordinary processing instruction (PI1). */
	case R_XML_STATE_XMLDECL0:
		if (ch == (ut8)'m') {
			x->state = R_XML_STATE_XMLDECL1;
			return r_xml_piname (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	case R_XML_STATE_XMLDECL1:
		if (ch == (ut8)'l') {
			x->state = R_XML_STATE_XMLDECL2;
			return r_xml_piname (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	case R_XML_STATE_XMLDECL2:
		/* "<?xml" followed by whitespace: drop the collected name, no PI token. */
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_XMLDECL3;
			return r_xml_piabort (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		break;
	/* XMLDECL3: expects "version". XMLDECL4/5: after the version value.
	 * XMLDECL6/7: after the encoding value. XMLDECL8: after the standalone
	 * value. XMLDECL9: '?' seen, only '>' is accepted. */
	case R_XML_STATE_XMLDECL3:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'v') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_VER0;
			x->string = (ut8 *)"ersion";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL4:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_XMLDECL5;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL5:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		if (ch == (ut8)'e') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_ENC0;
			x->string = (ut8 *)"ncoding";
			return R_XML_OK;
		}
		if (ch == (ut8)'s') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD0;
			x->string = (ut8 *)"tandalone";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL6:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_XMLDECL7;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL7:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		if (ch == (ut8)'s') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD0;
			x->string = (ut8 *)"tandalone";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL8:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL9:
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		break;
	}
	/* No rule of the current state accepted this byte. */
	return R_XML_ESYN;
}
|
|
|
|
/* Report whether the input may end here: R_XML_OK only when the parser is in
 * R_XML_STATE_MISC3 (the state r_xml_selfclose () enters once the element
 * stack is empty), R_XML_EEOF otherwise. */
R_API RXmlRet r_xml_eof(RXml *x) {
	if (x->state != R_XML_STATE_MISC3) {
		return R_XML_EEOF;
	}
	return R_XML_OK;
}
|