/* r_xml is based on yxml from Yoran Heling (2013-2014) */
/* License: BSD */
/* $ git clone https://g.blicky.net/yxml.git */
/* https://dev.yorhel.nl/yxml */

#include <r_util.h>
#include <r_util/r_xml.h>

/* Every byte is accepted as a Char: this parser does not validate the input
 * encoding. */
#define R_XML_IS_CHAR(c) true
/* 0xd should be part of SP, too, but r_xml_parse() already normalizes that into 0xa */
#define R_XML_IS_SP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0a)
/* The range tests below subtract the lower bound and compare the result as an
 * unsigned value, so anything below the range wraps around to a huge number.
 * The explicit (unsigned) cast is required: without it a narrow argument
 * (char, ut8) is promoted to a signed int, the subtraction goes negative and
 * e.g. a NUL byte would be classified as a digit. */
#define R_XML_IS_ALPHA(c) ((((unsigned)(c))|32)-'a' < 26)
#define R_XML_IS_NUM(c) (((unsigned)(c)) - '0' < 10)
#define r_xml_isHex(c) (R_XML_IS_NUM(c) || (((unsigned)(c))|32)-'a' < 6)
#define r_xml_isEncName(c) (R_XML_IS_ALPHA(c) || R_XML_IS_NUM(c) || (c) == '.' || (c) == '_' || (c) == '-')
#define R_XML_IS_NAME_START(c) (R_XML_IS_ALPHA(c) || (c) == ':' || (c) == '_' || (c) >= 128)
#define R_XML_IS_NAME(x) (R_XML_IS_NAME_START (x) || R_XML_IS_NUM(x) || (x) == '-' || (x) == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
#define r_xml_isAttValue(c) (R_XML_IS_CHAR(c) && (c) != x->quote && (c) != '<' && (c) != '&')
/* Anything between '&' and ';', the r_xml_ref* functions will do further
 * validation. Strictly speaking, this is "R_XML_IS_NAME(c) || c == '#'", but
 * this parser doesn't understand entities with '.', ':', etc, anyway.  */
#define R_XML_IS_REF(c) (R_XML_IS_NUM(c) || R_XML_IS_ALPHA (c) || (c) == '#')

/* Pack five bytes into one 64-bit integer so that short entity names can be
 * compared with a single integer comparison. */
#define INTFROM5CHARS(a, b, c, d, e) ((((ut64)(a))<<32) | (((ut64)(b))<<24) | (((ut64)(c))<<16) | (((ut64)(d))<<8) | (ut64)(e))


/* Set the given char value to ch (0<=ch<=255). */
static inline void r_xml_setchar(char *dest, unsigned ch) {
	*(ut8 *)dest = ch;
}

/* Encode the code point ch as UTF-8 into dest and terminate it with '\0'.
 * Up to four encoded bytes plus the terminator are written, so dest must have
 * room for at least 5 bytes. */
static void r_xml_setutf8(char *dest, unsigned ch) {
	unsigned lead;
	int shift; /* bit offset of the payload carried by the lead byte */

	if (ch <= 0x007F) {
		lead = ch;
		shift = 0;
	} else if (ch <= 0x07FF) {
		lead = 0xC0 | (ch >> 6);
		shift = 6;
	} else if (ch <= 0xFFFF) {
		lead = 0xE0 | (ch >> 12);
		shift = 12;
	} else {
		lead = 0xF0 | (ch >> 18);
		shift = 18;
	}
	r_xml_setchar (dest++, lead);
	/* every continuation byte carries the next lower 6 bits */
	while (shift > 0) {
		shift -= 6;
		r_xml_setchar (dest++, 0x80 | ((ch >> shift) & 0x3F));
	}
	*dest = 0;
}

/* Publish ch as a one-byte, NUL-terminated string in x->data and report it as
 * element content. */
static inline RXmlRet r_xml_datacontent(RXml *x, unsigned ch) {
	char *out = x->data;
	r_xml_setchar (out, ch);
	out[1] = 0;
	return R_XML_CONTENT;
}

/* Publish ch as a one-byte, NUL-terminated string in x->data and report it as
 * processing-instruction content. */
static inline RXmlRet r_xml_datapi1(RXml *x, unsigned ch) {
	char *out = x->data;
	r_xml_setchar (out, ch);
	out[1] = 0;
	return R_XML_PICONTENT;
}

/* Publish "?" followed by ch in x->data as processing-instruction content.
 * Used by r_xml_parse() when a '?' inside a PI was not followed by '>', so
 * the '?' that was held back has to be delivered together with ch. */
static inline RXmlRet r_xml_datapi2(RXml *x, unsigned ch) {
	char *out = x->data;
	*out++ = '?';
	r_xml_setchar (out++, ch);
	*out = 0;
	return R_XML_PICONTENT;
}

/* Publish "]" followed by ch in x->data as content. Used by r_xml_parse()
 * when a single ']' inside CDATA was not followed by another ']'. */
static inline RXmlRet r_xml_datacd1(RXml *x, unsigned ch) {
	char *out = x->data;
	*out++ = ']';
	r_xml_setchar (out++, ch);
	*out = 0;
	return R_XML_CONTENT;
}

/* Publish "]]" followed by ch in x->data as content. Used by r_xml_parse()
 * when "]]" inside CDATA was followed by something other than ']' or '>'. */
static inline RXmlRet r_xml_datacd2(RXml *x, unsigned ch) {
	char *out = x->data;
	*out++ = ']';
	*out++ = ']';
	r_xml_setchar (out++, ch);
	*out = 0;
	return R_XML_CONTENT;
}

/* Publish one attribute-value byte in x->data as a NUL-terminated string.
 * Tab (0x9) and newline (0xa) are replaced by a space, following the
 * attribute-value normalization of XML spec section 3.3.3. */
static inline RXmlRet r_xml_dataattr(RXml *x, unsigned ch) {
	unsigned normalized = ch;
	if (ch == 0x9 || ch == 0xa) {
		normalized = 0x20;
	}
	r_xml_setchar (x->data, normalized);
	x->data[1] = 0;
	return R_XML_ATTRVAL;
}

/* Start a new NUL-terminated name on the stack with ch as its first byte.
 * The new name is placed after the NUL that terminates the previous entry and
 * *res is set to point at it. Returns R_XML_ESTACK (and leaves the stack
 * untouched) when the first byte plus its terminator would not fit. */
static RXmlRet r_xml_pushstack(RXml *x, char **res, unsigned ch) {
	if (x->stacklen + 2 < x->stacksize) {
		/* step over the terminator of the previous entry */
		x->stacklen++;
		*res = (char *)x->stack + x->stacklen;
		x->stack[x->stacklen++] = ch;
		x->stack[x->stacklen] = 0;
		return R_XML_OK;
	}
	return R_XML_ESTACK;
}

/* Append ch to the name currently on top of the stack, overwriting its
 * terminator and writing a new one after it. Returns R_XML_ESTACK when the
 * byte plus terminator would not fit. */
static RXmlRet r_xml_pushstackc(RXml *x, unsigned ch) {
	if (x->stacklen + 1 < x->stacksize) {
		x->stack[x->stacklen++] = ch;
		x->stack[x->stacklen] = 0;
		return R_XML_OK;
	}
	return R_XML_ESTACK;
}

/* Drop the top name from the stack: move stacklen back (by at least one) until
 * it rests on a NUL byte, i.e. the terminator of the previous entry. */
static void r_xml_popstack(RXml *x) {
	x->stacklen--;
	while (x->stack[x->stacklen]) {
		x->stacklen--;
	}
}

/* Begin a new element name on the stack and point x->elem at it. */
static inline RXmlRet xml_elemstart(RXml *x, unsigned ch) {
	char **slot = &x->elem;
	return r_xml_pushstack (x, slot, ch);
}
/* Append one more byte to the element name being collected on the stack. */
static inline RXmlRet xml_elemname(RXml *x, unsigned ch) {
	return r_xml_pushstackc (x, ch);
}
/* The element name is complete: report the start of the element. Neither
 * argument is used. */
static inline RXmlRet xml_elemnameend(RXml *x, unsigned ch) {
	return R_XML_ELEMSTART;
}

/* Pop the current element from the stack and report R_XML_ELEMEND.
 * xml_elemcloseend () reuses this, since closing a tag is the same operation.
 * If another entry remains, x->elem is moved to the start of that name;
 * if the stack became empty, x->elem is reset to the stack base and the
 * parser state is switched to R_XML_STATE_MISC3. ch is not used. */
static RXmlRet r_xml_selfclose(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	if (!x->stacklen) {
		x->elem = (char *)x->stack;
		x->state = R_XML_STATE_MISC3;
		return R_XML_ELEMEND;
	}
	/* stacklen rests on the terminator of the new top entry: start at its
	 * last byte and walk back to just after the preceding NUL */
	char *name = (char *)x->stack + x->stacklen - 1;
	while (name[-1]) {
		name--;
	}
	x->elem = name;
	return R_XML_ELEMEND;
}

/* Compare one byte of a closing tag name against the open element's name.
 * x->elem is used as the cursor into the stored name and advances on a match;
 * a mismatch yields R_XML_ECLOSE. */
static inline RXmlRet xml_elemclose(RXml *x, unsigned ch) {
	ut8 expected = *(ut8 *)x->elem;
	if (expected == ch) {
		x->elem++;
		return R_XML_OK;
	}
	return R_XML_ECLOSE;
}

/* The closing tag name has ended. If the stored element name still has bytes
 * left (x->elem is not on its terminator) the tag was too short and
 * R_XML_ECLOSE is returned; otherwise the element is popped. */
static inline RXmlRet xml_elemcloseend (RXml *x, unsigned ch) {
	return *x->elem ? R_XML_ECLOSE : r_xml_selfclose (x, ch);
}

/* Begin a new attribute name on the stack and point x->attr at it. */
static inline RXmlRet r_xml_attrstart(RXml *x, unsigned ch) {
	char **slot = &x->attr;
	return r_xml_pushstack (x, slot, ch);
}
/* Append one more byte to the attribute name being collected on the stack. */
static inline RXmlRet r_xml_attrname(RXml *x, unsigned ch) {
	return r_xml_pushstackc (x, ch);
}
/* The attribute name is complete: report the start of the attribute. Neither
 * argument is used. */
static inline RXmlRet r_xml_attrnameend(RXml *x, unsigned ch) {
	return R_XML_ATTRSTART;
}
/* The attribute value is complete: drop the attribute name from the stack and
 * report the end of the attribute. ch is not used. */
static inline RXmlRet r_xml_attrvalend(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	return R_XML_ATTREND;
}


/* Begin a new processing-instruction target name on the stack and point
 * x->pi at it. */
static inline RXmlRet r_xml_pistart(RXml *x, unsigned ch) {
	char **slot = &x->pi;
	return r_xml_pushstack (x, slot, ch);
}
/* Append one more byte to the PI target name being collected on the stack. */
static inline RXmlRet r_xml_piname(RXml *x, unsigned ch) {
	return r_xml_pushstackc (x, ch);
}
/* Discard the PI target name collected so far without reporting a token.
 * r_xml_parse() uses this once "<?xml" turned out to be the XML declaration
 * rather than an ordinary processing instruction. ch is not used. */
static inline RXmlRet r_xml_piabort(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	return R_XML_OK;
}
/* The PI target name is complete. A target spelling "xml" in any letter case
 * is rejected with R_XML_ESYN; every other target reports R_XML_PISTART.
 * ch is not used. */
static inline RXmlRet r_xml_pinameend(RXml *x, unsigned ch) {
	const char *target = x->pi;
	/* "|32" folds ASCII upper case to lower case; && short-circuits, so no
	 * byte past the terminator is read for shorter names */
	if ((target[0]|32) == 'x' && (target[1]|32) == 'm' && (target[2]|32) == 'l' && !target[3]) {
		return R_XML_ESYN;
	}
	return R_XML_PISTART;
}
/* The processing instruction is complete: drop its target name from the
 * stack, reset x->pi to the stack base and report R_XML_PIEND. ch is not
 * used. */
static inline RXmlRet r_xml_pivalend(RXml *x, unsigned ch) {
	r_xml_popstack (x);
	x->pi = (char *)x->stack;
	return R_XML_PIEND;
}

/* An '&' was seen: clear the reference buffer (x->data doubles as storage for
 * the text between '&' and ';') and reset its length. ch is not used. */
static inline RXmlRet r_xml_refstart(RXml *x, unsigned ch) {
	x->reflen = 0;
	memset (x->data, 0, sizeof (x->data));
	return R_XML_OK;
}

/* Append one byte of a reference name to x->data. The last byte of the buffer
 * is never written, so the collected text stays NUL-terminated; a reference
 * that does not fit yields R_XML_EREF. */
static RXmlRet r_xml_ref(RXml *x, unsigned ch) {
	if (x->reflen < sizeof (x->data) - 1) {
		r_xml_setchar (x->data + x->reflen, ch);
		x->reflen++;
		return R_XML_OK;
	}
	return R_XML_EREF;
}

/* Resolve the reference collected in x->data (the text between '&' and ';').
 *
 * Supported forms are decimal ("#65") and hexadecimal ("#x41") character
 * references and the five predefined entities lt, gt, amp, apos and quot.
 * On success x->data is overwritten with the UTF-8 encoding of the resulting
 * code point and `ret` is returned, so the caller decides whether the data is
 * reported as content or as attribute value. Unknown entities, malformed
 * numbers and code points that are not a valid XML Char yield R_XML_EREF. */
static RXmlRet r_xml_refend (RXml *x, RXmlRet ret) {
	ut8 *r = (ut8 *)x->data;
	unsigned ch = 0;
	if (*r == '#') {
		/* The classification macros compare "value - lower bound" as an
		 * unsigned number. The byte must therefore be widened to unsigned
		 * before it is passed in: a ut8 would be promoted to a signed int,
		 * the NUL terminator would count as a digit and the loop would run
		 * past the end of x->data. */
		if (r[1] == 'x') {
			for (r += 2; r_xml_isHex ((unsigned)*r); r++) {
				ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10);
			}
		} else {
			for (r++; R_XML_IS_NUM ((unsigned)*r); r++) {
				ch = (ch*10) + (*r-'0');
			}
		}
		/* trailing garbage after the digits invalidates the reference */
		if (*r) {
			ch = 0;
		}
	} else {
		ut64 i = INTFROM5CHARS (r[0], r[1], r[2], r[3], r[4]);
		ch =
			i == INTFROM5CHARS ('l','t', 0,  0, 0) ? '<' :
			i == INTFROM5CHARS ('g','t', 0,  0, 0) ? '>' :
			i == INTFROM5CHARS ('a','m','p', 0, 0) ? '&' :
			i == INTFROM5CHARS ('a','p','o','s',0) ? '\'':
			i == INTFROM5CHARS ('q','u','o','t',0) ? '"' : 0;
	}

	/* Codepoints not allowed in the XML 1.1 definition of a Char: zero,
	 * anything above U+10FFFF, U+FFFE, U+FFFF and the surrogate block
	 * U+D800..U+DFFF (checked with an unsigned wrap-around range test). */
	if (!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch - 0xD800) < 0x800) {
		return R_XML_EREF;
	}
	r_xml_setutf8 (x->data, ch);
	return ret;
}

/* Finish a reference found in element content; the decoded text is reported
 * as R_XML_CONTENT. ch is not used. */
static inline RXmlRet r_xml_refcontent(RXml *x, ut8 ch) {
	const RXmlRet token = R_XML_CONTENT;
	return r_xml_refend (x, token);
}
/* Finish a reference found in an attribute value; the decoded text is
 * reported as R_XML_ATTRVAL. ch is not used. */
static inline RXmlRet r_xml_refattrval(RXml *x, ut8 ch) {
	const RXmlRet token = R_XML_ATTRVAL;
	return r_xml_refend (x, token);
}

/* Initialize the parser state in x to use `stack` (of `stacksize` bytes) as
 * storage for element, attribute and PI names.
 *
 * The stack buffer is not copied; x keeps pointing at it. If `stack` is NULL
 * or `stacksize` is 0 the parser is still left in a defined state with an
 * empty stack, so the first attempt to store a name fails with R_XML_ESTACK
 * instead of writing through an invalid pointer. */
R_API void r_xml_init(RXml *x, void *stack, size_t stacksize) {
	r_return_if_fail (x);
	/* Required: the parser relies on stacklen, total, byte and ignore
	 * starting out as zero. */
	memset (x, 0, sizeof (*x));
	x->line = 1;
	x->state = R_XML_STATE_INIT;
	x->stack = (ut8*)stack;
	x->stacksize = stack ? stacksize : 0;
	if (x->stacksize > 0) {
		/* the stack always starts with a NUL sentinel at index 0 */
		*x->stack = 0;
	}
	x->elem = x->pi = x->attr = (char *)x->stack;
}

/* Allocate a parser together with a name stack of `stacksize` bytes.
 *
 * Returns NULL when `stacksize` is not positive or when either allocation
 * fails. The result must be released with r_xml_free (), which also frees the
 * stack. */
R_API RXml *r_xml_new(int stacksize) {
	if (stacksize < 1) {
		return NULL;
	}
	RXml *x = R_NEW (RXml);
	if (!x) {
		return NULL;
	}
	void *stack = malloc (stacksize);
	if (!stack) {
		/* r_xml_init () must not be handed a NULL stack */
		free (x);
		return NULL;
	}
	r_xml_init (x, stack, stacksize);
	return x;
}

/* Release a parser and its stack buffer. NULL is accepted and ignored.
 * Because x->stack is passed to free (), this is only suitable for parsers
 * whose stack was heap-allocated, as r_xml_new () does. */
R_API void r_xml_free(RXml *x) {
	if (!x) {
		return;
	}
	free (x->stack);
	free (x);
}

/* Feed one input byte to the parser and advance its state machine.
 *
 * _ch is reduced to the range 0..255; a zero byte is rejected with
 * R_XML_ESYN before any counter is touched. x is dereferenced without a NULL
 * check.
 *
 * Returns R_XML_OK when the byte was consumed without producing a token, a
 * token code (R_XML_ELEMSTART, R_XML_CONTENT, R_XML_ATTRVAL, ...) when one of
 * the helpers produced one, R_XML_ESYN when the byte is not acceptable in the
 * current state, or the error reported by a helper (R_XML_ESTACK,
 * R_XML_EREF, R_XML_ECLOSE).
 *
 * Several branches assign x->state before calling a helper that may fail;
 * the state is not rolled back on error. */
R_API RXmlRet r_xml_parse(RXml *x, int _ch) {
	/* Ensure that characters are in the range of 0..255 rather than -126..125.
	 * All character comparisons are done with positive integers. */
	ut32 ch = (ut32)(_ch + 256) & 0xff;
	if (!ch) {
		return R_XML_ESYN;
	}
	x->total++;

	/* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and
	 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds
	 * some non-ASCII character sequences to this list, but we can only handle
	 * ASCII here without making assumptions about the input encoding. */
	if (x->ignore == ch) {
		/* the '\n' of a "\r\n" pair: the '\r' was already reported as '\n' */
		x->ignore = 0;
		return R_XML_OK;
	}
	x->ignore = (ch == 0xd) * 0xa;
	if (ch == 0xa || ch == 0xd) {
		ch = 0xa;
		x->line++;
		x->byte = 0;
	}
	x->byte++;

	switch (x->state) {
	/* Match the remaining bytes of the literal x->string one at a time and
	 * continue with x->nextstate once all of it has been seen. */
	case R_XML_STATE_STRING:
		if (ch == *x->string) {
			x->string++;
			if (!*x->string) {
				x->state = x->nextstate;
			}
			return R_XML_OK;
		}
		break;
	/* ATTR0: inside an attribute name. */
	case R_XML_STATE_ATTR0:
		if (R_XML_IS_NAME (ch)) {
			return r_xml_attrname (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ATTR1;
			return r_xml_attrnameend (x, ch);
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ATTR2;
			return r_xml_attrnameend (x, ch);
		}
		break;
	/* ATTR1: whitespace after the attribute name, waiting for '='. */
	case R_XML_STATE_ATTR1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ATTR2;
			return R_XML_OK;
		}
		break;
	/* ATTR2: after '=', waiting for the opening quote. */
	case R_XML_STATE_ATTR2:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_ATTR3;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	/* ATTR3: inside the quoted attribute value. */
	case R_XML_STATE_ATTR3:
		if (r_xml_isAttValue(ch)) {
			return r_xml_dataattr (x, ch);
		}
		if (ch == (ut8)'&') {
			x->state = R_XML_STATE_ATTR4;
			return r_xml_refstart (x, ch);
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_ELEM2;
			return r_xml_attrvalend (x, ch);
		}
		break;
	/* ATTR4: inside a "&...;" reference within an attribute value
	 * ('\x3b' is ';'). */
	case R_XML_STATE_ATTR4:
		if (R_XML_IS_REF (ch)) {
			return r_xml_ref (x, ch);
		}
		if (ch == (ut8)'\x3b') {
			x->state = R_XML_STATE_ATTR3;
			return r_xml_refattrval (x, ch);
		}
		break;
	/* CD0: inside a CDATA section. */
	case R_XML_STATE_CD0:
		if (ch == (ut8)']') {
			x->state = R_XML_STATE_CD1;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datacontent(x, ch);
		}
		break;
	/* CD1: one ']' seen inside CDATA; it is held back until we know whether
	 * it belongs to the terminator. */
	case R_XML_STATE_CD1:
		if (ch == (ut8)']') {
			x->state = R_XML_STATE_CD2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_CD0;
			return r_xml_datacd1 (x, ch);
		}
		break;
	/* CD2: "]]" seen inside CDATA; '>' ends the section, another ']'
	 * releases one of the held-back brackets as content. */
	case R_XML_STATE_CD2:
		if (ch == (ut8)']') {
			return r_xml_datacontent (x, ch);
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_CD0;
			return r_xml_datacd2 (x, ch);
		}
		break;
	/* COMMENT0/COMMENT1: expecting the two '-' of "<!--". */
	case R_XML_STATE_COMMENT0:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_COMMENT1:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT2;
			return R_XML_OK;
		}
		break;
	/* COMMENT2: comment body; nothing is reported for it. */
	case R_XML_STATE_COMMENT2:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT3;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	/* COMMENT3: one '-' seen inside the comment body. */
	case R_XML_STATE_COMMENT3:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT4;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR(ch)) {
			x->state = R_XML_STATE_COMMENT2;
			return R_XML_OK;
		}
		break;
	/* COMMENT4: "--" seen; only '>' may follow. */
	case R_XML_STATE_COMMENT4:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return R_XML_OK;
		}
		break;
	/* DT0: inside the DOCTYPE declaration, which is skipped without
	 * reporting tokens for its contents. */
	case R_XML_STATE_DT0:
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_DT1;
			x->quote = ch;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_DT2;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	/* DT1: inside a quoted literal of the DOCTYPE; skipped up to the
	 * matching quote. */
	case R_XML_STATE_DT1:
		if (x->quote == ch) {
			x->state = x->nextstate;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	/* DT2: after '<' inside the DOCTYPE. */
	case R_XML_STATE_DT2:
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_DT3;
			return R_XML_OK;
		}
		break;
	/* DT3: after "<!" inside the DOCTYPE: comment or markup declaration. */
	case R_XML_STATE_DT3:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_DT4;
			return R_XML_OK;
		}
		break;
	/* DT4: inside a "<!...>" declaration of the DOCTYPE; skipped up to '>'. */
	case R_XML_STATE_DT4:
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_DT1;
			x->quote = ch;
			x->nextstate = R_XML_STATE_DT4;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_DT0;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return R_XML_OK;
		}
		break;
	/* ELEM0: inside the name of a start tag. */
	case R_XML_STATE_ELEM0:
		if (R_XML_IS_NAME (ch)) {
			return xml_elemname (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ELEM1;
			return xml_elemnameend (x, ch);
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return xml_elemnameend (x, ch);
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return xml_elemnameend (x, ch);
		}
		break;
	/* ELEM1: whitespace inside a start tag; an attribute, '/' or '>' may
	 * follow. */
	case R_XML_STATE_ELEM1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ATTR0;
			return r_xml_attrstart (x, ch);
		}
		break;
	/* ELEM2: directly after the closing quote of an attribute value;
	 * whitespace is required before another attribute. */
	case R_XML_STATE_ELEM2:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_ELEM1;
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ELEM3;
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		break;
	/* ELEM3: after '/' in a start tag; only '>' may follow.
	 * r_xml_selfclose () replaces MISC2 by MISC3 when this was the root. */
	case R_XML_STATE_ELEM3:
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return r_xml_selfclose(x, ch);
		}
		break;
	/* ENC0..ENC3: the encoding="..." part of the XML declaration. */
	case R_XML_STATE_ENC0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_ENC1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_ENC2;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC2:
		if (R_XML_IS_ALPHA (ch)) {
			x->state = R_XML_STATE_ENC3;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_ENC3:
		if (r_xml_isEncName (ch)) {
			return R_XML_OK;
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL6;
			return R_XML_OK;
		}
		break;
	/* ETAG0: after "</", expecting the first byte of the closing name. */
	case R_XML_STATE_ETAG0:
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ETAG1;
			return xml_elemclose(x, ch);
		}
		break;
	/* ETAG1: inside the name of a closing tag. */
	case R_XML_STATE_ETAG1:
		if (R_XML_IS_NAME (ch)) {
			return xml_elemclose(x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			/* xml_elemcloseend () switches to MISC3 when it pops the root
			 * element, but the '>' of this tag is still to come. Assign
			 * ETAG2 after the call so "</root >" is accepted; ETAG2 picks
			 * the final state. */
			RXmlRet ret = xml_elemcloseend (x, ch);
			x->state = R_XML_STATE_ETAG2;
			return ret;
		}
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC2;
			return xml_elemcloseend (x, ch);
		}
		break;
	/* ETAG2: whitespace after the name of a closing tag. */
	case R_XML_STATE_ETAG2:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'>') {
			/* the element was already popped in ETAG1; an empty stack
			 * means the root element has just been closed */
			x->state = x->stacklen ? R_XML_STATE_MISC2 : R_XML_STATE_MISC3;
			return R_XML_OK;
		}
		break;
	/* INIT: very first byte; 0xEF must be followed by 0xBB 0xBF (the UTF-8
	 * byte order mark). */
	case R_XML_STATE_INIT:
		if (ch == (ut8)'\xef') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_MISC0;
			x->string = (ut8 *)"\xbb\xbf";
			return R_XML_OK;
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_MISC0;
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le0;
			return R_XML_OK;
		}
		break;
	/* le0: after '<' reached from INIT or MISC0; "<?" is routed through LEQ0,
	 * the only path to the XML declaration states. */
	case R_XML_STATE_le0:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE1;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_LEQ0;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	/* le1: after '<' reached from MISC1. */
	case R_XML_STATE_le1:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE1;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	/* le2: after '<' inside element content. */
	case R_XML_STATE_le2:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_LEE2;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (ch == (ut8)'/') {
			x->state = R_XML_STATE_ETAG0;
			return R_XML_OK;
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_ELEM0;
			return xml_elemstart (x, ch);
		}
		break;
	/* le3: after '<' following the root element; only comments and PIs are
	 * accepted. */
	case R_XML_STATE_le3:
		if (ch == (ut8)'!') {
			x->state = R_XML_STATE_COMMENT0;
			x->nextstate = R_XML_STATE_MISC3;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI0;
			x->nextstate = R_XML_STATE_MISC3;
			return R_XML_OK;
		}
		break;
	/* LEE1: after "<!" before the root element: comment or DOCTYPE. */
	case R_XML_STATE_LEE1:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		if (ch == (ut8)'D') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_DT0;
			x->string = (ut8 *)"OCTYPE";
			return R_XML_OK;
		}
		break;
	/* LEE2: after "<!" inside element content: comment or CDATA section. */
	case R_XML_STATE_LEE2:
		if (ch == (ut8)'-') {
			x->state = R_XML_STATE_COMMENT1;
			x->nextstate = R_XML_STATE_MISC2;
			return R_XML_OK;
		}
		if (ch == (ut8)'[') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_CD0;
			x->string = (ut8 *)"CDATA[";
			return R_XML_OK;
		}
		break;
	/* LEQ0: after "<?" reached via le0; a target starting with 'x' might
	 * be the XML declaration. */
	case R_XML_STATE_LEQ0:
		if (ch == (ut8)'x') {
			x->state = R_XML_STATE_XMLDECL0;
			x->nextstate = R_XML_STATE_MISC1;
			return r_xml_pistart (x, ch);
		}
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_PI1;
			x->nextstate = R_XML_STATE_MISC1;
			return r_xml_pistart (x, ch);
		}
		break;
	/* MISC0: whitespace before anything else has been seen. */
	case R_XML_STATE_MISC0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le0;
			return R_XML_OK;
		}
		break;
	/* MISC1: between prolog items, before the root element. */
	case R_XML_STATE_MISC1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le1;
			return R_XML_OK;
		}
		break;
	/* MISC2: element content. */
	case R_XML_STATE_MISC2:
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le2;
			return R_XML_OK;
		}
		if (ch == (ut8)'&') {
			x->state = R_XML_STATE_MISC2a;
			return r_xml_refstart (x, ch);
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datacontent(x, ch);
		}
		break;
	/* MISC2a: inside a "&...;" reference within element content
	 * ('\x3b' is ';'). */
	case R_XML_STATE_MISC2a:
		if (R_XML_IS_REF (ch)) {
			return r_xml_ref(x, ch);
		}
		if (ch == (ut8)'\x3b') {
			x->state = R_XML_STATE_MISC2;
			return r_xml_refcontent(x, ch);
		}
		break;
	/* MISC3: after the root element has been closed; this is the only state
	 * in which r_xml_eof () reports success. */
	case R_XML_STATE_MISC3:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'<') {
			x->state = R_XML_STATE_le3;
			return R_XML_OK;
		}
		break;
	/* PI0: after "<?", expecting the first byte of the PI target. */
	case R_XML_STATE_PI0:
		if (R_XML_IS_NAME_START (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_pistart (x, ch);
		}
		break;
	/* PI1: inside the PI target name. */
	case R_XML_STATE_PI1:
		if (R_XML_IS_NAME (ch)) {
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	/* PI2: PI content. */
	case R_XML_STATE_PI2:
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI3;
			return R_XML_OK;
		}
		if (R_XML_IS_CHAR (ch)) {
			return r_xml_datapi1 (x, ch);
		}
		break;
	/* PI3: '?' seen inside PI content; '>' ends the PI, anything else
	 * releases the held-back '?' as content. */
	case R_XML_STATE_PI3:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return r_xml_pivalend (x, ch);
		}
		if (R_XML_IS_CHAR (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_datapi2(x, ch);
		}
		break;
	/* PI4: '?' directly after the PI target; only '>' may follow. */
	case R_XML_STATE_PI4:
		if (ch == (ut8)'>') {
			x->state = x->nextstate;
			return r_xml_pivalend (x, ch);
		}
		break;
	/* STD0..STD3: the standalone="yes|no" part of the XML declaration. */
	case R_XML_STATE_STD0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_STD1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_STD2;
			x->quote = ch;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD2:
		if (ch == (ut8)'y') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD3;
			x->string = (ut8 *)"es";
			return R_XML_OK;
		}
		if (ch == (ut8)'n') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD3;
			x->string = (ut8 *)"o";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_STD3:
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL8;
			return R_XML_OK;
		}
		break;
	/* VER0..VER3: the version="1.N" part of the XML declaration. */
	case R_XML_STATE_VER0:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'=') {
			x->state = R_XML_STATE_VER1;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER1:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'\'' || ch == (ut8)'"') {
			x->state = R_XML_STATE_STRING;
			x->quote = ch;
			x->nextstate = R_XML_STATE_VER2;
			x->string = (ut8 *)"1.";
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER2:
		if (R_XML_IS_NUM(ch)) {
			x->state = R_XML_STATE_VER3;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_VER3:
		if (R_XML_IS_NUM (ch)) {
			return R_XML_OK;
		}
		if (x->quote == ch) {
			x->state = R_XML_STATE_XMLDECL4;
			return R_XML_OK;
		}
		break;
	/* XMLDECL0..XMLDECL2: "<?x", "<?xm", "<?xml" seen. As long as the target
	 * can still turn out to be a different name it is collected like an
	 * ordinary PI target and handed over to the PI states. */
	case R_XML_STATE_XMLDECL0:
		if (ch == (ut8)'m') {
			x->state = R_XML_STATE_XMLDECL1;
			return r_xml_piname (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	case R_XML_STATE_XMLDECL1:
		if (ch == (ut8)'l') {
			x->state = R_XML_STATE_XMLDECL2;
			return r_xml_piname (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_PI4;
			return r_xml_pinameend (x, ch);
		}
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_PI2;
			return r_xml_pinameend (x, ch);
		}
		break;
	case R_XML_STATE_XMLDECL2:
		if (R_XML_IS_SP (ch)) {
			/* it really is the XML declaration: drop the collected name */
			x->state = R_XML_STATE_XMLDECL3;
			return r_xml_piabort (x, ch);
		}
		if (R_XML_IS_NAME (ch)) {
			x->state = R_XML_STATE_PI1;
			return r_xml_piname (x, ch);
		}
		break;
	/* XMLDECL3: after "<?xml "; "version" is mandatory. */
	case R_XML_STATE_XMLDECL3:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'v') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_VER0;
			x->string = (ut8 *)"ersion";
			return R_XML_OK;
		}
		break;
	/* XMLDECL4/XMLDECL5: after the version value; "encoding", "standalone"
	 * or the end of the declaration may follow (after whitespace). */
	case R_XML_STATE_XMLDECL4:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_XMLDECL5;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL5:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		if (ch == (ut8)'e') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_ENC0;
			x->string = (ut8 *)"ncoding";
			return R_XML_OK;
		}
		if (ch == (ut8)'s') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD0;
			x->string = (ut8 *)"tandalone";
			return R_XML_OK;
		}
		break;
	/* XMLDECL6/XMLDECL7: after the encoding value; "standalone" or the end
	 * of the declaration may follow (after whitespace). */
	case R_XML_STATE_XMLDECL6:
		if (R_XML_IS_SP (ch)) {
			x->state = R_XML_STATE_XMLDECL7;
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	case R_XML_STATE_XMLDECL7:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		if (ch == (ut8)'s') {
			x->state = R_XML_STATE_STRING;
			x->nextstate = R_XML_STATE_STD0;
			x->string = (ut8 *)"tandalone";
			return R_XML_OK;
		}
		break;
	/* XMLDECL8: after the standalone value; only the end may follow. */
	case R_XML_STATE_XMLDECL8:
		if (R_XML_IS_SP (ch)) {
			return R_XML_OK;
		}
		if (ch == (ut8)'?') {
			x->state = R_XML_STATE_XMLDECL9;
			return R_XML_OK;
		}
		break;
	/* XMLDECL9: '?' seen; only '>' may follow. */
	case R_XML_STATE_XMLDECL9:
		if (ch == (ut8)'>') {
			x->state = R_XML_STATE_MISC1;
			return R_XML_OK;
		}
		break;
	}
	/* the byte is not acceptable in the current state */
	return R_XML_ESYN;
}

/* Tell whether the input may end here: R_XML_OK when the parser is in
 * R_XML_STATE_MISC3 (the state entered once the element stack has become
 * empty again), R_XML_EEOF otherwise. */
R_API RXmlRet r_xml_eof(RXml *x) {
	if (x->state == R_XML_STATE_MISC3) {
		return R_XML_OK;
	}
	return R_XML_EEOF;
}