Closing reported bugs: 617 1591 1592, adding an HTML parser, Daniel

This commit is contained in:
Daniel Veillard 1999-07-05 16:50:46 +00:00
parent 97fea18b71
commit be70ff7162
20 changed files with 6453 additions and 40 deletions

View File

@ -1,3 +1,11 @@
Mon Jul 5 18:45:31 CEST 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c, entities.c, valid.c: cleanup bug #1591
* configure.in: cleanup bug #1592
* HTMLparser.[ch], testHTML.c: started adding an HTML parser using
the same tree back-end. Hence gdome will be available for it.
* doc/Makefile.am: close bug #617
Sat Jun 26 23:36:38 EDT 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c: allocate a per parser context SAX interface block

2372
HTMLparser.c Normal file

File diff suppressed because it is too large Load Diff

34
HTMLparser.h Normal file
View File

@ -0,0 +1,34 @@
/*
* HTMLparser.h : interface for an HTML 4.0 non-verifying parser
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#ifndef __HTML_PARSER_H__
#define __HTML_PARSER_H__
#include "parser.h"
/*
* The HTML parser reuses the XML parser data structures from parser.h:
* every html* type below is a plain alias of the corresponding xml* type
* (parser context, node info, SAX handler block, input stream, document
* and node pointers), so values are interchangeable between the two APIs.
*/
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
typedef xmlSAXHandler htmlSAXHandler;
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
typedef xmlParserInput htmlParserInput;
typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;
/*
* Lower-level routines operating on an existing parser context.
* NOTE(review): presumably each consumes the named construct at the
* current input position of ctxt -- confirm against HTMLparser.c.
*/
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);
/*
* Document-level entry points returning an htmlDocPtr. The *Doc variants
* take an in-memory CHAR buffer, the *File variants take a file name; all
* take an encoding string. The SAX variants additionally take a
* caller-supplied SAX handler block and an opaque userData pointer.
*/
htmlDocPtr htmlSAXParseDoc(CHAR *cur, const char *encoding,
htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseDoc(CHAR *cur, const char *encoding);
htmlDocPtr htmlSAXParseFile(const char *filename, const char *encoding,
htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseFile(const char *filename, const char *encoding);
#endif /* __HTML_PARSER_H__ */

View File

@ -6,7 +6,7 @@ INCLUDES = -I@srcdir@ @CORBA_CFLAGS@ $(VERSION_FLAGS)
VERSION_FLAGS = -DLIBXML_VERSION=\"@LIBXML_VERSION@\"
noinst_PROGRAMS=tester testSAX
noinst_PROGRAMS=tester testSAX testHTML
bin_SCRIPTS=xml-config
@ -20,6 +20,7 @@ libxml_la_SOURCES = \
encoding.c \
error.c \
parser.c \
HTMLparser.c \
debugXML.c \
tree.c \
valid.c
@ -49,6 +50,11 @@ testSAX_LDFLAGS =
testSAX_DEPENDENCIES = $(DEPS)
testSAX_LDADD= $(LDADDS)
testHTML_SOURCES=testHTML.c
testHTML_LDFLAGS =
testHTML_DEPENDENCIES = $(DEPS)
testHTML_LDADD= $(LDADDS)
check-local: tests
testall : tests SVGtests SAXtests

67
SAX.c
View File

@ -817,6 +817,9 @@ comment(void *ctx, const CHAR *value)
xmlAddChild(ctxt->node, ret);
}
/*
* Default handler for XML, builds the DOM tree
*/
xmlSAXHandler xmlDefaultSAXHandler = {
internalSubset,
isStandalone,
@ -877,3 +880,67 @@ xmlDefaultSAXHandlerInit(void)
xmlDefaultSAXHandler.error = xmlParserError;
xmlDefaultSAXHandler.fatalError = xmlParserError;
}
/*
 * Default handler for HTML, builds the DOM tree
 *
 * Positional initializer for the SAX callback table. The slot labels
 * below follow the field assignments made in htmlDefaultSAXHandlerInit(),
 * which installs the same values.
 * NOTE(review): the labels assume xmlSAXHandler declares its fields in
 * that same order -- confirm against the struct declaration, in
 * particular inside the runs of consecutive NULL slots.
 * A NULL slot means no callback is installed for that event.
 */
xmlSAXHandler htmlDefaultSAXHandler = {
NULL, /* internalSubset */
NULL, /* isStandalone */
NULL, /* hasInternalSubset */
NULL, /* hasExternalSubset */
NULL, /* resolveEntity */
getEntity, /* getEntity */
NULL, /* entityDecl */
NULL, /* attributeDecl */
NULL, /* elementDecl */
NULL, /* notationDecl */
NULL, /* unparsedEntityDecl */
setDocumentLocator, /* setDocumentLocator */
startDocument, /* startDocument */
endDocument, /* endDocument */
startElement, /* startElement */
endElement, /* endElement */
NULL, /* reference */
characters, /* characters */
ignorableWhitespace, /* ignorableWhitespace */
NULL, /* processingInstruction */
comment, /* comment */
xmlParserWarning, /* warning */
xmlParserError, /* error */
xmlParserError, /* fatalError: same routine as error */
};
/**
 * htmlDefaultSAXHandlerInit:
 *
 * Reset every callback slot of the global htmlDefaultSAXHandler block to
 * its default value: the tree-building callbacks of this file for the
 * events the HTML handler reacts to, NULL for all the others.
 */
void
htmlDefaultSAXHandlerInit(void)
{
    /* Callbacks installed for HTML. */
    htmlDefaultSAXHandler.getEntity = getEntity;
    htmlDefaultSAXHandler.setDocumentLocator = setDocumentLocator;
    htmlDefaultSAXHandler.startDocument = startDocument;
    htmlDefaultSAXHandler.endDocument = endDocument;
    htmlDefaultSAXHandler.startElement = startElement;
    htmlDefaultSAXHandler.endElement = endElement;
    htmlDefaultSAXHandler.characters = characters;
    htmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
    htmlDefaultSAXHandler.comment = comment;

    /* Reporting: errors and fatal errors share the same routine. */
    htmlDefaultSAXHandler.warning = xmlParserWarning;
    htmlDefaultSAXHandler.error = xmlParserError;
    htmlDefaultSAXHandler.fatalError = xmlParserError;

    /* Document-level queries and entity resolution: not installed. */
    htmlDefaultSAXHandler.internalSubset = NULL;
    htmlDefaultSAXHandler.isStandalone = NULL;
    htmlDefaultSAXHandler.hasInternalSubset = NULL;
    htmlDefaultSAXHandler.hasExternalSubset = NULL;
    htmlDefaultSAXHandler.resolveEntity = NULL;

    /* Declaration callbacks: not installed. */
    htmlDefaultSAXHandler.entityDecl = NULL;
    htmlDefaultSAXHandler.attributeDecl = NULL;
    htmlDefaultSAXHandler.elementDecl = NULL;
    htmlDefaultSAXHandler.notationDecl = NULL;
    htmlDefaultSAXHandler.unparsedEntityDecl = NULL;

    /* Remaining content callbacks: not installed. */
    htmlDefaultSAXHandler.reference = NULL;
    htmlDefaultSAXHandler.processingInstruction = NULL;
}

274
SAXresult/slashdot.rdf Normal file
View File

@ -0,0 +1,274 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(rdf:RDF, xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#', xmlns='http://my.netscape.com/rdf/simple/0.9/')
SAX.characters(
<channel>
<title>Slash, 4)
SAX.startElement(channel)
SAX.characters(
<title>Slashdot:News for , 5)
SAX.startElement(title)
SAX.characters(Slashdot:News for Nerds. Stuff, 44)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/</link>
, 20)
SAX.endElement(link)
SAX.characters(
<description>News for Ner, 5)
SAX.startElement(description)
SAX.characters(News for Nerds. Stuff that Ma, 35)
SAX.endElement(description)
SAX.characters(
</channel>
<image>
<, 3)
SAX.endElement(channel)
SAX.characters(
<image>
<title>Slashdo, 4)
SAX.startElement(image)
SAX.characters(
<title>Slashdot</title>
, 5)
SAX.startElement(title)
SAX.characters(Slashdot</title>
<url>http, 8)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/images/sla, 41)
SAX.endElement(url)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org</link>
<, 19)
SAX.endElement(link)
SAX.characters(
</image>
<item>
<t, 3)
SAX.endElement(image)
SAX.characters(
<item>
<title>100 Mb, 6)
SAX.startElement(item)
SAX.characters(
<title>100 Mbit/s on Fibr, 5)
SAX.startElement(title)
SAX.characters(100 Mbit/s on Fibre to the hom, 31)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Gimp 1, 6)
SAX.startElement(item)
SAX.characters(
<title>Gimp 1.2 Preview</, 5)
SAX.startElement(title)
SAX.characters(Gimp 1.2 Preview</title>
<, 16)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Sony's, 6)
SAX.startElement(item)
SAX.characters(
<title>Sony's AIBO robot , 5)
SAX.startElement(title)
SAX.characters(Sony's AIBO robot Sold Out</ti, 26)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Ask Sl, 6)
SAX.startElement(item)
SAX.characters(
<title>Ask Slashdot: Anot, 5)
SAX.startElement(title)
SAX.characters(Ask Slashdot: Another Word for, 40)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/askslashdo, 54)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Corel , 6)
SAX.startElement(item)
SAX.characters(
<title>Corel Linux FAQ</t, 5)
SAX.startElement(title)
SAX.characters(Corel Linux FAQ</title>
<l, 15)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Upside, 6)
SAX.startElement(item)
SAX.characters(
<title>Upside downsides M, 5)
SAX.startElement(title)
SAX.characters(Upside downsides MP3.COM.</tit, 25)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>2 Tera, 6)
SAX.startElement(item)
SAX.characters(
<title>2 Terabits of Band, 5)
SAX.startElement(title)
SAX.characters(2 Terabits of Bandwidth</title, 23)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Suppre, 6)
SAX.startElement(item)
SAX.characters(
<title>Suppression of col, 5)
SAX.startElement(title)
SAX.characters(Suppression of cold fusion res, 36)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Califo, 6)
SAX.startElement(item)
SAX.characters(
<title>California Gov. Ha, 5)
SAX.startElement(title)
SAX.characters(California Gov. Halts Wage Inf, 36)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 50)
SAX.endElement(link)
SAX.characters(
</item>
<item>
<ti, 3)
SAX.endElement(item)
SAX.characters(
<item>
<title>Red Ha, 6)
SAX.startElement(item)
SAX.characters(
<title>Red Hat Announces , 5)
SAX.startElement(title)
SAX.characters(Red Hat Announces IPO</title>
, 21)
SAX.endElement(title)
SAX.characters(
<link>http://slashdot.org, 5)
SAX.startElement(link)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(link)
SAX.characters(
</item>
</rdf:RDF>, 3)
SAX.endElement(item)
SAX.characters(
</rdf:RDF>, 1)
SAX.endElement(rdf:RDF)
SAX.endDocument()

641
SAXresult/slashdot.xml Normal file
View File

@ -0,0 +1,641 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.startElement(ultramode)
SAX.characters(
<story>
<title>100 Mbit/, 2)
SAX.startElement(story)
SAX.characters(
<title>100 Mbit/s on Fibr, 5)
SAX.startElement(title)
SAX.characters(100 Mbit/s on Fibre to the hom, 31)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-06 14:39:59, 5)
SAX.startElement(time)
SAX.characters(1999-06-06 14:39:59</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>wouldn't-it-b, 5)
SAX.startElement(department)
SAX.characters(wouldn't-it-be-nice</departmen, 19)
SAX.endElement(department)
SAX.characters(
<topic>internet</topic>
, 5)
SAX.startElement(topic)
SAX.characters(internet</topic>
<comments, 8)
SAX.endElement(topic)
SAX.characters(
<comments>20</comments>
, 5)
SAX.startElement(comments)
SAX.characters(20</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicinternet.jpg<, 5)
SAX.startElement(image)
SAX.characters(topicinternet.jpg</image>
</, 17)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Gimp 1.2 , 2)
SAX.startElement(story)
SAX.characters(
<title>Gimp 1.2 Preview</, 5)
SAX.startElement(title)
SAX.characters(Gimp 1.2 Preview</title>
<, 16)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-06 14:38:40, 5)
SAX.startElement(time)
SAX.characters(1999-06-06 14:38:40</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>stuff-to-read, 5)
SAX.startElement(department)
SAX.characters(stuff-to-read</department>
, 13)
SAX.endElement(department)
SAX.characters(
<topic>gimp</topic>
<, 5)
SAX.startElement(topic)
SAX.characters(gimp</topic>
<comments>12<, 4)
SAX.endElement(topic)
SAX.characters(
<comments>12</comments>
, 5)
SAX.startElement(comments)
SAX.characters(12</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicgimp.gif</ima, 5)
SAX.startElement(image)
SAX.characters(topicgimp.gif</image>
</stor, 13)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Sony's AI, 2)
SAX.startElement(story)
SAX.characters(
<title>Sony's AIBO robot , 5)
SAX.startElement(title)
SAX.characters(Sony's AIBO robot Sold Out</ti, 26)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-06 14:32:51, 5)
SAX.startElement(time)
SAX.characters(1999-06-06 14:32:51</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>stuff-to-see<, 5)
SAX.startElement(department)
SAX.characters(stuff-to-see</department>
, 12)
SAX.endElement(department)
SAX.characters(
<topic>tech</topic>
<, 5)
SAX.startElement(topic)
SAX.characters(tech</topic>
<comments>10<, 4)
SAX.endElement(topic)
SAX.characters(
<comments>10</comments>
, 5)
SAX.startElement(comments)
SAX.characters(10</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topictech2.jpg</im, 5)
SAX.startElement(image)
SAX.characters(topictech2.jpg</image>
</sto, 14)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Ask Slash, 2)
SAX.startElement(story)
SAX.characters(
<title>Ask Slashdot: Anot, 5)
SAX.startElement(title)
SAX.characters(Ask Slashdot: Another Word for, 40)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/askslashdo, 54)
SAX.endElement(url)
SAX.characters(
<time>1999-06-05 20:00:00, 5)
SAX.startElement(time)
SAX.characters(1999-06-05 20:00:00</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>Cliff</author>
, 5)
SAX.startElement(author)
SAX.characters(Cliff</author>
<department, 5)
SAX.endElement(author)
SAX.characters(
<department>hacker-vs-cra, 5)
SAX.startElement(department)
SAX.characters(hacker-vs-cracker</department>, 17)
SAX.endElement(department)
SAX.characters(
<topic>news</topic>
<, 5)
SAX.startElement(topic)
SAX.characters(news</topic>
<comments>385, 4)
SAX.endElement(topic)
SAX.characters(
<comments>385</comments>
, 5)
SAX.startElement(comments)
SAX.characters(385</comments>
<section>as, 3)
SAX.endElement(comments)
SAX.characters(
<section>askslashdot</sec, 5)
SAX.startElement(section)
SAX.characters(askslashdot</section>
<ima, 11)
SAX.endElement(section)
SAX.characters(
<image>topicnews.gif</ima, 5)
SAX.startElement(image)
SAX.characters(topicnews.gif</image>
</stor, 13)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Corel Lin, 2)
SAX.startElement(story)
SAX.characters(
<title>Corel Linux FAQ</t, 5)
SAX.startElement(title)
SAX.characters(Corel Linux FAQ</title>
<u, 15)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-05 18:42:06, 5)
SAX.startElement(time)
SAX.characters(1999-06-05 18:42:06</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>stuff-to-read, 5)
SAX.startElement(department)
SAX.characters(stuff-to-read</department>
, 13)
SAX.endElement(department)
SAX.characters(
<topic>corel</topic>
, 5)
SAX.startElement(topic)
SAX.characters(corel</topic>
<comments>16, 5)
SAX.endElement(topic)
SAX.characters(
<comments>164</comments>
, 5)
SAX.startElement(comments)
SAX.characters(164</comments>
<section>ar, 3)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topiccorel.gif</im, 5)
SAX.startElement(image)
SAX.characters(topiccorel.gif</image>
</sto, 14)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Upside do, 2)
SAX.startElement(story)
SAX.characters(
<title>Upside downsides M, 5)
SAX.startElement(title)
SAX.characters(Upside downsides MP3.COM.</tit, 25)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-05 15:56:45, 5)
SAX.startElement(time)
SAX.characters(1999-06-05 15:56:45</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>stuff-to-thin, 5)
SAX.startElement(department)
SAX.characters(stuff-to-think-about</departme, 20)
SAX.endElement(department)
SAX.characters(
<topic>music</topic>
, 5)
SAX.startElement(topic)
SAX.characters(music</topic>
<comments>48, 5)
SAX.endElement(topic)
SAX.characters(
<comments>48</comments>
, 5)
SAX.startElement(comments)
SAX.characters(48</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicmusic.gif</im, 5)
SAX.startElement(image)
SAX.characters(topicmusic.gif</image>
</sto, 14)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>2 Terabit, 2)
SAX.startElement(story)
SAX.characters(
<title>2 Terabits of Band, 5)
SAX.startElement(title)
SAX.characters(2 Terabits of Bandwidth</title, 23)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-05 15:53:43, 5)
SAX.startElement(time)
SAX.characters(1999-06-05 15:53:43</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>CmdrTaco</author>, 5)
SAX.startElement(author)
SAX.characters(CmdrTaco</author>
<departm, 8)
SAX.endElement(author)
SAX.characters(
<department>faster-porn</, 5)
SAX.startElement(department)
SAX.characters(faster-porn</department>
<, 11)
SAX.endElement(department)
SAX.characters(
<topic>internet</topic>
, 5)
SAX.startElement(topic)
SAX.characters(internet</topic>
<comments, 8)
SAX.endElement(topic)
SAX.characters(
<comments>66</comments>
, 5)
SAX.startElement(comments)
SAX.characters(66</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicinternet.jpg<, 5)
SAX.startElement(image)
SAX.characters(topicinternet.jpg</image>
</, 17)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Suppressi, 2)
SAX.startElement(story)
SAX.characters(
<title>Suppression of col, 5)
SAX.startElement(title)
SAX.characters(Suppression of cold fusion res, 36)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-04 23:12:29, 5)
SAX.startElement(time)
SAX.characters(1999-06-04 23:12:29</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>Hemos</author>
, 5)
SAX.startElement(author)
SAX.characters(Hemos</author>
<department, 5)
SAX.endElement(author)
SAX.characters(
<department>possibly-prob, 5)
SAX.startElement(department)
SAX.characters(possibly-probably</department>, 17)
SAX.endElement(department)
SAX.characters(
<topic>science</topic>
, 5)
SAX.startElement(topic)
SAX.characters(science</topic>
<comments>, 7)
SAX.endElement(topic)
SAX.characters(
<comments>217</comments>
, 5)
SAX.startElement(comments)
SAX.characters(217</comments>
<section>ar, 3)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicscience.gif</, 5)
SAX.startElement(image)
SAX.characters(topicscience.gif</image>
</s, 16)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Californi, 2)
SAX.startElement(story)
SAX.characters(
<title>California Gov. Ha, 5)
SAX.startElement(title)
SAX.characters(California Gov. Halts Wage Inf, 36)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 50)
SAX.endElement(url)
SAX.characters(
<time>1999-06-04 23:05:34, 5)
SAX.startElement(time)
SAX.characters(1999-06-04 23:05:34</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>Hemos</author>
, 5)
SAX.startElement(author)
SAX.characters(Hemos</author>
<department, 5)
SAX.endElement(author)
SAX.characters(
<department>woo-hoo!</dep, 5)
SAX.startElement(department)
SAX.characters(woo-hoo!</department>
<top, 8)
SAX.endElement(department)
SAX.characters(
<topic>usa</topic>
<c, 5)
SAX.startElement(topic)
SAX.characters(usa</topic>
<comments>16</, 3)
SAX.endElement(topic)
SAX.characters(
<comments>16</comments>
, 5)
SAX.startElement(comments)
SAX.characters(16</comments>
<section>art, 2)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicus.gif</image, 5)
SAX.startElement(image)
SAX.characters(topicus.gif</image>
</story>, 11)
SAX.endElement(image)
SAX.characters(
</story>
<story>
<titl, 3)
SAX.endElement(story)
SAX.characters(
<story>
<title>Red Hat A, 2)
SAX.startElement(story)
SAX.characters(
<title>Red Hat Announces , 5)
SAX.startElement(title)
SAX.characters(Red Hat Announces IPO</title>
, 21)
SAX.endElement(title)
SAX.characters(
<url>http://slashdot.org/, 5)
SAX.startElement(url)
SAX.characters(http://slashdot.org/articles/9, 51)
SAX.endElement(url)
SAX.characters(
<time>1999-06-04 19:30:18, 5)
SAX.startElement(time)
SAX.characters(1999-06-04 19:30:18</time>
, 19)
SAX.endElement(time)
SAX.characters(
<author>Justin</author>
, 5)
SAX.startElement(author)
SAX.characters(Justin</author>
<departmen, 6)
SAX.endElement(author)
SAX.characters(
<department>details-sketc, 5)
SAX.startElement(department)
SAX.characters(details-sketchy</department>
, 15)
SAX.endElement(department)
SAX.characters(
<topic>redhat</topic>
, 5)
SAX.startElement(topic)
SAX.characters(redhat</topic>
<comments>1, 6)
SAX.endElement(topic)
SAX.characters(
<comments>155</comments>
, 5)
SAX.startElement(comments)
SAX.characters(155</comments>
<section>ar, 3)
SAX.endElement(comments)
SAX.characters(
<section>articles</sectio, 5)
SAX.startElement(section)
SAX.characters(articles</section>
<image>, 8)
SAX.endElement(section)
SAX.characters(
<image>topicredhat.gif</i, 5)
SAX.startElement(image)
SAX.characters(topicredhat.gif</image>
</st, 15)
SAX.endElement(image)
SAX.characters(
</story>
</ultramode>
, 3)
SAX.endElement(story)
SAX.characters(
</ultramode>
, 1)
SAX.endElement(ultramode)
SAX.endDocument()

2816
SAXresult/svg3 Normal file

File diff suppressed because one or more lines are too long

View File

@ -55,7 +55,7 @@ fi
AC_SUBST(HTML_DIR)
XML_LIBDIR='-L${libdir}'
XML_INCLUDEDIR='-I${includedir}'
XML_INCLUDEDIR='-I${includedir}/gnome-xml'
XML_LIBS="-lxml $Z_LIBS"
AC_SUBST(XML_LIBDIR)

View File

@ -46,10 +46,10 @@ gnome-xml-sections.txt : scan
rebuild: gnome-xml-sections.txt templates sgml html
install-data-local:
install -d -m 0755 $(TARGET_DIR)
-install -m 0644 $(srcdir)/xml.html $(srcdir)/structure.gif $(srcdir)/DOM.gif $(TARGET_DIR)
-install -m 0644 $(srcdir)/html/*.html $(TARGET_DIR)
-install -m 0644 $(srcdir)/html/index.sgml $(TARGET_DIR)
@INSTALL@ -d -m 0755 $(TARGET_DIR)
-@INSTALL@ -m 0644 $(srcdir)/xml.html $(srcdir)/structure.gif $(srcdir)/DOM.gif $(TARGET_DIR)
-@INSTALL@ -m 0644 $(srcdir)/html/*.html $(TARGET_DIR)
-@INSTALL@ -m 0644 $(srcdir)/html/index.sgml $(TARGET_DIR)
-gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR)
dist-hook:

View File

@ -570,8 +570,8 @@ xmlCreateEntitiesTable(void) {
ret = (xmlEntitiesTablePtr)
malloc(sizeof(xmlEntitiesTable));
if (ret == NULL) {
fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n",
sizeof(xmlEntitiesTable));
fprintf(stderr, "xmlCreateEntitiesTable : malloc(%ld) failed\n",
(long)sizeof(xmlEntitiesTable));
return(NULL);
}
ret->max_entities = XML_MIN_ENTITIES_TABLE;
@ -579,8 +579,8 @@ xmlCreateEntitiesTable(void) {
ret->table = (xmlEntityPtr )
malloc(ret->max_entities * sizeof(xmlEntity));
if (ret == NULL) {
fprintf(stderr, "xmlCreateEntitiesTable : malloc(%d) failed\n",
ret->max_entities * sizeof(xmlEntity));
fprintf(stderr, "xmlCreateEntitiesTable : malloc(%ld) failed\n",
ret->max_entities * (long)sizeof(xmlEntity));
free(ret);
return(NULL);
}

View File

@ -0,0 +1,34 @@
/*
* HTMLparser.h : interface for an HTML 4.0 non-verifying parser
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#ifndef __HTML_PARSER_H__
#define __HTML_PARSER_H__
#include "parser.h"
/*
* The HTML parser reuses the XML parser data structures from parser.h:
* every html* type below is a plain alias of the corresponding xml* type
* (parser context, node info, SAX handler block, input stream, document
* and node pointers), so values are interchangeable between the two APIs.
*/
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
typedef xmlSAXHandler htmlSAXHandler;
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
typedef xmlParserInput htmlParserInput;
typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;
/*
* Lower-level routines operating on an existing parser context.
* NOTE(review): presumably each consumes the named construct at the
* current input position of ctxt -- confirm against HTMLparser.c.
*/
xmlEntityPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt);
int htmlParseCharRef(htmlParserCtxtPtr ctxt);
void htmlParseElement(htmlParserCtxtPtr ctxt);
/*
* Document-level entry points returning an htmlDocPtr. The *Doc variants
* take an in-memory CHAR buffer, the *File variants take a file name; all
* take an encoding string. The SAX variants additionally take a
* caller-supplied SAX handler block and an opaque userData pointer.
*/
htmlDocPtr htmlSAXParseDoc(CHAR *cur, const char *encoding,
htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseDoc(CHAR *cur, const char *encoding);
htmlDocPtr htmlSAXParseFile(const char *filename, const char *encoding,
htmlSAXHandlerPtr sax, void *userData);
htmlDocPtr htmlParseFile(const char *filename, const char *encoding);
#endif /* __HTML_PARSER_H__ */

View File

@ -62,6 +62,7 @@ typedef struct _xmlParserCtxt {
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
int standalone; /* standalone document */
int html; /* are we parsing an HTML document */
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
@ -179,6 +180,7 @@ extern const char *xmlParserVersion;
extern xmlSAXLocator xmlDefaultSAXLocator;
extern xmlSAXHandler xmlDefaultSAXHandler;
extern xmlSAXHandler htmlDefaultSAXHandler;
#include "entities.h"
#include "xml-error.h"
@ -237,6 +239,7 @@ void xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
const xmlParserNodeInfo* info);
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
#ifdef __cplusplus
}
#endif

View File

@ -230,6 +230,8 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */

View File

@ -628,8 +628,8 @@ xmlStrndup(const CHAR *cur, int len) {
CHAR *ret = malloc((len + 1) * sizeof(CHAR));
if (ret == NULL) {
fprintf(stderr, "malloc of %d byte failed\n",
(len + 1) * sizeof(CHAR));
fprintf(stderr, "malloc of %ld byte failed\n",
(len + 1) * (long)sizeof(CHAR));
return(NULL);
}
memcpy(ret, cur, len * sizeof(CHAR));
@ -669,8 +669,8 @@ xmlCharStrndup(const char *cur, int len) {
CHAR *ret = malloc((len + 1) * sizeof(CHAR));
if (ret == NULL) {
fprintf(stderr, "malloc of %d byte failed\n",
(len + 1) * sizeof(CHAR));
fprintf(stderr, "malloc of %ld byte failed\n",
(len + 1) * (long)sizeof(CHAR));
return(NULL);
}
for (i = 0;i < len;i++)
@ -807,8 +807,8 @@ xmlStrncat(CHAR *cur, const CHAR *add, int len) {
size = xmlStrlen(cur);
ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
if (ret == NULL) {
fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
(size + len + 1) * sizeof(CHAR));
fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
(size + len + 1) * (long)sizeof(CHAR));
return(cur);
}
memcpy(&ret[size], add, len * sizeof(CHAR));
@ -3548,16 +3548,16 @@ xmlParseStartTag(xmlParserCtxtPtr ctxt) {
maxatts = 10;
atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
if (atts == NULL) {
fprintf(stderr, "malloc of %d byte failed\n",
maxatts * sizeof(CHAR *));
fprintf(stderr, "malloc of %ld byte failed\n",
maxatts * (long)sizeof(CHAR *));
return(NULL);
}
} else if (nbatts + 2 < maxatts) {
maxatts *= 2;
atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
if (atts == NULL) {
fprintf(stderr, "realloc of %d byte failed\n",
maxatts * sizeof(CHAR *));
fprintf(stderr, "realloc of %ld byte failed\n",
maxatts * (long)sizeof(CHAR *));
return(NULL);
}
}
@ -4359,7 +4359,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
}
/**
* xmlCreateFileParserCtxt :
* xmlCreateDocParserCtxt :
* @cur: a pointer to an array of CHAR
*
* Create a parser context for an XML in-memory document.

View File

@ -62,6 +62,7 @@ typedef struct _xmlParserCtxt {
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
int standalone; /* standalone document */
int html; /* are we parsing an HTML document */
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
@ -179,6 +180,7 @@ extern const char *xmlParserVersion;
extern xmlSAXLocator xmlDefaultSAXLocator;
extern xmlSAXHandler xmlDefaultSAXHandler;
extern xmlSAXHandler htmlDefaultSAXHandler;
#include "entities.h"
#include "xml-error.h"
@ -237,6 +239,7 @@ void xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
const xmlParserNodeInfo* info);
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
#ifdef __cplusplus
}
#endif

149
testHTML.c Normal file
View File

@ -0,0 +1,149 @@
/*
* testHTML.c : a small tester program for HTML input.
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#ifdef WIN32
#define HAVE_FCNTL_H
#include <io.h>
#else
#include <config.h>
#endif
#include <sys/types.h>
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "HTMLparser.h"
#include "tree.h"
#include "debugXML.h"
/*
 * Command-line switches, incremented by main() while scanning argv and
 * read by the parseAndPrint*() routines:
 *   debug - set by -debug/--debug: print with xmlDebugDumpDocument()
 *           instead of xmlDocDump().
 *   copy  - set by -copy/--copy: replace the parsed document by its
 *           xmlCopyDoc() duplicate before printing.
 */
static int debug = 0;
static int copy = 0;
/*
 * Sample HTML 4.0 Transitional document held in memory as a CHAR array,
 * the input type parseAndPrintBuffer() accepts.
 * Note: this is perfectly clean HTML, i.e. not a useful test.
 * NOTE(review): no caller visible in this listing passes this buffer to
 * the parser -- confirm whether it is still needed.
 */
static CHAR buffer[] =
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
<html>\n\
<head>\n\
<title>This service is temporary down</title>\n\
</head>\n\
\n\
<body bgcolor=\"#FFFFFF\">\n\
<h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
We are doing our best to get it back on-line,\n\
\n\
<p>The W3C system administrators</p>\n\
</body>\n\
</html>\n\
";
/************************************************************************
* *
* Debug *
* *
************************************************************************/
/**
 * parseAndPrintFile:
 * @filename:  path of the HTML file to parse
 *
 * Parse the given file with htmlParseFile(), optionally replace the
 * resulting tree by a copy made with xmlCopyDoc() (when the file-level
 * `copy` flag is non-zero), print it on stdout and free it.
 * The tree is printed with xmlDebugDumpDocument() when the file-level
 * `debug` flag is non-zero and with xmlDocDump() otherwise.
 *
 * If the parser or the copy hands back a NULL document, a message is
 * written to stderr and nothing is dumped or freed.
 */
void parseAndPrintFile(char *filename) {
    htmlDocPtr doc, tmp;

    /*
     * build an HTML tree from the file.
     */
    doc = htmlParseFile(filename, NULL);
    if (doc == NULL) {
        /* never hand a NULL document to the copy/dump/free routines */
        fprintf(stderr, "%s : failed to parse\n",
                (filename != NULL) ? filename : "(null)");
        return;
    }

    /*
     * test intermediate copy if needed.
     */
    if (copy) {
        tmp = doc;
        doc = xmlCopyDoc(doc, 1);
        xmlFreeDoc(tmp);
        if (doc == NULL) {
            /* the original is already released, nothing left to print */
            fprintf(stderr, "%s : failed to copy the document\n",
                    (filename != NULL) ? filename : "(null)");
            return;
        }
    }

    /*
     * print it.
     */
    if (!debug)
        xmlDocDump(stdout, doc);
    else
        xmlDebugDumpDocument(stdout, doc);

    /*
     * free it.
     */
    xmlFreeDoc(doc);
}
/**
 * parseAndPrintBuffer:
 * @buf:  the in-memory HTML content to parse
 *
 * Parse the given buffer with htmlParseDoc(), optionally replace the
 * resulting tree by a copy made with xmlCopyDoc() (when the file-level
 * `copy` flag is non-zero), print it on stdout and free it.
 * The tree is printed with xmlDebugDumpDocument() when the file-level
 * `debug` flag is non-zero and with xmlDocDump() otherwise.
 *
 * If the parser or the copy hands back a NULL document, a message is
 * written to stderr and nothing is dumped or freed.
 */
void parseAndPrintBuffer(CHAR *buf) {
    htmlDocPtr doc, tmp;

    /*
     * build an HTML tree from a string;
     */
    doc = htmlParseDoc(buf, NULL);
    if (doc == NULL) {
        /* never hand a NULL document to the copy/dump/free routines */
        fprintf(stderr, "failed to parse the in-memory buffer\n");
        return;
    }

    /*
     * test intermediate copy if needed.
     */
    if (copy) {
        tmp = doc;
        doc = xmlCopyDoc(doc, 1);
        xmlFreeDoc(tmp);
        if (doc == NULL) {
            /* the original is already released, nothing left to print */
            fprintf(stderr, "failed to copy the document\n");
            return;
        }
    }

    /*
     * print it.
     */
    if (!debug)
        xmlDocDump(stdout, doc);
    else
        xmlDebugDumpDocument(stdout, doc);

    /*
     * free it.
     */
    xmlFreeDoc(doc);
}
/*
 * Program entry point.
 *
 * The command line is scanned twice: the first pass only counts the
 * -debug/--debug and -copy/--copy switches, so that they apply to every
 * file whatever their position; the second pass hands each argument not
 * starting with '-' to parseAndPrintFile(). When no file was given a short
 * usage text is printed. The exit status is always 0.
 */
int main(int argc, char **argv) {
    int parsed = 0;
    int idx;

    /* pass 1: collect the option switches */
    for (idx = 1; idx < argc; idx++) {
        const char *arg = argv[idx];

        if ((strcmp(arg, "-debug") == 0) || (strcmp(arg, "--debug") == 0)) {
            debug++;
            continue;
        }
        if ((strcmp(arg, "-copy") == 0) || (strcmp(arg, "--copy") == 0))
            copy++;
    }

    /* pass 2: everything that does not look like an option is a file */
    for (idx = 1; idx < argc; idx++) {
        if (argv[idx][0] == '-')
            continue;
        parseAndPrintFile(argv[idx]);
        parsed++;
    }

    if (parsed != 0)
        return(0);

    /* nothing was parsed: tell the user how to call us */
    printf("Usage : %s [--debug] [--copy] HTMLfiles ...\n",
           argv[0]);
    printf("\tParse the HTML files and output the result of the parsing\n");
    printf("\t--debug : dump a debug tree of the in-memory document\n");
    printf("\t--copy : used to test the internal copy implementation\n");
    return(0);
}

12
tree.c
View File

@ -387,6 +387,8 @@ xmlNewDoc(const CHAR *version) {
cur->type = XML_DOCUMENT_NODE;
cur->version = xmlStrdup(version);
cur->ID = NULL;
cur->DTD = NULL;
cur->name = NULL;
cur->root = NULL;
cur->intSubset = NULL;
@ -2650,11 +2652,11 @@ xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level) {
*/
static void
xmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
if (oldXMLWDcompatibility)
xmlBufferWriteChar(buf, "<?XML version=");
else
xmlBufferWriteChar(buf, "<?xml version=");
xmlBufferWriteQuotedString(buf, cur->version);
xmlBufferWriteChar(buf, "<?xml version=");
if (cur->version != NULL)
xmlBufferWriteQuotedString(buf, cur->version);
else
xmlBufferWriteChar(buf, "\"1.0\"");
if (cur->encoding != NULL) {
xmlBufferWriteChar(buf, " encoding=");
xmlBufferWriteQuotedString(buf, cur->encoding);

2
tree.h
View File

@ -230,6 +230,8 @@ typedef struct xmlDoc {
char *name; /* name/filename/URI of the document */
const CHAR *version; /* the XML version string */
const CHAR *encoding; /* encoding, if any */
const CHAR *ID; /* the HTML version */
const CHAR *DTD; /* the HTML dtd URI */
int compression;/* level of zlib compression */
int standalone; /* standalone document (no external refs) */
struct xmlDtd *intSubset; /* the document internal subset */

28
valid.c
View File

@ -189,8 +189,8 @@ xmlCreateElementTable(void) {
ret = (xmlElementTablePtr)
malloc(sizeof(xmlElementTable));
if (ret == NULL) {
fprintf(stderr, "xmlCreateElementTable : malloc(%d) failed\n",
sizeof(xmlElementTable));
fprintf(stderr, "xmlCreateElementTable : malloc(%ld) failed\n",
(long)sizeof(xmlElementTable));
return(NULL);
}
ret->max_elements = XML_MIN_ELEMENT_TABLE;
@ -198,8 +198,8 @@ xmlCreateElementTable(void) {
ret->table = (xmlElementPtr )
malloc(ret->max_elements * sizeof(xmlElement));
if (ret == NULL) {
fprintf(stderr, "xmlCreateElementTable : malloc(%d) failed\n",
ret->max_elements * sizeof(xmlElement));
fprintf(stderr, "xmlCreateElementTable : malloc(%ld) failed\n",
ret->max_elements * (long)sizeof(xmlElement));
free(ret);
return(NULL);
}
@ -461,8 +461,8 @@ xmlCreateEnumeration(CHAR *name) {
ret = (xmlEnumerationPtr) malloc(sizeof(xmlEnumeration));
if (ret == NULL) {
fprintf(stderr, "xmlCreateEnumeration : malloc(%d) failed\n",
sizeof(xmlEnumeration));
fprintf(stderr, "xmlCreateEnumeration : malloc(%ld) failed\n",
(long)sizeof(xmlEnumeration));
return(NULL);
}
@ -528,8 +528,8 @@ xmlCreateAttributeTable(void) {
ret = (xmlAttributeTablePtr)
malloc(sizeof(xmlAttributeTable));
if (ret == NULL) {
fprintf(stderr, "xmlCreateAttributeTable : malloc(%d) failed\n",
sizeof(xmlAttributeTable));
fprintf(stderr, "xmlCreateAttributeTable : malloc(%ld) failed\n",
(long)sizeof(xmlAttributeTable));
return(NULL);
}
ret->max_attributes = XML_MIN_ATTRIBUTE_TABLE;
@ -537,8 +537,8 @@ xmlCreateAttributeTable(void) {
ret->table = (xmlAttributePtr )
malloc(ret->max_attributes * sizeof(xmlAttribute));
if (ret == NULL) {
fprintf(stderr, "xmlCreateAttributeTable : malloc(%d) failed\n",
ret->max_attributes * sizeof(xmlAttribute));
fprintf(stderr, "xmlCreateAttributeTable : malloc(%ld) failed\n",
ret->max_attributes * (long)sizeof(xmlAttribute));
free(ret);
return(NULL);
}
@ -858,8 +858,8 @@ xmlCreateNotationTable(void) {
ret = (xmlNotationTablePtr)
malloc(sizeof(xmlNotationTable));
if (ret == NULL) {
fprintf(stderr, "xmlCreateNotationTable : malloc(%d) failed\n",
sizeof(xmlNotationTable));
fprintf(stderr, "xmlCreateNotationTable : malloc(%ld) failed\n",
(long)sizeof(xmlNotationTable));
return(NULL);
}
ret->max_notations = XML_MIN_NOTATION_TABLE;
@ -867,8 +867,8 @@ xmlCreateNotationTable(void) {
ret->table = (xmlNotationPtr )
malloc(ret->max_notations * sizeof(xmlNotation));
if (ret == NULL) {
fprintf(stderr, "xmlCreateNotationTable : malloc(%d) failed\n",
ret->max_notations * sizeof(xmlNotation));
fprintf(stderr, "xmlCreateNotationTable : malloc(%ld) failed\n",
ret->max_notations * (long)sizeof(xmlNotation));
free(ret);
return(NULL);
}