mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-07 12:15:51 +00:00
160 lines
4.5 KiB
C
160 lines
4.5 KiB
C
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
/*** htmparse.h ***************************************************/
|
|
/* description: html parser */
|
|
|
|
|
|
/********************************************************************
|
|
|
|
The client calls CRAWL_ParserPut, and gets called back with
|
|
a function whose prototype is described by CRAWL_ParseFunc. The
|
|
parser calls this function whenever a begin or end tag, or run of
|
|
character data has been scanned, and does not build a tree.
|
|
|
|
$Revision: 1.2 $
|
|
$Date: 1999/11/06 02:32:28 $
|
|
|
|
*********************************************************************/
|
|
|
|
|
|
#ifndef htmparse_h___
|
|
#define htmparse_h___
|
|
#include "prtypes.h"
|
|
|
|
/* return values from CRAWL_ParserPut */
|
|
#define CRAWL_PARSE_NO_ERROR 0 /* no error */
|
|
#define CRAWL_PARSE_ERROR 1 /* syntax error */
|
|
#define CRAWL_PARSE_TERMINATE 2 /* don't continue to parse */
|
|
#define CRAWL_PARSE_OUT_OF_MEMORY 3 /* out of memory */
|
|
|
|
/* return values for CRAWL_ParseFunc callback function */
|
|
#define PARSE_GET_NEXT_TOKEN 0
|
|
#define PARSE_STOP 1
|
|
#define PARSE_OUT_OF_MEMORY 2
|
|
|
|
typedef struct _CRAWL_TagStruc *CRAWL_Tag;
|
|
|
|
/*
|
|
The client of this API creates a reference to this with CRAWL_MakeParseObj and provides it as
|
|
a parameter to CRAWL_ParserPut. It is passed as a parameter to the CRAWL_ParseFunc function and
|
|
the parsed data may be extracted from it through the API. It should be considered opaque data.
|
|
*/
|
|
typedef struct _CRAWL_ParseObjStruc *CRAWL_ParseObj;
|
|
|
|
/*
|
|
Typedef for a parse callback. The memory in CRAWL_ParseObj is reused across successive calls.
|
|
*/
|
|
typedef int
|
|
(PR_CALLBACK *CRAWL_ParseFunc)(CRAWL_ParseObj obj, PRBool isTag, void *data);
|
|
|
|
/****************************************************************************************/
|
|
/* public API */
|
|
/****************************************************************************************/
|
|
|
|
NSPR_BEGIN_EXTERN_C
|
|
|
|
/*
|
|
Returns the tag parsed
|
|
*/
|
|
PR_EXTERN(CRAWL_Tag)
|
|
CRAWL_GetTagParsed(CRAWL_ParseObj obj);
|
|
|
|
/*
|
|
Returns the character data between tags
|
|
*/
|
|
PR_EXTERN(char*)
|
|
CRAWL_GetDataParsed(CRAWL_ParseObj obj);
|
|
|
|
/*
|
|
Returns the tag name
|
|
*/
|
|
PR_EXTERN(char*)
|
|
CRAWL_GetTagName(CRAWL_Tag tag);
|
|
|
|
/*
|
|
Returns the libparse tag code as defined in pa_tags.h
|
|
*/
|
|
PR_EXTERN(intn)
|
|
CRAWL_GetTagToken(CRAWL_Tag tag);
|
|
|
|
/*
|
|
a tag of the form <tagname /> Empty tags are recognized only if the page has been designated as
|
|
containing RDF (the API doesn't support this yet)
|
|
*/
|
|
PR_EXTERN(PRBool)
|
|
CRAWL_IsEmptyTag(CRAWL_Tag tag);
|
|
|
|
/*
|
|
A tag of the form </tagname>.
|
|
*/
|
|
PR_EXTERN(PRBool)
|
|
CRAWL_IsEndTag(CRAWL_Tag tag);
|
|
|
|
/*
|
|
Returns the tag attribute given a name
|
|
*/
|
|
PR_EXTERN(char*)
|
|
CRAWL_GetAttributeValue(CRAWL_Tag tag, char *attributeName);
|
|
|
|
/*
|
|
Returns the number of attributes for the tag.
|
|
*/
|
|
PR_EXTERN(uint16)
|
|
CRAWL_GetNumberOfAttributes(CRAWL_Tag tag);
|
|
|
|
/*
|
|
Returns the nth attribute of the tag.
|
|
*/
|
|
PR_EXTERN(char*)
|
|
CRAWL_GetNthAttributeName(CRAWL_Tag tag, uint16 n);
|
|
|
|
/*
|
|
Returns the nth attribute value of the tag.
|
|
*/
|
|
PR_EXTERN(char*)
|
|
CRAWL_GetNthAttributeValue(CRAWL_Tag tag, uint16 n);
|
|
|
|
/*
|
|
Creates a new CRAWL_ParseObj suitable for passing to CRAWL_ParserPut.
|
|
Returns NULL if out of memory.
|
|
*/
|
|
PR_EXTERN(CRAWL_ParseObj)
|
|
CRAWL_MakeParseObj();
|
|
|
|
/*
|
|
Destroys the CRAWL_ParseObj and all associated memory
|
|
*/
|
|
PR_EXTERN(void)
|
|
CRAWL_DestroyParseObj(CRAWL_ParseObj obj);
|
|
|
|
/*
|
|
Parse characters in buffer and call func when an element (tag or data) has been
|
|
scanned. Returns an error code. The same CRAWL_ParseObj must be provided for successive
|
|
puts in the same buffer. It is up to the caller to create and destroy the CRAWL_ParseObj.
|
|
*/
|
|
PR_EXTERN(int)
|
|
CRAWL_ParserPut(CRAWL_ParseObj obj, char *str, uint32 len, CRAWL_ParseFunc func, void *data);
|
|
|
|
NSPR_END_EXTERN_C
|
|
|
|
#endif /* htmparse_h___ */
|