139 lines
2.8 KiB
C

#include <string.h>
#include <ctype.h>
#include "lexer.h"
static void skip( lx_state_t* lexer )
{
if ( lexer->current < lexer->end )
{
lexer->current++;
}
}
int lx_next( lx_state_t* lexer )
{
/* skip spaces */
for ( ;; )
{
if ( isspace( *lexer->current ) )
{
if ( *lexer->current == '\n' )
{
lexer->line++;
}
skip( lexer );
}
else if ( *lexer->current != 0 )
{
break;
}
else
{
/* return LX_EOF if we've reached the end of the input */
lexer->lexeme.str = "<eof>";
lexer->lexeme.len = 5;
lexer->token = LX_EOF;
return 0;
}
}
lexer->lexeme.str = lexer->current;
/* if the last token was the "version" tag, parse anything until a blank */
if ( lexer->last_was_version )
{
do
{
skip( lexer );
}
while ( !isspace( *lexer->current ) );
lexer->lexeme.len = lexer->current - lexer->lexeme.str;
lexer->token = LX_VERSION;
lexer->last_was_version = 0;
return 0;
}
/* if the character is alphabetic or '_', the token is an identifier */
if ( isalpha( *lexer->current ) || *lexer->current == '_' )
{
/* get all alphanumeric and '_' characters */
do
{
skip( lexer );
}
while ( isalnum( *lexer->current ) || *lexer->current == '_' );
lexer->lexeme.len = lexer->current - lexer->lexeme.str;
lexer->token = LX_TAG;
lexer->last_was_version = !strncmp( lexer->lexeme.str, "version", 7 );
return 0;
}
/* if the character is an hex digit, the token is a number */
if ( isxdigit( *lexer->current ) )
{
do
{
skip( lexer );
}
while ( isxdigit( *lexer->current ) );
lexer->lexeme.len = lexer->current - lexer->lexeme.str;
lexer->token = LX_NUMBER;
return 0;
}
/* if the character is a quote, it's a string */
if ( *lexer->current == '"' )
{
/* get anything until another quote */
do
{
skip( lexer );
if ( *lexer->current == '"' && lexer->current[ -1 ] != '\\' )
{
break;
}
}
while ( lexer->current < lexer->end );
if ( lexer->current == lexer->end )
{
return LX_UNTERMINATED_STRING;
}
skip( lexer );
lexer->lexeme.str++;
lexer->lexeme.len = lexer->current - lexer->lexeme.str - 1;
lexer->token = LX_STRING;
return 0;
}
/* otherwise the token is a symbol */
lexer->lexeme.len = 1;
switch ( *lexer->current++ )
{
case '(': lexer->token = LX_LPAREN; return 0;
case ')': lexer->token = LX_RPAREN; return 0;
}
return LX_INVALID_CHARACTER;
}
void lx_new( lx_state_t* lexer, const char* source, unsigned srclen )
{
lexer->line = 1;
lexer->current = source;
lexer->end = source + srclen;
lexer->lexeme.str = NULL;
lexer->lexeme.len = 0;
lexer->token = 0;
lexer->last_was_version = 0;
}