/** * Copyright (c) 2013 Mozilla Foundation and Contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "parser_internal.h" #include "cuetext_internal.h" #include "cue_internal.h" #include "string_internal.h" static void webvtt_skipwhite( webvtt_byte **position ); #ifdef min # undef min #endif #define min(a,b) ( (a) < (b) ? (a) : (b) ) /** * ERROR macro used for webvtt_parse_cuetext */ #undef ERROR #define ERROR(code) \ do \ { \ if( self->error ) \ if( self->error( self->userdata, line, col, code ) < 0 ) \ return WEBVTT_PARSE_ERROR; \ } while(0) /** * Macros for return statuses based on memory operations. * This is to avoid many if statements checking for multiple memory operation * return statuses in functions. */ #define CHECK_MEMORY_OP(status) \ if( status != WEBVTT_SUCCESS ) \ return status; \ #define CHECK_MEMORY_OP_JUMP(status_var, returned_status) \ if( returned_status != WEBVTT_SUCCESS) \ { \ status_var = returned_status; \ goto dealloc; \ } \ /** * This will only work on null-terminated strings, remember that! */ static void webvtt_skipwhite( webvtt_byte **position ) { webvtt_byte *p = *position; while( *p && webvtt_iswhite(*p) ) { ++p; } *position = p; } WEBVTT_INTERN webvtt_status webvtt_create_cuetext_token( webvtt_cuetext_token **token, webvtt_cuetext_token_type token_type ) { webvtt_cuetext_token *temp_token = (webvtt_cuetext_token *)webvtt_alloc0( sizeof(*temp_token) ); if( !temp_token ) { return WEBVTT_OUT_OF_MEMORY; } temp_token->token_type = token_type; *token = temp_token; return WEBVTT_SUCCESS; } WEBVTT_INTERN webvtt_status webvtt_create_cuetext_start_token( webvtt_cuetext_token **token, webvtt_string *tag_name, webvtt_stringlist *css_classes, webvtt_string *annotation ) { webvtt_status status; webvtt_cuetext_start_token_data sd; if( WEBVTT_FAILED( status = webvtt_create_cuetext_token( token, START_TOKEN ) ) ) { return status; } webvtt_copy_string( &(*token)->tag_name, tag_name ); webvtt_copy_stringlist( &sd.css_classes, css_classes ); webvtt_copy_string( &sd.annotations, annotation ); (*token)->start_token_data = sd; return WEBVTT_SUCCESS; } WEBVTT_INTERN webvtt_status webvtt_create_cuetext_end_token( webvtt_cuetext_token **token, webvtt_string *tag_name ) { webvtt_status status; if( WEBVTT_FAILED( status = webvtt_create_cuetext_token( token, END_TOKEN ) ) ) { return status; } webvtt_copy_string( &(*token)->tag_name, tag_name ); return WEBVTT_SUCCESS; } WEBVTT_INTERN webvtt_status webvtt_create_cuetext_text_token( webvtt_cuetext_token **token, webvtt_string *text ) { webvtt_status status; if( WEBVTT_FAILED( status = webvtt_create_cuetext_token( token, TEXT_TOKEN ) ) ) { return status; } webvtt_copy_string( &(*token)->text, text); return WEBVTT_SUCCESS; } WEBVTT_INTERN webvtt_status webvtt_create_cuetext_timestamp_token( webvtt_cuetext_token **token, webvtt_timestamp time_stamp ) { webvtt_status status; if( WEBVTT_FAILED( status = webvtt_create_cuetext_token( token, TIME_STAMP_TOKEN ) ) ) { return status; } (*token)->time_stamp = time_stamp; return WEBVTT_SUCCESS; } WEBVTT_INTERN void webvtt_delete_cuetext_token( webvtt_cuetext_token **token ) { webvtt_cuetext_start_token_data data; webvtt_cuetext_token *t; if( !token ) { return; } if( !*token ) { return; } t = *token; /** * Note that time stamp tokens do not need to free any internal data because * they do not allocate anything. */ switch( t->token_type ) { case START_TOKEN: data = t->start_token_data; webvtt_release_stringlist( &data.css_classes ); webvtt_release_string( &data.annotations ); webvtt_release_string( &t->tag_name ); break; case END_TOKEN: webvtt_release_string( &t->tag_name ); break; case TEXT_TOKEN: webvtt_release_string( &t->text ); break; } webvtt_free( t ); *token = 0; } /** * Definitions for tag names that accept annotationsm */ #define V_TAG_LENGTH 1 webvtt_byte v_tag[V_TAG_LENGTH] = { UTF8_V }; WEBVTT_INTERN int tag_accepts_annotation( webvtt_string *tag_name ) { return memcmp( webvtt_string_text( tag_name ), v_tag, min(webvtt_string_length( tag_name ), V_TAG_LENGTH) ) == 0; } /** * Definitions for tag tokens that are more then one character long. */ #define RUBY_TAG_LENGTH 4 #define RUBY_TEXT_TAG_LENGTH 2 webvtt_byte ruby_tag[RUBY_TAG_LENGTH] = { UTF8_R, UTF8_U, UTF8_B, UTF8_Y }; webvtt_byte rt_tag[RUBY_TEXT_TAG_LENGTH] = { UTF8_R, UTF8_T }; WEBVTT_INTERN webvtt_status webvtt_get_node_kind_from_tag_name( webvtt_string *tag_name, webvtt_node_kind *kind ) { if( !tag_name || !kind ) { return WEBVTT_INVALID_PARAM; } if( webvtt_string_length(tag_name) == 1 ) { switch( webvtt_string_text(tag_name)[0] ) { case( UTF8_B ): *kind = WEBVTT_BOLD; break; case( UTF8_I ): *kind = WEBVTT_ITALIC; break; case( UTF8_U ): *kind = WEBVTT_UNDERLINE; break; case( UTF8_C ): *kind = WEBVTT_CLASS; break; case( UTF8_V ): *kind = WEBVTT_VOICE; break; } } else if( memcmp( webvtt_string_text(tag_name), ruby_tag, min(webvtt_string_length(tag_name), RUBY_TAG_LENGTH) ) == 0 ) { *kind = WEBVTT_RUBY; } else if( memcmp( webvtt_string_text(tag_name), rt_tag, min(webvtt_string_length(tag_name), RUBY_TEXT_TAG_LENGTH) ) == 0 ) { *kind = WEBVTT_RUBY_TEXT; } else { return WEBVTT_INVALID_TAG_NAME; } return WEBVTT_SUCCESS; } WEBVTT_INTERN webvtt_status webvtt_create_node_from_token( webvtt_cuetext_token *token, webvtt_node **node, webvtt_node *parent ) { webvtt_node_kind kind; if( !token || !node || !parent ) { return WEBVTT_INVALID_PARAM; } /** * We've recieved a node that is not null. * In order to prevent memory leaks caused by overwriting a node which the * caller has not released return unsuccessful. */ if( *node ) { return WEBVTT_UNSUCCESSFUL; } switch ( token->token_type ) { case( TEXT_TOKEN ): return webvtt_create_text_leaf_node( node, parent, &token->text ); break; case( START_TOKEN ): CHECK_MEMORY_OP( webvtt_get_node_kind_from_tag_name( &token->tag_name, &kind) ); return webvtt_create_internal_node( node, parent, kind, token->start_token_data.css_classes, &token->start_token_data.annotations ); break; case ( TIME_STAMP_TOKEN ): return webvtt_create_time_stamp_leaf_node( node, parent, token->time_stamp ); break; default: return WEBVTT_INVALID_TOKEN_TYPE; } } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_data_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { for ( ; *token_state == DATA; (*position)++ ) { switch( **position ) { case UTF8_AMPERSAND: *token_state = ESCAPE; break; case UTF8_LESS_THAN: if( webvtt_string_length(result) == 0 ) { *token_state = TAG; } else { return WEBVTT_SUCCESS; } break; case UTF8_NULL_BYTE: return WEBVTT_SUCCESS; break; default: CHECK_MEMORY_OP( webvtt_string_putc( result, *position[0] ) ); break; } } return WEBVTT_UNFINISHED; } /** * Definitions for valid escape values. * The semicolon is implicit in the comparison. */ #define AMP_ESCAPE_LENGTH 4 #define LT_ESCAPE_LENGTH 3 #define GT_ESCAPE_LENGTH 3 #define RLM_ESCAPE_LENGTH 4 #define LRM_ESCAPE_LENGTH 4 #define NBSP_ESCAPE_LENGTH 5 #define RLM_REPLACE_LENGTH 3 #define LRM_REPLACE_LENGTH 3 #define NBSP_REPLACE_LENGTH 2 webvtt_byte amp_escape[AMP_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_A, UTF8_M, UTF8_P }; webvtt_byte lt_escape[LT_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_L, UTF8_T }; webvtt_byte gt_escape[GT_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_G, UTF8_T }; webvtt_byte rlm_escape[RLM_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_R, UTF8_L, UTF8_M }; webvtt_byte lrm_escape[LRM_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_L, UTF8_R, UTF8_M }; webvtt_byte nbsp_escape[NBSP_ESCAPE_LENGTH] = { UTF8_AMPERSAND, UTF8_N, UTF8_B, UTF8_S, UTF8_P }; webvtt_byte rlm_replace[RLM_REPLACE_LENGTH] = { UTF8_RIGHT_TO_LEFT_1, UTF8_RIGHT_TO_LEFT_2, UTF8_RIGHT_TO_LEFT_3 }; webvtt_byte lrm_replace[LRM_REPLACE_LENGTH] = { UTF8_LEFT_TO_RIGHT_1, UTF8_LEFT_TO_RIGHT_2, UTF8_LEFT_TO_RIGHT_3 }; webvtt_byte nbsp_replace[NBSP_REPLACE_LENGTH] = { UTF8_NO_BREAK_SPACE_1, UTF8_NO_BREAK_SPACE_2 }; WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_escape_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { webvtt_string buffer; webvtt_status status = WEBVTT_SUCCESS; CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) ); /** * Append ampersand here because the algorithm is not able to add it to the * buffer when it reads it in the DATA state tokenizer. */ CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, UTF8_AMPERSAND ) ); for( ; *token_state == ESCAPE; (*position)++ ) { /** * We have encountered a token termination point. * Append buffer to result and return success. */ if( **position == UTF8_NULL_BYTE || **position == UTF8_LESS_THAN ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) ); goto dealloc; } /** * This means we have enocuntered a malformed escape character sequence. * This means that we need to add that malformed text to the result and * recreate the buffer to prepare for a new escape sequence. */ else if( **position == UTF8_AMPERSAND ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) ); webvtt_release_string( &buffer ); CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) ); CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, *position[0] ) ); } /** * We've encountered the semicolon which is the end of an escape sequence. * Check if buffer contains a valid escape sequence and if it does append * the interpretation to result and change the state to DATA. */ else if( **position == UTF8_SEMI_COLON ) { if( memcmp( webvtt_string_text(&buffer), amp_escape, min(webvtt_string_length(&buffer), AMP_ESCAPE_LENGTH ) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_AMPERSAND ) ); } else if( memcmp( webvtt_string_text(&buffer), lt_escape, min(webvtt_string_length(&buffer), LT_ESCAPE_LENGTH ) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_LESS_THAN ) ); } else if( memcmp( webvtt_string_text(&buffer), gt_escape, min(webvtt_string_length(&buffer), GT_ESCAPE_LENGTH) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, UTF8_GREATER_THAN ) ); } else if( memcmp( webvtt_string_text(&buffer), rlm_escape, min(webvtt_string_length(&buffer), RLM_ESCAPE_LENGTH) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, rlm_replace, RLM_REPLACE_LENGTH ) ); } else if( memcmp( webvtt_string_text(&buffer), lrm_escape, min(webvtt_string_length(&buffer), LRM_ESCAPE_LENGTH) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, lrm_replace, LRM_REPLACE_LENGTH ) ); } else if( memcmp( webvtt_string_text(&buffer), nbsp_escape, min(webvtt_string_length(&buffer), NBSP_ESCAPE_LENGTH) ) == 0 ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, nbsp_replace, NBSP_REPLACE_LENGTH ) ); } else { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) ); CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) ); } *token_state = DATA; } /** * Character is alphanumeric. This means we are in the body of the escape * sequence. */ else if( webvtt_isalphanum( **position ) ) { CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) ); } /** * If we have not found an alphanumeric character then we have encountered * a malformed escape sequence. Add buffer to result and continue to parse * in DATA state. */ else { CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) ); CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) ); *token_state = DATA; } } dealloc: webvtt_release_string( &buffer ); return status; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_tag_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { for( ; *token_state == TAG; (*position)++ ) { if( **position == UTF8_TAB || **position == UTF8_LINE_FEED || **position == UTF8_CARRIAGE_RETURN || **position == UTF8_FORM_FEED || **position == UTF8_SPACE ) { *token_state = START_TAG_ANNOTATION; } else if( webvtt_isdigit( **position ) ) { CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) ); *token_state = TIME_STAMP_TAG; } else { switch( **position ) { case UTF8_FULL_STOP: *token_state = START_TAG_CLASS; break; case UTF8_SOLIDUS: *token_state = END_TAG; break; case UTF8_GREATER_THAN: return WEBVTT_SUCCESS; break; case UTF8_NULL_BYTE: return WEBVTT_SUCCESS; break; default: CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) ); *token_state = START_TAG; } } } return WEBVTT_UNFINISHED; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_start_tag_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { for( ; *token_state == START_TAG; (*position)++ ) { if( **position == UTF8_TAB || **position == UTF8_FORM_FEED || **position == UTF8_SPACE || **position == UTF8_LINE_FEED || **position == UTF8_CARRIAGE_RETURN ) { *token_state = START_TAG_ANNOTATION; } else { switch( **position ) { case UTF8_TAB: *token_state = START_TAG_ANNOTATION; break; case UTF8_FULL_STOP: *token_state = START_TAG_CLASS; break; case UTF8_GREATER_THAN: return WEBVTT_SUCCESS; break; default: CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) ); break; } } } return WEBVTT_UNFINISHED; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_start_tag_class_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_stringlist *css_classes ) { webvtt_string buffer; webvtt_status status = WEBVTT_SUCCESS; CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) ); for( ; *token_state == START_TAG_CLASS; (*position)++ ) { if( **position == UTF8_TAB || **position == UTF8_FORM_FEED || **position == UTF8_SPACE || **position == UTF8_LINE_FEED || **position == UTF8_CARRIAGE_RETURN) { CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) ); *token_state = START_TAG_ANNOTATION; return WEBVTT_SUCCESS; } else if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) { CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) ); webvtt_release_string( &buffer ); return WEBVTT_SUCCESS; } else if( **position == UTF8_FULL_STOP ) { CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) ); webvtt_release_string( &buffer ); CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) ); } else { CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) ); } } dealloc: webvtt_release_string( &buffer ); return status; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_start_tag_annotation_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *annotation ) { for( ; *token_state == START_TAG_ANNOTATION; (*position)++ ) { if( **position == UTF8_NULL_BYTE || **position == UTF8_GREATER_THAN ) { return WEBVTT_SUCCESS; } CHECK_MEMORY_OP( webvtt_string_putc( annotation, **position ) ); } return WEBVTT_UNFINISHED; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_end_tag_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { for( ; *token_state == END_TAG; (*position)++ ) { if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) { return WEBVTT_SUCCESS; } CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) ); } return WEBVTT_UNFINISHED; } WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer_time_stamp_tag_state( webvtt_byte **position, webvtt_cuetext_token_state *token_state, webvtt_string *result ) { for( ; *token_state == TIME_STAMP_TAG; (*position)++ ) { if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) { return WEBVTT_SUCCESS; } CHECK_MEMORY_OP( webvtt_string_putc( result, **position ) ); } return WEBVTT_UNFINISHED; } /** * Need to set up differently. * Get a status in order to return at end and release memeory. */ WEBVTT_INTERN webvtt_status webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token ) { webvtt_cuetext_token_state token_state = DATA; webvtt_string result, annotation; webvtt_stringlist *css_classes; webvtt_timestamp time_stamp = 0; webvtt_status status = WEBVTT_UNFINISHED; if( !position ) { return WEBVTT_INVALID_PARAM; } webvtt_create_string( 10, &result ); webvtt_create_string( 10, &annotation ); webvtt_create_stringlist( &css_classes ); /** * Loop while the tokenizer is not finished. * Based on the state of the tokenizer enter a function to handle that * particular tokenizer state. Those functions will loop until they either * change the state of the tokenizer or reach a valid token end point. */ while( status == WEBVTT_UNFINISHED ) { switch( token_state ) { case DATA : status = webvtt_cuetext_tokenizer_data_state( position, &token_state, &result ); break; case ESCAPE: status = webvtt_cuetext_tokenizer_escape_state( position, &token_state, &result ); break; case TAG: status = webvtt_cuetext_tokenizer_tag_state( position, &token_state, &result ); break; case START_TAG: status = webvtt_cuetext_tokenizer_start_tag_state( position, &token_state, &result ); break; case START_TAG_CLASS: status = webvtt_cuetext_tokenizer_start_tag_class_state( position, &token_state, css_classes ); break; case START_TAG_ANNOTATION: status = webvtt_cuetext_tokenizer_start_tag_annotation_state( position, &token_state, &annotation ); break; case END_TAG: status = webvtt_cuetext_tokenizer_end_tag_state( position, &token_state, &result ); break; case TIME_STAMP_TAG: status = webvtt_cuetext_tokenizer_time_stamp_tag_state( position, &token_state, &result ); break; } if( token_state == START_TAG_ANNOTATION ) { webvtt_skipwhite( position ); } } if( **position == UTF8_GREATER_THAN ) { (*position)++; } if( status == WEBVTT_SUCCESS ) { /** * The state that the tokenizer left off on will tell us what kind of token * needs to be made. */ if( token_state == DATA || token_state == ESCAPE ) { status = webvtt_create_cuetext_text_token( token, &result ); } else if(token_state == TAG || token_state == START_TAG || token_state == START_TAG_CLASS || token_state == START_TAG_ANNOTATION) { /** * If the tag does not accept an annotation then release the current * annotation and intialize annotation to a safe empty state */ if( !tag_accepts_annotation( &result ) ) { webvtt_release_string( &annotation ); webvtt_init_string( &annotation ); } status = webvtt_create_cuetext_start_token( token, &result, css_classes, &annotation ); } else if( token_state == END_TAG ) { status = webvtt_create_cuetext_end_token( token, &result ); } else if( token_state == TIME_STAMP_TAG ) { parse_timestamp( webvtt_string_text( &result ), &time_stamp ); status = webvtt_create_cuetext_timestamp_token( token, time_stamp ); } else { status = WEBVTT_INVALID_TOKEN_STATE; } } webvtt_release_stringlist( &css_classes ); webvtt_release_string( &result ); webvtt_release_string( &annotation ); return status; } /** * Currently line and len are not being kept track of. * Don't think pnode_length is needed as nodes track there list count * internally. */ WEBVTT_INTERN webvtt_status webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue, webvtt_string *payload, int finished ) { const webvtt_byte *cue_text; webvtt_status status; webvtt_byte *position; webvtt_node *node_head; webvtt_node *current_node; webvtt_node *temp_node; webvtt_cuetext_token *token; webvtt_node_kind kind; if( !cue ) { return WEBVTT_INVALID_PARAM; } cue_text = webvtt_string_text( payload ); if( !cue_text ) { return WEBVTT_INVALID_PARAM; } if ( WEBVTT_FAILED(status = webvtt_create_head_node( &cue->node_head ) ) ) { return status; } position = (webvtt_byte *)cue_text; node_head = cue->node_head; current_node = node_head; temp_node = NULL; token = NULL; /** * Routine taken from the W3C specification * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules */ while( *position != UTF8_NULL_BYTE ) { webvtt_delete_cuetext_token( &token ); /* Step 7. */ switch( webvtt_cuetext_tokenizer( &position, &token ) ) { case( WEBVTT_UNFINISHED ): /* Error here. */ break; /* Step 8. */ case( WEBVTT_SUCCESS ): /** * If we've found an end token which has a valid end token tag name and * a tag name that is equal to the current node then set current to the * parent of current. */ if( token->token_type == END_TOKEN ) { /** * We have encountered an end token but we are at the top of the list * and thus have not encountered any start tokens yet, throw away the * token. */ if( current_node->kind == WEBVTT_HEAD_NODE ) { continue; } /** * We have encountered an end token but it is not in a format that is * supported, throw away the token. */ if( webvtt_get_node_kind_from_tag_name( &token->tag_name, &kind ) == WEBVTT_INVALID_TAG_NAME ) { continue; } /** * We have encountered an end token and it matches the start token of * the node that we are currently on. Move back up the list of nodes * and continue parsing. */ if( current_node->kind == kind ) { current_node = current_node->parent; } } else { /** * Attempt to create a valid node from the token. * If successful then attach the node to the current nodes list and * also set current to the newly created node if it is an internal * node type. */ if( webvtt_create_node_from_token( token, &temp_node, current_node ) != WEBVTT_SUCCESS ) { /* Do something here? */ } else { webvtt_attach_internal_node( current_node, temp_node ); if( WEBVTT_IS_VALID_INTERNAL_NODE( temp_node->kind ) ) { current_node = temp_node; } /* Release the node as attach internal node increases the count. */ webvtt_release_node( &temp_node ); } } break; } webvtt_skipwhite( &position ); } webvtt_delete_cuetext_token( &token ); return WEBVTT_SUCCESS; }