mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-06 00:55:37 +00:00
a2d1c31646
Bug 872822 - Update WebVTT library to v0.5. r=rillian
1684 lines
48 KiB
C
1684 lines
48 KiB
C
/**
|
|
* Copyright (c) 2013 Mozilla Foundation and Contributors
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "parser_internal.h"
|
|
#include "cuetext_internal.h"
|
|
#include "cue_internal.h"
|
|
#include <string.h>
|
|
|
|
#define _ERROR(X) do { if( skip_error == 0 ) { ERROR(X); } } while(0)
|
|
|
|
static const char separator[] = {
|
|
'-', '-', '>'
|
|
};
|
|
/* UTF8 encoding of U+FFFD REPLACEMENT CHAR */
|
|
static const char replacement[] = { 0xEF, 0xBF, 0xBD };
|
|
|
|
#define MSECS_PER_HOUR (3600000)
|
|
#define MSECS_PER_MINUTE (60000)
|
|
#define MSECS_PER_SECOND (1000)
|
|
#define BUFFER (self->buffer + self->position)
|
|
#define MALFORMED_TIME ((webvtt_timestamp_t)-1.0)
|
|
|
|
static webvtt_status find_bytes( const char *buffer, webvtt_uint len,
|
|
const char *sbytes, webvtt_uint slen );
|
|
static webvtt_int64 parse_int( const char **pb, int *pdigits );
|
|
static void skip_spacetab( const char *text, webvtt_uint *pos,
|
|
webvtt_uint len, webvtt_uint *column );
|
|
static void skip_until_white( const char *text, webvtt_uint *pos,
|
|
webvtt_uint len, webvtt_uint *column );
|
|
|
|
WEBVTT_EXPORT webvtt_status
|
|
webvtt_create_parser( webvtt_cue_fn on_read,
|
|
webvtt_error_fn on_error, void *
|
|
userdata,
|
|
webvtt_parser *ppout )
|
|
{
|
|
webvtt_parser p;
|
|
if( !on_read || !on_error || !ppout ) {
|
|
return WEBVTT_INVALID_PARAM;
|
|
}
|
|
|
|
if( !( p = ( webvtt_parser )webvtt_alloc0( sizeof * p ) ) ) {
|
|
return WEBVTT_OUT_OF_MEMORY;
|
|
}
|
|
|
|
memset( p->astack, 0, sizeof( p->astack ) );
|
|
p->stack = p->astack;
|
|
p->top = p->stack;
|
|
p->top->state = T_INITIAL;
|
|
p->stack_alloc = sizeof( p->astack ) / sizeof( p->astack[0] );
|
|
|
|
p->read = on_read;
|
|
p->error = on_error;
|
|
p->column = p->line = 1;
|
|
p->userdata = userdata;
|
|
p->finished = 0;
|
|
*ppout = p;
|
|
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
* Helper to validate a cue and, if valid, notify the application that a cue has
|
|
* been read.
|
|
* If it fails to validate, silently delete the cue.
|
|
*
|
|
* ( This might not be the best way to go about this, and additionally,
|
|
* webvtt_validate_cue has no means to report errors with the cue, and we do
|
|
* nothing with its return value )
|
|
*/
|
|
static void
|
|
finish_cue( webvtt_parser self, webvtt_cue **pcue )
|
|
{
|
|
if( pcue ) {
|
|
webvtt_cue *cue = *pcue;
|
|
if( cue ) {
|
|
if( webvtt_validate_cue( cue ) ) {
|
|
self->read( self->userdata, cue );
|
|
} else {
|
|
webvtt_release_cue( &cue );
|
|
}
|
|
*pcue = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This routine tries to clean up the stack
|
|
* for us, to prevent leaks.
|
|
*
|
|
* It should also help find errors in stack management.
|
|
*/
|
|
WEBVTT_INTERN void
|
|
cleanup_stack( webvtt_parser self )
|
|
{
|
|
webvtt_state *st = self->top;
|
|
while( st >= self->stack ) {
|
|
switch( st->type ) {
|
|
case V_CUE:
|
|
webvtt_release_cue( &st->v.cue );
|
|
break;
|
|
case V_TEXT:
|
|
webvtt_release_string( &st->v.text );
|
|
break;
|
|
/**
|
|
* TODO: Clean up cuetext nodes as well.
|
|
* Eventually the cuetext parser will probably be making use
|
|
* of this stack, and will need to manage it well also.
|
|
*/
|
|
default:
|
|
break;
|
|
}
|
|
st->type = V_NONE;
|
|
st->line = st->column = st->token = 0;
|
|
st->v.cue = NULL;
|
|
if( st > self->stack ) {
|
|
--self->top;
|
|
}
|
|
--st;
|
|
}
|
|
if( self->stack != self->astack ) {
|
|
/**
|
|
* If the stack is dynamically allocated (probably not),
|
|
* then point it to the statically allocated one (and zeromem it),
|
|
* then finally delete the old dynamically allocated stack
|
|
*/
|
|
webvtt_state *pst = self->stack;
|
|
memset( self->astack, 0, sizeof( self->astack ) );
|
|
self->stack = self->astack;
|
|
self->stack_alloc = sizeof( self->astack ) / sizeof( *( self->astack ) );
|
|
webvtt_free( pst );
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
*/
|
|
WEBVTT_EXPORT webvtt_status
|
|
webvtt_finish_parsing( webvtt_parser self )
|
|
{
|
|
webvtt_status status = WEBVTT_SUCCESS;
|
|
const char buffer[] = "\0";
|
|
const webvtt_uint len = 0;
|
|
webvtt_uint pos = 0;
|
|
|
|
if( !self->finished ) {
|
|
self->finished = 1;
|
|
|
|
retry:
|
|
switch( self->mode ) {
|
|
/**
|
|
* We've left off parsing cue settings and are not in the empty state,
|
|
* return WEBVTT_CUE_INCOMPLETE.
|
|
*/
|
|
case M_WEBVTT:
|
|
if( self->top->state == T_CUEREAD ) {
|
|
SAFE_ASSERT( self->top != self->stack );
|
|
--self->top;
|
|
self->popped = 1;
|
|
}
|
|
|
|
if( self->top->state == T_CUE ) {
|
|
webvtt_string text;
|
|
webvtt_cue *cue;
|
|
if( self->top->type == V_NONE ) {
|
|
webvtt_create_cue( &self->top->v.cue );
|
|
self->top->type = V_CUE;
|
|
}
|
|
cue = self->top->v.cue;
|
|
SAFE_ASSERT( self->popped && (self->top+1)->state == T_CUEREAD );
|
|
SAFE_ASSERT( cue != 0 );
|
|
text.d = (self->top+1)->v.text.d;
|
|
(self->top+1)->v.text.d = 0;
|
|
(self->top+1)->type = V_NONE;
|
|
(self->top+1)->state = 0;
|
|
self->column = 1;
|
|
status = webvtt_proc_cueline( self, cue, &text );
|
|
if( cue_is_incomplete( cue ) ) {
|
|
ERROR( WEBVTT_CUE_INCOMPLETE );
|
|
}
|
|
++self->line;
|
|
self->column = 1;
|
|
if( self->mode == M_CUETEXT ) {
|
|
goto retry;
|
|
}
|
|
}
|
|
break;
|
|
/**
|
|
* We've left off on trying to read in a cue text.
|
|
* Parse the partial cue text read and pass the cue back to the
|
|
* application if possible.
|
|
*/
|
|
case M_CUETEXT:
|
|
status = webvtt_proc_cuetext( self, buffer, &pos, len, self->finished );
|
|
break;
|
|
case M_SKIP_CUE:
|
|
/* Nothing to do here. */
|
|
break;
|
|
}
|
|
cleanup_stack( self );
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
WEBVTT_EXPORT void
|
|
webvtt_delete_parser( webvtt_parser self )
|
|
{
|
|
if( self ) {
|
|
cleanup_stack( self );
|
|
|
|
webvtt_release_string( &self->line_buffer );
|
|
webvtt_free( self );
|
|
}
|
|
}
|
|
|
|
/**
 * Shorthand for spelling a state machine as nested switch statements.
 * Several of these expand to unbalanced braces on purpose, so they only
 * make sense in matching pairs (BEGIN_x ... END_x, IF_TRANSITION ... ENDIF).
 * BEGIN_DFA switches on a local `top`, BEGIN_TOKEN and the transition macros
 * read a local `token`, and the transition macros assign `self->state`.
 */
#define BEGIN_STATE(State)            case State: {
#define END_STATE                     } break;
#define IF_TOKEN(Token,Actions)       case Token: { Actions } break;
#define BEGIN_DFA                     switch(top->state) {
#define END_DFA                       }
#define BEGIN_TOKEN                   switch(token) {
#define END_TOKEN                     }
#define IF_TRANSITION(Token,State)    if( token == Token ) { self->state = State;
#define ELIF_TRANSITION(Token,State)  } else IF_TRANSITION(Token,State)
#define ENDIF                         }
#define ELSE                          } else {
|
|
|
|
static int
|
|
find_newline( const char *buffer, webvtt_uint *pos, webvtt_uint len )
|
|
{
|
|
while( *pos < len ) {
|
|
if( buffer[ *pos ] == '\r' || buffer[ *pos ] == '\n' ) {
|
|
return 1;
|
|
} else {
|
|
( *pos )++;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
skip_spacetab( const char *text, webvtt_uint *pos, webvtt_uint len,
|
|
webvtt_uint *column )
|
|
{
|
|
webvtt_uint c = 0;
|
|
if( !column ) {
|
|
column = &c;
|
|
}
|
|
while( *pos < len ) {
|
|
char ch = text[ *pos ];
|
|
if( ch == ' ' || ch == '\t' ) {
|
|
++( *pos );
|
|
++( *column );
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
skip_until_white( const char *text, webvtt_uint *pos, webvtt_uint len,
|
|
webvtt_uint *column )
|
|
{
|
|
webvtt_uint c = 0;
|
|
if( !column ) {
|
|
column = &c;
|
|
}
|
|
while( *pos < len ) {
|
|
char ch = text[ *pos ];
|
|
if( ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' ) {
|
|
break;
|
|
} else {
|
|
int length = webvtt_utf8_length( text + *pos );
|
|
*pos += length;
|
|
++( *column );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* basic strnstr-ish routine
|
|
*/
|
|
static webvtt_status
|
|
find_bytes( const char *buffer, webvtt_uint len,
|
|
const char *sbytes, webvtt_uint slen )
|
|
{
|
|
webvtt_uint slen2;
|
|
// check params for integrity
|
|
if( !buffer || len < 1 || !sbytes || slen < 1 ) {
|
|
return WEBVTT_INVALID_PARAM;
|
|
}
|
|
|
|
slen2 = slen - 1;
|
|
while( len-- >= slen && *buffer ){
|
|
if( *buffer == *sbytes && memcmp( buffer + 1, sbytes + 1, slen2 ) == 0 ) {
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
buffer++;
|
|
}
|
|
|
|
return WEBVTT_NO_MATCH_FOUND;
|
|
}
|
|
|
|
/**
 * Helpers to figure out what state we're on.
 * All of them expect a parser named `self` to be in scope.
 */
#define SP (self->top)                      /* current top frame */
#define AT_BOTTOM (self->top == self->stack)
/* True once the stack has outgrown the array embedded in the parser and
   lives in a separate allocation. */
#define ON_HEAP (self->stack != self->astack)
#define STACK_SIZE ((webvtt_uint)(self->top - self->stack))
#define FRAME(i) (self->top - (i))          /* i frames below the top */
#define FRAMEUP(i) (self->top + (i))        /* i frames above the top */
#define RECHECK goto _recheck;
#define BACK (SP->back)
|
|
/**
|
|
* More state stack helpers
|
|
*/
|
|
WEBVTT_INTERN webvtt_status
|
|
do_push( webvtt_parser self, webvtt_uint token, webvtt_uint back,
|
|
webvtt_uint state, void *data, webvtt_state_value_type type,
|
|
webvtt_uint line, webvtt_uint column )
|
|
{
|
|
if( STACK_SIZE + 1 >= self->stack_alloc ) {
|
|
webvtt_state *stack =
|
|
( webvtt_state * )webvtt_alloc0( sizeof( webvtt_state ) *
|
|
( self->stack_alloc << 1 ) ), *tmp;
|
|
if( !stack ) {
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
return WEBVTT_OUT_OF_MEMORY;
|
|
}
|
|
memcpy( stack, self->stack, sizeof( webvtt_state ) * self->stack_alloc );
|
|
tmp = self->stack;
|
|
self->stack = stack;
|
|
self->top = stack + ( self->top - tmp );
|
|
if( tmp != self->astack ) {
|
|
webvtt_free( tmp );
|
|
}
|
|
}
|
|
++self->top;
|
|
self->top->state = state;
|
|
self->top->flags = 0;
|
|
self->top->type = type;
|
|
self->top->token = ( webvtt_token )token;
|
|
self->top->line = line;
|
|
self->top->back = back;
|
|
self->top->column = column;
|
|
self->top->v.cue = ( webvtt_cue * )data;
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
static int
|
|
do_pop( webvtt_parser self )
|
|
{
|
|
int count = self->top->back;
|
|
self->top -= count;
|
|
self->top->back = 0;
|
|
self->popped = 1;
|
|
return count;
|
|
}
|
|
|
|
/**
 * Stack manipulation shorthands. They expect `self`, `token`, `last_line`
 * and `last_column` to be in scope, and the PUSH forms return
 * WEBVTT_OUT_OF_MEMORY from the enclosing function when do_push() does.
 */

/* Push state S with value V of type T, going back B frames on POPBACK(). */
#define PUSH(S,B,V,T) \
do { \
  self->popped = 0; \
  if( do_push(self,token,(B),(S),(void*)(V),T,last_line, last_column) \
      == WEBVTT_OUT_OF_MEMORY ) \
    return WEBVTT_OUT_OF_MEMORY; \
} while(0)

/* Same as PUSH, with a back count one larger than the current frame's. */
#define PUSH0(S,V,T) PUSH(S,BACK+1,V,T)

/* Drop exactly one frame. */
#define POP() \
do \
{ \
  --(self->top); \
  self->popped = 1; \
} while(0)

/* Drop as many frames as the top frame's back count says. */
#define POPBACK() do_pop(self)
|
|
|
|
static webvtt_status
|
|
webvtt_parse_cuesetting( webvtt_parser self, const char *text,
|
|
webvtt_uint *pos, webvtt_uint len, webvtt_error bv, webvtt_token
|
|
keyword, webvtt_token values[], webvtt_uint *value_column ) {
|
|
enum webvtt_param_mode
|
|
{
|
|
P_KEYWORD,
|
|
P_COLON,
|
|
P_VALUE
|
|
};
|
|
int i;
|
|
webvtt_bool precws = 0;
|
|
webvtt_bool prevws = 0;
|
|
static const webvtt_token value_tokens[] = {
|
|
INTEGER, RL, LR, START, MIDDLE, END, LEFT, RIGHT, PERCENTAGE, 0
|
|
};
|
|
static const webvtt_token keyword_tokens[] = {
|
|
ALIGN, SIZE, LINE, POSITION, VERTICAL, 0
|
|
};
|
|
enum webvtt_param_mode mode = P_KEYWORD;
|
|
webvtt_uint keyword_column = 0;
|
|
while( *pos < len ) {
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_uint last_pos = *pos;
|
|
webvtt_token tk = webvtt_lex( self, text, pos, len, 1 );
|
|
webvtt_uint tp = self->token_pos;
|
|
self->token_pos = 0;
|
|
|
|
switch( mode ) {
|
|
case P_KEYWORD:
|
|
switch( tk ) {
|
|
case ALIGN:
|
|
case SIZE:
|
|
case POSITION:
|
|
case VERTICAL:
|
|
case LINE:
|
|
if( tk != keyword ) {
|
|
*pos -= tp;
|
|
self->column -= tp;
|
|
return WEBVTT_NEXT_CUESETTING;
|
|
}
|
|
if( *pos < len ) {
|
|
webvtt_uint column = last_column;
|
|
char ch = text[ *pos ];
|
|
if( ch != ':' ) {
|
|
webvtt_error e = WEBVTT_INVALID_CUESETTING;
|
|
if( ch == ' ' || ch == '\t' ) {
|
|
column = self->column;
|
|
e = WEBVTT_UNEXPECTED_WHITESPACE;
|
|
skip_spacetab( text, pos, len, &self->column );
|
|
if( text[ *pos ] == ':' ) {
|
|
skip_until_white( text, pos, len, &self->column );
|
|
}
|
|
} else {
|
|
skip_until_white( text, pos, len, &self->column );
|
|
}
|
|
ERROR_AT_COLUMN( e, column );
|
|
} else {
|
|
mode = P_COLON;
|
|
keyword_column = last_column;
|
|
}
|
|
} else {
|
|
ERROR_AT_COLUMN( WEBVTT_INVALID_CUESETTING, last_column );
|
|
}
|
|
break;
|
|
case WHITESPACE:
|
|
break;
|
|
case NEWLINE:
|
|
return WEBVTT_SUCCESS;
|
|
break;
|
|
default:
|
|
ERROR_AT( WEBVTT_INVALID_CUESETTING, last_line,
|
|
last_column );
|
|
*pos = *pos + tp + 1;
|
|
while( *pos < len && text[ *pos ] != ' ' && text[ *pos ] != '\t' ) {
|
|
if( text[ *pos ] == '\n' || text[ *pos ] == '\r' ) {
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
++( *pos );
|
|
++self->column;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case P_COLON:
|
|
if( tk == WHITESPACE && !precws ) {
|
|
ERROR_AT( WEBVTT_UNEXPECTED_WHITESPACE, last_line,
|
|
last_column
|
|
);
|
|
precws = 1;
|
|
} else if( tk == COLON ) {
|
|
mode = P_VALUE;
|
|
} else if( token_in_list( tk, value_tokens ) ) {
|
|
ERROR_AT( WEBVTT_MISSING_CUESETTING_DELIMITER, last_line,
|
|
last_column );
|
|
mode = P_VALUE;
|
|
goto get_value;
|
|
} else if( token_in_list( tk, keyword_tokens ) ) {
|
|
ERROR_AT( WEBVTT_INVALID_CUESETTING, last_line,
|
|
keyword_column );
|
|
} else {
|
|
ERROR_AT( WEBVTT_INVALID_CUESETTING_DELIMITER, last_line,
|
|
last_column );
|
|
*pos = last_pos + tp + 1;
|
|
}
|
|
break;
|
|
case P_VALUE:
|
|
get_value:
|
|
if( tk == WHITESPACE && !prevws ) {
|
|
ERROR_AT( WEBVTT_UNEXPECTED_WHITESPACE, last_line,
|
|
last_column );
|
|
} else if( ( i = find_token( tk, values ) ) >= 0 ) {
|
|
webvtt_token t = values[ i ] & TF_TOKEN_MASK;
|
|
int flags = values[ i ] & TF_FLAGS_MASK;
|
|
*value_column = last_column;
|
|
if( *pos < len ) {
|
|
char ch = text[ *pos ];
|
|
if( ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n' ) {
|
|
goto bad_value;
|
|
}
|
|
}
|
|
switch( t ) {
|
|
case INTEGER:
|
|
case PERCENTAGE:
|
|
if( ( flags & TF_SIGN_MASK ) != TF_SIGN_MASK ) {
|
|
const char p = self->token[ 0 ];
|
|
if( ( ( flags & TF_NEGATIVE ) && p != '-' )
|
|
|| ( ( flags & TF_POSITIVE ) && p == '-' ) ) {
|
|
goto bad_value;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default: /* Currently, other types don't need these checks */
|
|
break;
|
|
}
|
|
return i + 1;
|
|
} else {
|
|
bad_value:
|
|
ERROR_AT( bv, last_line, last_column );
|
|
bad_value_eol:
|
|
while( *pos < len && text[ *pos ] != ' ' && text[ *pos ] != '\t' ) {
|
|
if( text[ *pos ] == '\n' || text[ *pos ] == '\r' ) {
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
++( *pos );
|
|
++self->column;
|
|
}
|
|
if( *pos >= len ) {
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if( mode == P_VALUE && *pos >= len ) {
|
|
ERROR( bv );
|
|
goto bad_value_eol;
|
|
}
|
|
return WEBVTT_NEXT_CUESETTING;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_parse_align( webvtt_parser self, webvtt_cue *cue,
|
|
const char *text, webvtt_uint *pos, webvtt_uint len )
|
|
{
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_status v;
|
|
webvtt_uint vc;
|
|
webvtt_token tokens[] = { START, MIDDLE, END, LEFT, RIGHT, 0 };
|
|
webvtt_align_type values[] = {
|
|
WEBVTT_ALIGN_START, WEBVTT_ALIGN_MIDDLE, WEBVTT_ALIGN_END,
|
|
WEBVTT_ALIGN_LEFT, WEBVTT_ALIGN_RIGHT
|
|
};
|
|
if( ( v = webvtt_parse_cuesetting( self, text, pos, len,
|
|
WEBVTT_ALIGN_BAD_VALUE, ALIGN, tokens, &vc ) ) > 0 ) {
|
|
if( cue->flags & CUE_HAVE_ALIGN ) {
|
|
ERROR_AT( WEBVTT_ALIGN_ALREADY_SET, last_line, last_column );
|
|
}
|
|
cue->flags |= CUE_HAVE_ALIGN;
|
|
cue->settings.align = values[ v - 1 ];
|
|
}
|
|
return v >= 0 ? WEBVTT_SUCCESS : v;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_parse_line( webvtt_parser self, webvtt_cue *cue, const char *text,
|
|
webvtt_uint *pos, webvtt_uint len )
|
|
{
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_status v;
|
|
webvtt_uint vc;
|
|
webvtt_bool first_flag = 0;
|
|
webvtt_token values[] = { INTEGER, PERCENTAGE|TF_POSITIVE, 0 };
|
|
if( ( v = webvtt_parse_cuesetting( self, text, pos, len,
|
|
WEBVTT_LINE_BAD_VALUE, LINE, values, &vc ) ) > 0 ) {
|
|
int digits;
|
|
webvtt_int64 value;
|
|
const char *t = self->token;
|
|
if( cue->flags & CUE_HAVE_LINE ) {
|
|
ERROR_AT( WEBVTT_LINE_ALREADY_SET, last_line, last_column );
|
|
} else {
|
|
first_flag = 1;
|
|
}
|
|
cue->flags |= CUE_HAVE_LINE;
|
|
value = parse_int( &t, &digits );
|
|
switch( values[ v - 1 ] & TF_TOKEN_MASK ) {
|
|
case INTEGER: {
|
|
cue->snap_to_lines = 1;
|
|
cue->settings.line = ( int )value;
|
|
}
|
|
break;
|
|
|
|
case PERCENTAGE: {
|
|
if( value < 0 || value > 100 ) {
|
|
if( first_flag ) {
|
|
cue->flags &= ~CUE_HAVE_LINE;
|
|
}
|
|
ERROR_AT_COLUMN( WEBVTT_LINE_BAD_VALUE, vc );
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
cue->snap_to_lines = 0;
|
|
cue->settings.line = ( int )value;
|
|
} break;
|
|
}
|
|
}
|
|
return v >= 0 ? WEBVTT_SUCCESS : v;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_parse_position( webvtt_parser self, webvtt_cue *cue,
|
|
const char *text, webvtt_uint *pos,
|
|
webvtt_uint len )
|
|
{
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_status v;
|
|
webvtt_uint vc;
|
|
webvtt_bool first_flag = 0;
|
|
webvtt_token values[] = { PERCENTAGE|TF_POSITIVE, 0 };
|
|
if( ( v = webvtt_parse_cuesetting( self, text, pos, len,
|
|
WEBVTT_POSITION_BAD_VALUE, POSITION, values, &vc ) ) > 0 ) {
|
|
int digits;
|
|
webvtt_int64 value;
|
|
const char *t = self->token;
|
|
if( cue->flags & CUE_HAVE_LINE ) {
|
|
ERROR_AT( WEBVTT_POSITION_ALREADY_SET, last_line, last_column );
|
|
} else {
|
|
first_flag = 1;
|
|
}
|
|
cue->flags |= CUE_HAVE_POSITION;
|
|
value = parse_int( &t, &digits );
|
|
if( value < 0 || value > 100 ) {
|
|
if( first_flag ) {
|
|
cue->flags &= ~CUE_HAVE_POSITION;
|
|
}
|
|
ERROR_AT_COLUMN( WEBVTT_POSITION_BAD_VALUE, vc );
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
cue->settings.position = ( int )value;
|
|
}
|
|
return v >= 0 ? WEBVTT_SUCCESS : v;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_parse_size( webvtt_parser self, webvtt_cue *cue, const char *text,
|
|
webvtt_uint *pos, webvtt_uint len )
|
|
{
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_status v;
|
|
webvtt_uint vc;
|
|
webvtt_token tokens[] = { PERCENTAGE|TF_POSITIVE, 0 };
|
|
if( ( v = webvtt_parse_cuesetting( self, text, pos, len,
|
|
WEBVTT_SIZE_BAD_VALUE, SIZE, tokens, &vc ) ) > 0 ) {
|
|
if( cue->flags & CUE_HAVE_SIZE ) {
|
|
ERROR_AT( WEBVTT_SIZE_ALREADY_SET, last_line, last_column );
|
|
}
|
|
cue->flags |= CUE_HAVE_SIZE;
|
|
if( tokens[ v - 1 ] ) {
|
|
int digits;
|
|
const char *t = self->token;
|
|
webvtt_int64 value;
|
|
self->token_pos = 0;
|
|
value = parse_int( &t, &digits );
|
|
if( value < 0 || value > 100 ) {
|
|
ERROR_AT_COLUMN( WEBVTT_SIZE_BAD_VALUE, vc );
|
|
} else {
|
|
cue->settings.size = ( int )value;
|
|
}
|
|
}
|
|
}
|
|
return v >= 0 ? WEBVTT_SUCCESS : v;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_parse_vertical( webvtt_parser self, webvtt_cue *cue,
|
|
const char *text, webvtt_uint *pos,
|
|
webvtt_uint len )
|
|
{
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_status v;
|
|
webvtt_uint vc;
|
|
webvtt_token tokens[] = { RL, LR, 0 };
|
|
webvtt_vertical_type values[] = { WEBVTT_VERTICAL_RL, WEBVTT_VERTICAL_LR };
|
|
if( ( v = webvtt_parse_cuesetting( self, text, pos, len,
|
|
WEBVTT_VERTICAL_BAD_VALUE, VERTICAL, tokens, &vc ) ) > 0 ) {
|
|
if( cue->flags & CUE_HAVE_VERTICAL ) {
|
|
ERROR_AT( WEBVTT_VERTICAL_ALREADY_SET, last_line, last_column );
|
|
}
|
|
cue->flags |= CUE_HAVE_VERTICAL;
|
|
cue->settings.vertical = values[ v - 1 ];
|
|
}
|
|
return v >= 0 ? WEBVTT_SUCCESS : v;
|
|
}
|
|
/**
|
|
* Read a timestamp into 'result' field, following the rules of the cue-times
|
|
* section of the draft:
|
|
* http://dev.w3.org/html5/webvtt/#collect-webvtt-cue-timings-and-settings
|
|
*
|
|
* - Ignore whitespace
|
|
* - Return immediately after having parsed a timestamp
|
|
* - Return error if timestamp invalid.
|
|
*/
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_get_timestamp( webvtt_parser self, webvtt_timestamp *result,
|
|
const char *text, webvtt_uint *pos,
|
|
webvtt_uint len, const char *accepted )
|
|
{
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_token token;
|
|
while( *pos < len ) {
|
|
last_column = self->column;
|
|
last_line = self->line;
|
|
token = webvtt_lex( self, text, pos, len, 1 );
|
|
self->token_pos = 0;
|
|
|
|
if( token == TIMESTAMP ) {
|
|
if( *pos < len && text[ *pos ] != '\r' && text[ *pos ] != '\n' &&
|
|
text[ *pos ] != ' ' && text[ *pos ] != '\t' ) {
|
|
if( accepted == 0 || !strchr( accepted, text[ *pos ] ) ) {
|
|
ERROR_AT( WEBVTT_EXPECTED_TIMESTAMP, last_line, last_column );
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
|
|
if( !parse_timestamp( self->token, result ) ) {
|
|
/* Read a bad timestamp, throw away and abort cue */
|
|
ERROR_AT( WEBVTT_MALFORMED_TIMESTAMP, last_line, last_column );
|
|
if( BAD_TIMESTAMP( *result ) ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
else if( token == WHITESPACE ) {
|
|
/* Ignore whitespace */
|
|
} else {
|
|
/* Not a timestamp, this is an error. */
|
|
ERROR_AT( WEBVTT_EXPECTED_TIMESTAMP, last_line, last_column );
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
|
|
/* If we finish entire line without reading a timestamp,
|
|
this is not a proper cue header and should be discarded. */
|
|
ERROR_AT( WEBVTT_EXPECTED_TIMESTAMP, last_line, last_column );
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_proc_cueline( webvtt_parser self, webvtt_cue *cue,
|
|
webvtt_string *line )
|
|
{
|
|
const char *text;
|
|
webvtt_uint length;
|
|
DIE_IF( line == NULL );
|
|
length = webvtt_string_length( line );
|
|
text = webvtt_string_text( line );
|
|
/* backup the column */
|
|
self->column = 1;
|
|
if( find_bytes( text, length, separator, sizeof( separator ) )
|
|
== WEBVTT_SUCCESS) {
|
|
/* It's not a cue id, we found '-->'. It can't be a second
|
|
cueparams line, because if we had it, we would be in
|
|
a different state. */
|
|
int v;
|
|
self->cuetext_line = self->line + 1;
|
|
if( ( v = parse_cueparams( self, text, length, cue ) ) < 0 ) {
|
|
if( v == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
self->mode = M_SKIP_CUE;
|
|
} else {
|
|
cue->flags |= CUE_HAVE_CUEPARAMS;
|
|
self->mode = M_CUETEXT;
|
|
}
|
|
} else {
|
|
/* It is a cue-id */
|
|
if( cue && cue->flags & CUE_HAVE_ID ) {
|
|
/**
|
|
* This isn't actually a cue-id, because we already
|
|
* have one. It seems to be cuetext, which is occurring
|
|
* before cue-params
|
|
*/
|
|
webvtt_release_string( line );
|
|
ERROR( WEBVTT_CUE_INCOMPLETE );
|
|
self->mode = M_SKIP_CUE;
|
|
return WEBVTT_SUCCESS;
|
|
} else {
|
|
webvtt_uint last_column = self->column;
|
|
webvtt_uint last_line = self->line;
|
|
webvtt_token token = UNFINISHED;
|
|
self->column += length;
|
|
self->cuetext_line = self->line;
|
|
if( WEBVTT_FAILED( webvtt_string_append( &cue->id, text,
|
|
length ) ) ) {
|
|
webvtt_release_string( line );
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
return WEBVTT_OUT_OF_MEMORY;
|
|
}
|
|
cue->flags |= CUE_HAVE_ID;
|
|
|
|
/* Read cue-params line */
|
|
PUSH0( T_CUEREAD, 0, V_NONE );
|
|
webvtt_init_string( &SP->v.text );
|
|
SP->type = V_TEXT;
|
|
}
|
|
}
|
|
|
|
webvtt_release_string( line );
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
|
|
WEBVTT_INTERN int
|
|
parse_cueparams( webvtt_parser self, const char *buffer,
|
|
webvtt_uint len, webvtt_cue *cue )
|
|
{
|
|
webvtt_uint pos = 0;
|
|
|
|
enum cp_state {
|
|
CP_STARTTIME = 0, /* Start timestamp */
|
|
CP_SEPARATOR, /* --> */
|
|
CP_ENDTIME, /* End timestamp */
|
|
|
|
CP_SETTING, /* Cuesetting */
|
|
};
|
|
|
|
enum cp_state state = CP_STARTTIME;
|
|
|
|
#define SETST(X) do { baddelim = 0; state = (X); } while( 0 )
|
|
|
|
self->token_pos = 0;
|
|
while( pos < len ) {
|
|
webvtt_uint last_column;
|
|
webvtt_uint last_line;
|
|
webvtt_token token;
|
|
webvtt_uint token_len;
|
|
|
|
switch( state ) {
|
|
/* start timestamp */
|
|
case CP_STARTTIME:
|
|
if( WEBVTT_FAILED( webvtt_get_timestamp( self, &cue->from, buffer, &pos,
|
|
len, "-" ) ) ) {
|
|
/* abort cue */
|
|
return -1;
|
|
}
|
|
|
|
state = CP_SEPARATOR;
|
|
break;
|
|
|
|
/* '-->' separator */
|
|
case CP_SEPARATOR:
|
|
last_column = self->column;
|
|
last_line = self->line;
|
|
token = webvtt_lex( self, buffer, &pos, len, 1 );
|
|
self->token_pos = 0;
|
|
if( token == SEPARATOR ) {
|
|
/* Expect end timestamp */
|
|
state = CP_ENDTIME;
|
|
} else if( token == WHITESPACE ) {
|
|
/* ignore whitespace */
|
|
} else {
|
|
/* Unexpected token. Abort cue. */
|
|
ERROR_AT( WEBVTT_EXPECTED_CUETIME_SEPARATOR, last_line, last_column );
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
/* end timestamp */
|
|
case CP_ENDTIME:
|
|
if( WEBVTT_FAILED( webvtt_get_timestamp( self, &cue->until, buffer,
|
|
&pos, len, 0 ) ) ) {
|
|
/* abort cue */
|
|
return -1;
|
|
}
|
|
|
|
/* Expect cuesetting */
|
|
state = CP_SETTING;
|
|
break;
|
|
|
|
|
|
default:
|
|
/**
|
|
* Most of these branches need to read a token for the time
|
|
* being, so they're grouped together here.
|
|
*
|
|
* TODO: Remove need to call lexer at all here.
|
|
*/
|
|
last_column = self->column;
|
|
last_line = self->line;
|
|
token = webvtt_lex( self, buffer, &pos, len, 1 );
|
|
token_len = self->token_pos;
|
|
self->token_pos = 0;
|
|
|
|
switch( token ) {
|
|
case NEWLINE:
|
|
return 0;
|
|
case WHITESPACE:
|
|
continue;
|
|
|
|
case VERTICAL:
|
|
{
|
|
webvtt_status status;
|
|
pos -= token_len; /* Required for parse_vertical() */
|
|
self->column = last_column; /* Reset for parse_vertical() */
|
|
status = webvtt_parse_vertical( self, cue, buffer, &pos, len );
|
|
if( status == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
|
|
|
|
case POSITION:
|
|
{
|
|
webvtt_status status;
|
|
pos -= token_len; /* Required for parse_position() */
|
|
self->column = last_column; /* Reset for parse_position() */
|
|
status = webvtt_parse_position( self, cue, buffer, &pos, len );
|
|
if( status == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ALIGN:
|
|
{
|
|
webvtt_status status;
|
|
pos -= token_len; /* Required for parse_align() */
|
|
self->column = last_column; /* Reset for parse_align() */
|
|
status = webvtt_parse_align( self, cue, buffer, &pos, len );
|
|
if( status == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case SIZE:
|
|
{
|
|
webvtt_status status;
|
|
pos -= token_len; /* Required for parse_size() */
|
|
self->column = last_column; /* Reset for parse_size() */
|
|
status = webvtt_parse_size( self, cue, buffer, &pos, len );
|
|
if( status == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case LINE:
|
|
{
|
|
webvtt_status status;
|
|
pos -= token_len; /* Required for parse_line() */
|
|
self->column = last_column; /* Reset for parse_line() */
|
|
status = webvtt_parse_line( self, cue, buffer, &pos, len );
|
|
if( status == WEBVTT_PARSE_ERROR ) {
|
|
return WEBVTT_PARSE_ERROR;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_AT_COLUMN( WEBVTT_INVALID_CUESETTING, last_column );
|
|
while( pos < len && buffer[pos] != '\t' && buffer[pos] != ' ' ) {
|
|
++pos;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
self->token_pos = 0;
|
|
}
|
|
/**
|
|
* If we didn't finish in a good state...
|
|
*/
|
|
if( state != CP_SETTING ) {
|
|
/* if we never made it to the cuesettings, we didn't finish the cuetimes */
|
|
if( state < CP_SETTING ) {
|
|
ERROR( WEBVTT_UNFINISHED_CUETIMES );
|
|
return -1;
|
|
} else {
|
|
/* if we did, we should report an error but continue parsing. */
|
|
webvtt_error e = WEBVTT_INVALID_CUESETTING;
|
|
ERROR( e );
|
|
}
|
|
}
|
|
#undef SETST
|
|
return 0;
|
|
}
|
|
|
|
static webvtt_status
|
|
parse_webvtt( webvtt_parser self, const char *buffer, webvtt_uint *ppos,
|
|
webvtt_uint len, int finish )
|
|
{
|
|
webvtt_status status = WEBVTT_SUCCESS;
|
|
webvtt_token token = 0;
|
|
webvtt_uint pos = *ppos;
|
|
int skip_error = 0;
|
|
|
|
while( pos < len ) {
|
|
webvtt_uint last_column, last_line;
|
|
skip_error = 0;
|
|
last_column = self->column;
|
|
last_line = self->line;
|
|
|
|
/**
|
|
* If we're in certain states, we don't want to get a token and just
|
|
* want to read text instead.
|
|
*/
|
|
if( SP->state == T_CUEREAD ) {
|
|
DIE_IF( SP->type != V_TEXT );
|
|
if( SP->flags == 0 ) {
|
|
int v;
|
|
if( ( v = webvtt_string_getline( &SP->v.text, buffer, &pos, len, 0,
|
|
finish ) ) ) {
|
|
if( v < 0 ) {
|
|
webvtt_release_string( &SP->v.text );
|
|
SP->type = V_NONE;
|
|
POP();
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
status = WEBVTT_OUT_OF_MEMORY;
|
|
goto _finish;
|
|
}
|
|
/* replace '\0' with u+fffd */
|
|
if( WEBVTT_FAILED( status = webvtt_string_replace_all( &SP->v.text,
|
|
"\0", 1,
|
|
replacement,
|
|
3 ) ) ) {
|
|
webvtt_release_string( &SP->v.text );
|
|
SP->type = V_NONE;
|
|
POP();
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
goto _finish;
|
|
}
|
|
SP->flags = 1;
|
|
}
|
|
}
|
|
if( SP->flags ) {
|
|
webvtt_token token = webvtt_lex_newline( self, buffer, &pos, len,
|
|
self->finished );
|
|
if( token == NEWLINE ) {
|
|
POP();
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get the next token from the stream
|
|
*
|
|
* If the token is 'UNFINISHED', but we are at the end of our input
|
|
* data, change it to BADTOKEN because it will never be finished.
|
|
*
|
|
* Otherwise, if we are expecting further data at some point, and have
|
|
* an unfinished token, return and let the next chunk deal with it.
|
|
*/
|
|
if( SP->state != T_CUE ||
|
|
!( self->popped && FRAMEUP( 1 )->state == T_CUEREAD ) ) {
|
|
/**
|
|
* We don't tokenize in certain states
|
|
*/
|
|
token = webvtt_lex( self, buffer, &pos, len, finish );
|
|
if( token == UNFINISHED ) {
|
|
if( finish ) {
|
|
token = BADTOKEN;
|
|
} else if( pos == len ) {
|
|
goto _finish;
|
|
}
|
|
}
|
|
}
|
|
_recheck:
|
|
switch( SP->state ) {
|
|
default:
|
|
/* Should never happen */
|
|
break;
|
|
|
|
case T_INITIAL:
|
|
/**
|
|
* In the initial state:
|
|
* We should have WEBVTT as the first token returned,
|
|
* otherwise this isn't really a valid file.
|
|
*
|
|
* If we get 'WEBVTT', push us into the TAG state, where we
|
|
* check for a tag comment (arbitrary text following a whitespace
|
|
* after the WEBVTT token) until a newline
|
|
*
|
|
* If WEBVTT is not the first token, then report error and
|
|
* abort parsing.
|
|
*/
|
|
if( token == WEBVTT ) {
|
|
PUSH0( T_TAG, 0, V_NONE );
|
|
break;
|
|
} else if( token != UNFINISHED ) {
|
|
ERROR_AT( WEBVTT_MALFORMED_TAG, 1, 1 );
|
|
status = WEBVTT_PARSE_ERROR;
|
|
goto _finish;
|
|
}
|
|
break;
|
|
|
|
case T_TAG:
|
|
/**
|
|
* If we have a WHITESPACE following the WEBVTT token,
|
|
* switch to T_TAGCOMMENT state and skip the comment.
|
|
* Otherwise, if it's a NEWLINE, we can just skip to
|
|
* the T_BODY state.
|
|
*
|
|
* Otherwise, we didn't actually have a WEBVTT token,
|
|
* and should feel ashamed.
|
|
*/
|
|
if( token == WHITESPACE ) {
|
|
/* switch to comment skipper */
|
|
PUSH0( T_TAGCOMMENT, 0, V_NONE );
|
|
} else if( token == NEWLINE ) {
|
|
/* switch to NEWLINE counter */
|
|
POPBACK();
|
|
self->popped = 0;
|
|
SP->state = T_BODY;
|
|
PUSH0( T_EOL, 1, V_INTEGER );
|
|
break;
|
|
} else {
|
|
/**
|
|
* This wasn't preceded by an actual WEBVTT token, it's more
|
|
* like WEBVTTasdasd, which is not valid. Report an error,
|
|
* which should be considered fatal.
|
|
*/
|
|
if( !skip_error ) {
|
|
ERROR_AT_COLUMN( WEBVTT_MALFORMED_TAG, FRAME( 1 )->column );
|
|
skip_error = 1;
|
|
status = WEBVTT_PARSE_ERROR;
|
|
goto _finish;
|
|
}
|
|
}
|
|
break;
|
|
|
|
/**
|
|
* COMMENT -- Read until EOL, ignore everything else
|
|
*/
|
|
case T_TAGCOMMENT:
|
|
switch( token ) {
|
|
case NEWLINE:
|
|
/**
|
|
* If we encounter a newline, switch to NEWLINE mode,
|
|
* and set up so that when we POPBACK() we are in the
|
|
* T_BODY state.
|
|
*/
|
|
POPBACK();
|
|
PUSH0( T_EOL, 1, V_INTEGER );
|
|
break;
|
|
default:
|
|
find_newline( buffer, &pos, len );
|
|
continue;
|
|
}
|
|
break;
|
|
|
|
case T_CUEID:
|
|
switch( token ) {
|
|
/**
|
|
* We're only really expecting a newline here --
|
|
* The cue id should have been read already
|
|
*/
|
|
case NEWLINE:
|
|
SP->state = T_FROM;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/**
|
|
* Count EOLs, POP when finished
|
|
*/
|
|
case T_EOL:
|
|
switch( token ) {
|
|
case NEWLINE:
|
|
SP->v.value++;
|
|
break;
|
|
default:
|
|
POPBACK();
|
|
RECHECK
|
|
}
|
|
break;
|
|
|
|
case T_BODY:
|
|
if( self->popped && FRAMEUP( 1 )->state == T_EOL ) {
|
|
if( FRAMEUP( 1 )->v.value < 2 ) {
|
|
ERROR_AT_COLUMN( WEBVTT_EXPECTED_EOL, 1 );
|
|
}
|
|
FRAMEUP( 1 )->state = 0;
|
|
FRAMEUP( 1 )->v.cue = NULL;
|
|
}
|
|
if( token == NOTE ) {
|
|
PUSH0( T_COMMENT, 0, V_NONE );
|
|
} else if( token != NEWLINE ) {
|
|
webvtt_cue *cue = 0;
|
|
webvtt_string tk = { 0 };
|
|
if( WEBVTT_FAILED( status = webvtt_create_cue( &cue ) ) ) {
|
|
if( status == WEBVTT_OUT_OF_MEMORY ) {
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
}
|
|
goto _finish;
|
|
}
|
|
if( WEBVTT_FAILED( status = webvtt_create_string_with_text( &tk,
|
|
self->token, self->token_pos ) ) ) {
|
|
if( status == WEBVTT_OUT_OF_MEMORY ) {
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
}
|
|
webvtt_release_cue( &cue );
|
|
goto _finish;
|
|
}
|
|
PUSH0( T_CUE, cue, V_CUE );
|
|
PUSH0( T_CUEREAD, 0, V_TEXT );
|
|
SP->v.text.d = tk.d;
|
|
}
|
|
break;
|
|
|
|
|
|
case T_CUE:
|
|
{
|
|
webvtt_cue *cue;
|
|
webvtt_state *st;
|
|
webvtt_string text;
|
|
SAFE_ASSERT( self->popped && FRAMEUP( 1 )->state == T_CUEREAD );
|
|
/**
|
|
* We're expecting either cue-id (contains '-->') or cue
|
|
* params
|
|
*/
|
|
cue = SP->v.cue;
|
|
st = FRAMEUP( 1 );
|
|
text.d = st->v.text.d;
|
|
|
|
st->type = V_NONE;
|
|
st->v.cue = NULL;
|
|
|
|
/* FIXME: guard inconsistent state */
|
|
if (!cue) {
|
|
ERROR( WEBVTT_PARSE_ERROR );
|
|
status = WEBVTT_PARSE_ERROR;
|
|
goto _finish;
|
|
}
|
|
|
|
status = webvtt_proc_cueline( self, cue, &text );
|
|
++self->line;
|
|
if( self->mode != M_WEBVTT ) {
|
|
goto _finish;
|
|
}
|
|
self->popped = 0;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/**
|
|
* reset token pos
|
|
*/
|
|
self->token_pos = 0;
|
|
}
|
|
|
|
|
|
_finish:
|
|
if( status == WEBVTT_OUT_OF_MEMORY ) {
|
|
cleanup_stack( self );
|
|
}
|
|
*ppos = pos;
|
|
return status;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_read_cuetext( webvtt_parser self, const char *b,
|
|
webvtt_uint *ppos, webvtt_uint len, webvtt_bool finish )
|
|
{
|
|
webvtt_status status = WEBVTT_SUCCESS;
|
|
webvtt_uint pos = *ppos;
|
|
int finished = 0;
|
|
int flags = 0;
|
|
webvtt_cue *cue;
|
|
|
|
/* Ensure that we have a cue to work with */
|
|
SAFE_ASSERT( self->top->type = V_CUE );
|
|
cue = self->top->v.cue;
|
|
|
|
/**
|
|
* Hack to support lines spanning multiple buffers.
|
|
*
|
|
* TODO: Do this some better way. This is not good!
|
|
*/
|
|
if( self->line_buffer.d != 0 && self->line_buffer.d->text[
|
|
self->line_buffer.d->length - 1 ] == '\n' ) {
|
|
flags = 1;
|
|
}
|
|
|
|
do {
|
|
if( !flags ) {
|
|
int v;
|
|
if( ( v = webvtt_string_getline( &self->line_buffer, b, &pos, len,
|
|
&self->truncate, finish ) ) ) {
|
|
if( v < 0 || WEBVTT_FAILED( webvtt_string_putc( &self->line_buffer,
|
|
'\n' ) ) ) {
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
status = WEBVTT_OUT_OF_MEMORY;
|
|
goto _finish;
|
|
}
|
|
/* replace '\0' with u+fffd */
|
|
if( WEBVTT_FAILED( status =
|
|
webvtt_string_replace_all( &self->line_buffer,
|
|
"\0", 1, replacement,
|
|
3 ) ) ) {
|
|
ERROR( WEBVTT_ALLOCATION_FAILED );
|
|
goto _finish;
|
|
}
|
|
|
|
flags = 1;
|
|
}
|
|
}
|
|
if( flags ) {
|
|
webvtt_token token = webvtt_lex_newline( self, b, &pos, len, finish );
|
|
if( token == NEWLINE ) {
|
|
self->token_pos = 0;
|
|
self->line++;
|
|
|
|
/* Remove the '\n' that we appended to determine that we're in state 1
|
|
*/
|
|
self->line_buffer.d->text[ --self->line_buffer.d->length ] = 0;
|
|
/**
|
|
* We've encountered a line without any cuetext on it, i.e. there is no
|
|
* newline character and len is 0 or there is and len is 1, therefore,
|
|
* the cue text is finished.
|
|
*/
|
|
if( self->line_buffer.d->length == 0 ) {
|
|
webvtt_release_string( &self->line_buffer );
|
|
finished = 1;
|
|
} else if( find_bytes( webvtt_string_text( &self->line_buffer ),
|
|
webvtt_string_length( &self->line_buffer ), separator,
|
|
sizeof( separator ) ) == WEBVTT_SUCCESS ) {
|
|
/**
|
|
* Line contains cue-times separator, and thus we treat it as a
|
|
* separate cue. Trick program into thinking that T_CUEREAD had read
|
|
* this line.
|
|
*/
|
|
do_push( self, 0, 0, T_CUEREAD, 0, V_NONE, self->line, self->column );
|
|
webvtt_copy_string( &SP->v.text, &self->line_buffer );
|
|
webvtt_release_string( &self->line_buffer );
|
|
SP->type = V_TEXT;
|
|
POP();
|
|
finished = 1;
|
|
} else {
|
|
/**
|
|
* If it's not the end of a cue, simply append it to the cue's payload
|
|
* text.
|
|
*/
|
|
if( webvtt_string_length( &cue->body ) &&
|
|
WEBVTT_FAILED( webvtt_string_putc( &cue->body, '\n' ) ) ) {
|
|
status = WEBVTT_OUT_OF_MEMORY;
|
|
goto _finish;
|
|
}
|
|
webvtt_string_append_string( &cue->body, &self->line_buffer );
|
|
webvtt_release_string( &self->line_buffer );
|
|
flags = 0;
|
|
}
|
|
}
|
|
}
|
|
} while( pos < len && !finished );
|
|
_finish:
|
|
*ppos = pos;
|
|
if( finish ) {
|
|
finished = 1;
|
|
}
|
|
|
|
/**
|
|
* If we didn't encounter 2 successive EOLs, and it's not the final buffer in
|
|
* the file, notify the caller.
|
|
*/
|
|
if( !finish && pos >= len && !WEBVTT_FAILED( status ) && !finished ) {
|
|
status = WEBVTT_UNFINISHED;
|
|
}
|
|
return status;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_status
|
|
webvtt_proc_cuetext( webvtt_parser self, const char *b,
|
|
webvtt_uint *ppos, webvtt_uint len, webvtt_bool finish )
|
|
{
|
|
webvtt_status status;
|
|
webvtt_cue *cue;
|
|
SAFE_ASSERT( ( self->mode == M_CUETEXT || self->mode == M_SKIP_CUE )
|
|
&& self->top->type == V_CUE );
|
|
cue = self->top->v.cue;
|
|
SAFE_ASSERT( cue != 0 );
|
|
status = webvtt_read_cuetext( self, b, ppos, len, finish );
|
|
|
|
if( status == WEBVTT_SUCCESS ) {
|
|
if( self->mode != M_SKIP_CUE ) {
|
|
/**
|
|
* Once we've successfully read the cuetext into line_buffer, call the
|
|
* cuetext parser from cuetext.c
|
|
*/
|
|
status = webvtt_parse_cuetext( self, cue, &cue->body,
|
|
self->finished );
|
|
|
|
/**
|
|
* return the cue to the user, if possible.
|
|
*/
|
|
finish_cue( self, &cue );
|
|
} else {
|
|
webvtt_release_cue( &cue );
|
|
}
|
|
|
|
self->top->type = V_NONE;
|
|
self->top->state = 0;
|
|
self->top->v.cue = 0;
|
|
|
|
if( (self->top+1)->type == V_NONE ) {
|
|
(self->top+1)->state = 0;
|
|
/* Pop from T_CUE state */
|
|
POP();
|
|
} else {
|
|
/**
|
|
* If we found '-->', we need to create another cue and remain
|
|
* in T_CUE state
|
|
*/
|
|
webvtt_create_cue( &self->top->v.cue );
|
|
self->top->type = V_CUE;
|
|
self->top->state = T_CUE;
|
|
}
|
|
self->mode = M_WEBVTT;
|
|
}
|
|
return status;
|
|
}
|
|
|
|
WEBVTT_EXPORT webvtt_status
|
|
webvtt_parse_chunk( webvtt_parser self, const void *buffer, webvtt_uint len )
|
|
{
|
|
webvtt_status status;
|
|
webvtt_uint pos = 0;
|
|
const char *b = ( const char * )buffer;
|
|
|
|
while( pos < len ) {
|
|
switch( self->mode ) {
|
|
case M_WEBVTT:
|
|
if( WEBVTT_FAILED( status = parse_webvtt( self, b, &pos, len,
|
|
self->finished ) ) ) {
|
|
return status;
|
|
}
|
|
break;
|
|
|
|
case M_CUETEXT:
|
|
/**
|
|
* read in cuetext
|
|
*/
|
|
if( WEBVTT_FAILED( status = webvtt_proc_cuetext( self, b, &pos, len,
|
|
self->finished ) ) ) {
|
|
if( status == WEBVTT_UNFINISHED ) {
|
|
/* Make an exception here, because this isn't really a failure. */
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/* If we failed to parse cuetext, return the error */
|
|
if( WEBVTT_FAILED( status ) ) {
|
|
return status;
|
|
}
|
|
break;
|
|
|
|
case M_SKIP_CUE:
|
|
if( WEBVTT_FAILED( status = webvtt_proc_cuetext( self, b, &pos, len,
|
|
self->finished ) ) ) {
|
|
return status;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
return WEBVTT_SUCCESS;
|
|
}
|
|
|
|
/* Retire the state-stack helper macros so that they are not visible to the
   code that follows. NOTE(review): PUSH0, POPBACK, FRAMEUP and RECHECK, which
   the functions above use in the same way, are not undefined here -- confirm
   whether that is intentional. */
#undef SP
|
|
#undef AT_BOTTOM
|
|
#undef ON_HEAP
|
|
#undef STACK_SIZE
|
|
#undef FRAME
|
|
#undef PUSH
|
|
#undef POP
|
|
|
|
/**
|
|
* Get an integer value from a series of digits.
|
|
*/
|
|
static webvtt_int64
|
|
parse_int( const char **pb, int *pdigits )
|
|
{
|
|
int digits = 0;
|
|
webvtt_int64 result = 0;
|
|
webvtt_int64 mul = 1;
|
|
const char *b = *pb;
|
|
while( *b ) {
|
|
char ch = *b;
|
|
if( webvtt_isdigit( ch ) ) {
|
|
/**
|
|
* Digit character, carry on
|
|
*/
|
|
result = result * 10 + ( ch - '0' );
|
|
++digits;
|
|
} else if( mul == 1 && digits == 0 && ch == '-' ) {
|
|
mul = -1;
|
|
} else {
|
|
break;
|
|
}
|
|
++b;
|
|
}
|
|
*pb = b;
|
|
if( pdigits ) {
|
|
*pdigits = digits;
|
|
}
|
|
return result * mul;
|
|
}
|
|
|
|
/**
|
|
* Turn the token of a TIMESTAMP tag into something useful, and returns non-zero
|
|
* returns 0 if it fails
|
|
*/
|
|
WEBVTT_INTERN int
|
|
parse_timestamp( const char *b, webvtt_timestamp *result )
|
|
{
|
|
webvtt_int64 tmp;
|
|
int have_hours = 0;
|
|
int digits;
|
|
int malformed = 0;
|
|
webvtt_int64 v[4];
|
|
if ( !webvtt_isdigit( *b ) ) {
|
|
goto _malformed;
|
|
}
|
|
|
|
/* get sequence of digits */
|
|
v[0] = parse_int( &b, &digits );
|
|
|
|
/**
|
|
* assume v[0] contains hours if more or less than 2 digits, or value is
|
|
* greater than 59
|
|
*/
|
|
if ( digits != 2 || v[0] > 59 ) {
|
|
have_hours = 1;
|
|
}
|
|
|
|
/* fail if missing colon ':' character */
|
|
if ( !*b || *b++ != ':' ) {
|
|
malformed = 1;
|
|
}
|
|
|
|
/* fail if end of data reached, or byte is not an ASCII digit */
|
|
if ( !*b || !webvtt_isdigit( *b ) ) {
|
|
malformed = 1;
|
|
}
|
|
|
|
/* get another integer value, fail if digits is not equal to 2 */
|
|
v[1] = parse_int( &b, &digits );
|
|
if( digits != 2 ) {
|
|
malformed = 1;
|
|
}
|
|
|
|
/* if we already know there's an hour component, or if the next byte is a
|
|
colon ':', read the next value */
|
|
if ( have_hours || ( *b == ':' ) ) {
|
|
if( *b++ != ':' ) {
|
|
goto _malformed;
|
|
}
|
|
if( !*b || !webvtt_isdigit( *b ) ) {
|
|
malformed = 1;
|
|
}
|
|
v[2] = parse_int( &b, &digits );
|
|
if( digits != 2 ) {
|
|
malformed = 1;
|
|
}
|
|
} else {
|
|
/* Otherwise, if there is no hour component, shift everything over */
|
|
v[2] = v[1];
|
|
v[1] = v[0];
|
|
v[0] = 0;
|
|
}
|
|
|
|
/* collect the manditory seconds-frac component. fail if there is no FULL_STOP
|
|
'.' or if there is no ascii digit following it */
|
|
if( *b++ != '.' || !webvtt_isdigit( *b ) ) {
|
|
goto _malformed;
|
|
}
|
|
v[3] = parse_int( &b, &digits );
|
|
if( digits != 3 ) {
|
|
malformed = 1;
|
|
}
|
|
|
|
/* Ensure that minutes and seconds are acceptable values */
|
|
if( v[3] > 999 ) {
|
|
#define MILLIS_PER_SEC (1000)
|
|
tmp = v[3];
|
|
v[2] += tmp / MILLIS_PER_SEC;
|
|
v[3] = tmp % MILLIS_PER_SEC;
|
|
malformed = 1;
|
|
}
|
|
if( v[2] > 59 ) {
|
|
#define SEC_PER_MIN (60)
|
|
tmp = v[2];
|
|
v[1] += tmp / SEC_PER_MIN;
|
|
v[2] = tmp % SEC_PER_MIN;
|
|
malformed = 1;
|
|
}
|
|
if( v[1] > 59 ) {
|
|
#define MIN_PER_HOUR (60)
|
|
tmp = v[1];
|
|
v[0] += tmp / MIN_PER_HOUR;
|
|
v[1] = tmp % MIN_PER_HOUR;
|
|
malformed = 1;
|
|
}
|
|
|
|
*result = ( webvtt_timestamp )( v[0] * MSECS_PER_HOUR )
|
|
+ ( v[1] * MSECS_PER_MINUTE )
|
|
+ ( v[2] * MSECS_PER_SECOND )
|
|
+ ( v[3] );
|
|
|
|
if( malformed ) {
|
|
return 0;
|
|
}
|
|
return 1;
|
|
_malformed:
|
|
*result = 0xFFFFFFFFFFFFFFFF;
|
|
return 0;
|
|
}
|
|
|
|
WEBVTT_INTERN webvtt_bool
|
|
token_in_list( webvtt_token token, const webvtt_token list[] )
|
|
{
|
|
int i = 0;
|
|
webvtt_token t;
|
|
while( ( t = list[ i++ ] ) != 0 ) {
|
|
if( token == t ) {
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
WEBVTT_INTERN int
|
|
find_token( webvtt_token token, const webvtt_token list[] )
|
|
{
|
|
int i = 0;
|
|
webvtt_token t;
|
|
while( ( t = list[ i ] ) != 0 ) {
|
|
webvtt_token masked = t & TF_TOKEN_MASK;
|
|
if( token == masked ) {
|
|
return i;
|
|
}
|
|
++i;
|
|
}
|
|
return -1;
|
|
}
|