From 8cd1dcac2f4b518f6892a907d2c624d5e8d5ab91 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Thu, 16 Nov 2023 10:46:22 +0300 Subject: [PATCH 01/19] Fix unit test target linking problem on MSVC --- CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb9c918b..4cb22c75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,10 @@ if (BUILD_LIBRARY) endif (BUILD_LIBRARY) add_subdirectory("src") -if (NOT MSVC) - # There are linking errors when building tests under MSVC right now. - add_subdirectory("test") -endif (NOT MSVC) + +if (MSVC) + # On Windows gtest uses Static CRT by default. This option fixes linking problems. + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +endif (MSVC) + +add_subdirectory("test") From 0718e44c686dbb1174bff3708aa2b76f8f07a681 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 14 Nov 2023 18:16:56 +0300 Subject: [PATCH 02/19] Make ptrdiff_t to int cast explicit This fixes MSVC compiler warning --- src/webvtt/cue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/webvtt/cue.c b/src/webvtt/cue.c index 53f4372c..83bb5e71 100644 --- a/src/webvtt/cue.c +++ b/src/webvtt/cue.c @@ -470,7 +470,7 @@ webvtt_cue_set_setting_from_string( webvtt_cue *cue, const char *word ) return WEBVTT_BAD_CUESETTING; } - idx = ( value - word ) - 1; + idx = (int)(( value - word ) - 1); if( idx > 31 ) { return WEBVTT_BAD_CUESETTING; } From b1bde125805f7aaad31b0dd504678e4fc8509d53 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 14 Nov 2023 18:17:48 +0300 Subject: [PATCH 03/19] Fix `status` variable shadowing Variable `status` in a cycle shadows the `status` variable from the outer scope. I can omit shadowing by reusing the variable from the outer scope since it is not used after the cycle. 
--- src/webvtt/cuetext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/webvtt/cuetext.c b/src/webvtt/cuetext.c index 401cf250..c0dc01dc 100644 --- a/src/webvtt/cuetext.c +++ b/src/webvtt/cuetext.c @@ -708,7 +708,7 @@ webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue, * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules */ while( *position != '\0' ) { - webvtt_status status = WEBVTT_SUCCESS; + status = WEBVTT_SUCCESS; webvtt_delete_token( &token ); /* Step 7. */ From 2075f641fed81f4437a187240d9a02e2b994938e Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 14 Nov 2023 18:18:45 +0300 Subject: [PATCH 04/19] Fix possible null pointer dereferencing The `node` argument must be checked against NULL before it is dereferenced. --- src/webvtt/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webvtt/node.c b/src/webvtt/node.c index c56bc806..49c5df13 100644 --- a/src/webvtt/node.c +++ b/src/webvtt/node.c @@ -47,8 +47,8 @@ webvtt_ref_node( webvtt_node *node ) WEBVTT_EXPORT void webvtt_init_node( webvtt_node **node ) { - if( *node != &empty_node ) { - if( node && *node ) { + if( node && *node != &empty_node ) { + if( *node ) { webvtt_release_node( node ); } *node = &empty_node; From 1eb506a319c083ddfc3d9f6f3bd98af8f0516a3b Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 14 Nov 2023 18:20:44 +0300 Subject: [PATCH 05/19] Fix MSVC code analyzer warnings and possible security issues - Fix possible null pointer dereference in webvtt_proc_cueline function. The cue argument must be checked against NULL before it is dereferenced on lines 520 and 541. Testing cue pointer for NULL on line 525 is too late. - Fix `token` variable shadowing on line 608 - Fix possible problems in webvtt_read_cuetext function. - In line 844 the assertion expression must check the value of self->top->type instead of assigning it. 
- Line 866: It is necessary to check self->line_buffer.d for NULL because it can be NULL due to the check on line 852. Otherwise it is possible to dereference a NULL pointer on line 889. - Fix false positive code analyzer warning on possible intermediate result truncation on line 1067: ( ch - '0' ) produces 32-bit value which is then converted to 64 bit when added to `result * 10`. --- src/webvtt/parser.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/webvtt/parser.c b/src/webvtt/parser.c index 084decd8..0ae43bb3 100644 --- a/src/webvtt/parser.c +++ b/src/webvtt/parser.c @@ -303,7 +303,7 @@ do_push( webvtt_parser self, webvtt_uint token, webvtt_uint back, { if( STACK_SIZE + 1 >= self->stack_alloc ) { webvtt_state *stack = - ( webvtt_state * )webvtt_alloc0( sizeof( webvtt_state ) * + ( webvtt_state * )webvtt_alloc0( (webvtt_uint)sizeof( webvtt_state ) * ( self->stack_alloc << 1 ) ), *tmp; if( !stack ) { ERROR( WEBVTT_ALLOCATION_FAILED ); @@ -495,6 +495,7 @@ WEBVTT_INTERN webvtt_status webvtt_proc_cueline( webvtt_parser self, webvtt_cue *cue, webvtt_string *line ) { + SAFE_ASSERT( cue != NULL ); const char *text; webvtt_uint length; DIE_IF( line == NULL ); @@ -604,9 +605,9 @@ parse_webvtt( webvtt_parser self, const char *buffer, webvtt_uint *ppos, } } if( SP->flags ) { - webvtt_token token = webvtt_lex_newline( self, buffer, &pos, len, + webvtt_token t = webvtt_lex_newline( self, buffer, &pos, len, self->finished ); - if( token == NEWLINE ) { + if( t == NEWLINE ) { POP(); continue; } @@ -840,7 +841,7 @@ webvtt_read_cuetext( webvtt_parser self, const char *b, webvtt_cue *cue; /* Ensure that we have a cue to work with */ - SAFE_ASSERT( self->top->type = V_CUE ); + SAFE_ASSERT( self->top->type == V_CUE ); cue = self->top->v.cue; /** @@ -882,6 +883,7 @@ webvtt_read_cuetext( webvtt_parser self, const char *b, self->token_pos = 0; self->line++; + SAFE_ASSERT(self->line_buffer.d); /* Remove the '\n' that we appended to determine that we're in 
state 1 */ self->line_buffer.d->text[ --self->line_buffer.d->length ] = 0; @@ -1062,7 +1064,7 @@ webvtt_parse_int( const char **pb, int *pdigits ) /** * Digit character, carry on */ - result = result * 10 + ( ch - '0' ); + result = result * 10 + ( (webvtt_int64)ch - '0' ); ++digits; } else if( mul == 1 && digits == 0 && ch == '-' ) { mul = -1; From 3b576c7f03c5887b5afb0573b64beed4e069bb42 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 14 Nov 2023 18:23:07 +0300 Subject: [PATCH 06/19] Fix MSVC compiler and code analyzer warnings Add explicit type casts when size_t is converted to int On line 345 we must check that `str->d` is not NULL because MSVC code analyzer can't prove that `str->d` is not NULL when webvtt_create_string succeeds. `str->d` is assigned to `d` on line 348 and then `d` is dereferenced on line 364. Checking `str->d` against NULL helps the analyzer to understand that there is no NULL pointer dereference on line 364. --- src/webvtt/string.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/webvtt/string.c b/src/webvtt/string.c index 2323966b..7204de08 100644 --- a/src/webvtt/string.c +++ b/src/webvtt/string.c @@ -130,7 +130,7 @@ webvtt_create_string_with_text( webvtt_string *out, const char *init_text, } if( len < 0 ) { - len = strlen( init_text ); + len = (int)strlen( init_text ); } /** @@ -279,7 +279,7 @@ grow( webvtt_string *str, webvtt_uint need ) } p = d = str->d; - grow = sizeof( *d ) + ( sizeof( char ) * ( d->length + need ) ); + grow = (webvtt_uint32)sizeof(*d) + ((webvtt_uint32)sizeof(char) * (d->length + need)); if( grow < page ) { n = page; @@ -348,7 +348,7 @@ webvtt_string_getline( webvtt_string *src, const char *buffer, d = str->d; } if( len < 0 ) { - len = strlen( buffer ); + len = (int)strlen( buffer ); } n = buffer + len; @@ -414,7 +414,7 @@ webvtt_string_is_equal( const webvtt_string *str, const char *to_compare, } if( len < 0 ) { - len = strlen( to_compare ); + len = (int)strlen( to_compare ); } 
if( str->d->length != (unsigned)len ) { @@ -437,7 +437,7 @@ webvtt_string_append( webvtt_string *str, const char *buffer, int len ) } if( len < 0 ) { - len = strlen( buffer ); + len = (int)strlen( buffer ); } if( len == 0 ) { From 9f43a7b3d5334339ebace6dee1121104c9c2ebdb Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 10:24:47 +0300 Subject: [PATCH 07/19] Make type casts from size_t to int explicit --- src/webvtt/string.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/webvtt/string.c b/src/webvtt/string.c index 7204de08..64d00e7f 100644 --- a/src/webvtt/string.c +++ b/src/webvtt/string.c @@ -26,6 +26,8 @@ */ #include "string_internal.h" +#include +#include #include #include @@ -64,6 +66,14 @@ memmem(const void *l, size_t l_len, const void *s, size_t s_len) return NULL; } +static int +checked_strlen(const char* s) +{ + size_t l = strlen(s); + assert(l <= INT_MAX); + return (int)l; +} + static webvtt_string_data empty_string = { { 1 }, /* init refcount */ 0, /* length */ @@ -130,7 +140,7 @@ webvtt_create_string_with_text( webvtt_string *out, const char *init_text, } if( len < 0 ) { - len = (int)strlen( init_text ); + len = checked_strlen(init_text); } /** @@ -348,7 +358,7 @@ webvtt_string_getline( webvtt_string *src, const char *buffer, d = str->d; } if( len < 0 ) { - len = (int)strlen( buffer ); + len = checked_strlen( buffer ); } n = buffer + len; @@ -414,7 +424,7 @@ webvtt_string_is_equal( const webvtt_string *str, const char *to_compare, } if( len < 0 ) { - len = (int)strlen( to_compare ); + len = checked_strlen(to_compare); } if( str->d->length != (unsigned)len ) { @@ -437,7 +447,7 @@ webvtt_string_append( webvtt_string *str, const char *buffer, int len ) } if( len < 0 ) { - len = (int)strlen( buffer ); + len = checked_strlen( buffer ); } if( len == 0 ) { @@ -475,11 +485,11 @@ webvtt_string_replace( webvtt_string *str, const char *search, int search_len, } if( search_len < 0 ) { - 
search_len = ( int )strlen( search ); + search_len = checked_strlen( search ); } if( replace_len < 0 ) { - replace_len = ( int )strlen( replace ); + replace_len = checked_strlen( replace ); } if( ( p = (char *)memmem( str->d->text, str->d->length, search, @@ -519,11 +529,11 @@ webvtt_string_replace_all( webvtt_string *str, const char *search, } if( search_len < 0 ) { - search_len = ( int )strlen( search ); + search_len = checked_strlen( search ); } if( replace_len < 0 ) { - replace_len = ( int )strlen( replace ); + replace_len = checked_strlen( replace ); } while( ( status = webvtt_string_replace( str, search, search_len, replace, @@ -807,8 +817,9 @@ webvtt_utf8_to_utf16( const char *utf8, const char *end, /* Non-character, overlong sequence, or utf16 surrogate */ return 0xFFFD; } else { + assert((webvtt_uint16)uc == uc); /* Non-surrogate */ - return uc; + return (webvtt_uint16)uc; } } } From 5bf27e8e072e9764fc02149c07b51cc76004a0ad Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 10:30:55 +0300 Subject: [PATCH 08/19] Fix MSVC warning C4706 (assignment within conditional expression) --- src/webvtt/string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webvtt/string.c b/src/webvtt/string.c index 64d00e7f..9f9a9c30 100644 --- a/src/webvtt/string.c +++ b/src/webvtt/string.c @@ -493,7 +493,7 @@ webvtt_string_replace( webvtt_string *str, const char *search, int search_len, } if( ( p = (char *)memmem( str->d->text, str->d->length, search, - search_len ) ) ) { + search_len ) ) != NULL ) { const char *end; size_t pos = p - str->d->text; if( WEBVTT_FAILED( status = grow( str, replace_len ) ) ) { @@ -806,7 +806,7 @@ webvtt_utf8_to_utf16( const char *utf8, const char *end, uc = ( uc << 6 ) | ( ch & 0x3F ); if (!--need) { int nc; - if ( !( nc = UTF_IS_NONCHAR( uc ) ) && uc > 0xFFFF && uc < 0x110000) { + if ( (( nc = UTF_IS_NONCHAR( uc ) ) == 0) && uc > 0xFFFF && uc < 0x110000) { /* Surrogate pair */ if( high_surrogate ) { 
*high_surrogate = UTF_HIGH_SURROGATE( uc ); From cd50781e99c4e992d66b409273b71601418081ba Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 10:41:15 +0300 Subject: [PATCH 09/19] Fix MSVC null pointer dereference warning --- src/webvtt/string.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/webvtt/string.c b/src/webvtt/string.c index 9f9a9c30..3cf1b6e1 100644 --- a/src/webvtt/string.c +++ b/src/webvtt/string.c @@ -352,7 +352,13 @@ webvtt_string_getline( webvtt_string *src, const char *buffer, /* This had better be a valid string_data, or else NULL. */ d = str->d; if( !str->d ) { - if(WEBVTT_FAILED(webvtt_create_string( 0x100, str ))) { + /** + * Checking if str->d != NULL helps the code analyzer to understand + * that str->d is not null after calling webvtt_create_string. + * And it will not complain on possible null pointer dereference + * when accessing d->length below. + */ + if(WEBVTT_FAILED(webvtt_create_string( 0x100, str )) || !str->d) { return -1; } d = str->d; @@ -444,6 +450,14 @@ webvtt_string_append( webvtt_string *str, const char *buffer, int len ) } if( !str->d ) { webvtt_init_string( str ); + /** + * Help static code analyzer to understand that str->d is not null + * and the later access to str->d->length will not cause + * null pointer dereference. 
+ */ + if (!str->d) { + return WEBVTT_FAILED_ASSERTION; + } } if( len < 0 ) { From 44bfe0b9a8d0e1ee9ccaa70a839bf52a1d6f54b7 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 10:42:08 +0300 Subject: [PATCH 10/19] Fix MSVC C4706 warning (assignment within conditional expression) --- src/webvtt/parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/webvtt/parser.c b/src/webvtt/parser.c index 0ae43bb3..0b72a819 100644 --- a/src/webvtt/parser.c +++ b/src/webvtt/parser.c @@ -58,7 +58,7 @@ webvtt_create_parser( webvtt_cue_fn on_read, return WEBVTT_INVALID_PARAM; } - if( !( p = ( webvtt_parser )webvtt_alloc0( sizeof * p ) ) ) { + if( ( p = ( webvtt_parser )webvtt_alloc0( sizeof * p ) ) == NULL ) { return WEBVTT_OUT_OF_MEMORY; } @@ -581,7 +581,7 @@ parse_webvtt( webvtt_parser self, const char *buffer, webvtt_uint *ppos, if( SP->flags == 0 ) { int v; if( ( v = webvtt_string_getline( &SP->v.text, buffer, &pos, len, 0, - finish ) ) ) { + finish ) ) != 0 ) { if( v < 0 ) { webvtt_release_string( &SP->v.text ); SP->type = V_NONE; @@ -858,7 +858,7 @@ webvtt_read_cuetext( webvtt_parser self, const char *b, if( !flags ) { int v; if( ( v = webvtt_string_getline( &self->line_buffer, b, &pos, len, - &self->truncate, finish ) ) ) { + &self->truncate, finish ) ) != 0 ) { if( v < 0 || WEBVTT_FAILED( webvtt_string_putc( &self->line_buffer, '\n' ) ) ) { ERROR( WEBVTT_ALLOCATION_FAILED ); From 24558b39766bd25f39d683e351e005abe9f4ad91 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 10:52:04 +0300 Subject: [PATCH 11/19] Fix MSVC C4706 warning (assignment within conditional expression) --- src/webvtt/cue.c | 10 +++++----- src/webvtt/node.c | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/webvtt/cue.c b/src/webvtt/cue.c index 83bb5e71..e65c2f65 100644 --- a/src/webvtt/cue.c +++ b/src/webvtt/cue.c @@ -194,7 +194,7 @@ webvtt_cue_set_line( webvtt_cue *cue, const char *value ) return WEBVTT_BAD_LINE; } - 
if( ( c = strchr( value, '%' ) ) && ( ( c[1] != '\0' ) || *value == '-' ) ) { + if( (( c = strchr( value, '%' ) ) != NULL) && ( ( c[1] != '\0' ) || *value == '-' ) ) { /** * 4. If any character in value other than the last character is a U+0025 * PERCENT SIGN character (%), then jump to the step labeled next setting. @@ -279,7 +279,7 @@ webvtt_cue_set_position( webvtt_cue *cue, const char *value ) * 3. If any character in value other than the last character is a U+0025 * PERCENT SIGN character (%), then jump to the step labeled next setting. */ - if( ( c = strchr( value, '%' ) ) && ( c[1] != '\0' ) ) { + if( (( c = strchr( value, '%' ) ) != NULL) && ( c[1] != '\0' ) ) { return WEBVTT_BAD_POSITION; } @@ -351,7 +351,7 @@ webvtt_cue_set_size( webvtt_cue *cue, const char *value ) * 3. If any character in value other than the last character is a U+0025 * PERCENT SIGN character (%), then jump to the step labeled next setting. */ - if( ( c = strchr( value, '%' ) ) && ( c[1] != '\0' ) ) { + if( (( c = strchr( value, '%' ) ) != NULL) && ( c[1] != '\0' ) ) { return WEBVTT_BAD_SIZE; } @@ -495,8 +495,8 @@ webvtt_cue_validate_set_settings( webvtt_parser self, webvtt_cue *cue, return WEBVTT_INVALID_PARAM; } length = (int)webvtt_string_length( settings ); - if( ( eol = strchr( webvtt_string_text( settings ), '\r' ) ) - || ( eol = strchr( webvtt_string_text( settings ), '\n' ) ) ) { + if( (( eol = strchr( webvtt_string_text( settings ), '\r' ) ) != NULL) + || (( eol = strchr( webvtt_string_text( settings ), '\n' ) ) != NULL) ) { length = (int)( eol - webvtt_string_text( settings ) ); } diff --git a/src/webvtt/node.c b/src/webvtt/node.c index 49c5df13..48652c40 100644 --- a/src/webvtt/node.c +++ b/src/webvtt/node.c @@ -66,7 +66,7 @@ webvtt_create_node( webvtt_node **node, webvtt_node_kind kind, return WEBVTT_INVALID_PARAM; } - if( !( temp_node = (webvtt_node *)webvtt_alloc0(sizeof(*temp_node)) ) ) + if( ( temp_node = (webvtt_node *)webvtt_alloc0(sizeof(*temp_node)) ) == NULL ) { 
return WEBVTT_OUT_OF_MEMORY; } @@ -92,8 +92,8 @@ webvtt_create_internal_node( webvtt_node **node, webvtt_node *parent, return status; } - if ( !( node_data = - (webvtt_internal_node_data *)webvtt_alloc0( sizeof(*node_data) ) ) ) + if ( ( node_data = + (webvtt_internal_node_data *)webvtt_alloc0( sizeof(*node_data) ) ) == NULL ) { return WEBVTT_OUT_OF_MEMORY; } From 6677f98201a0c15602464b5d3a9bf99515003f0c Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 11:57:22 +0300 Subject: [PATCH 12/19] Fix MSVC Level 3 warnings --- test/unit/lexer_unittest.cpp | 4 ++-- test/unit/readcuetext_unittest.cpp | 2 +- test/unit/string_unittest.cpp | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/unit/lexer_unittest.cpp b/test/unit/lexer_unittest.cpp index 34ac62b3..288f8be4 100644 --- a/test/unit/lexer_unittest.cpp +++ b/test/unit/lexer_unittest.cpp @@ -21,11 +21,11 @@ class Lexer : public ::testing::Test webvtt_token lex_newline( const std::string &str, webvtt_uint &pos, bool finished = true ) { - return webvtt_lex_newline( self, str.c_str(), &pos, str.size(), finished ); + return webvtt_lex_newline( self, str.c_str(), &pos, static_cast(str.size()), finished ); } webvtt_token lex( const std::string &str, webvtt_uint &pos, bool finished = true ) { - return ::webvtt_lex( self, str.c_str(), &pos, str.size(), finished ); + return ::webvtt_lex( self, str.c_str(), &pos, static_cast(str.size()), finished ); } webvtt_lexer_state lexerState() const { diff --git a/test/unit/readcuetext_unittest.cpp b/test/unit/readcuetext_unittest.cpp index e92d7812..2e5acb97 100644 --- a/test/unit/readcuetext_unittest.cpp +++ b/test/unit/readcuetext_unittest.cpp @@ -33,7 +33,7 @@ class ReadCuetext : public ::testing::Test webvtt_status read_cuetext( const std::string &str, webvtt_uint &pos, bool finished = true ) { - return ::webvtt_read_cuetext( self, str.c_str(), &pos, str.size(), + return ::webvtt_read_cuetext( self, str.c_str(), &pos, 
static_cast(str.size()), finished ); } diff --git a/test/unit/string_unittest.cpp b/test/unit/string_unittest.cpp index 33e888db..a2a63c4f 100644 --- a/test/unit/string_unittest.cpp +++ b/test/unit/string_unittest.cpp @@ -8,12 +8,12 @@ using namespace WebVTT; */ const char UTF8AnNyungHaSeYo[] = { - 0xEC, 0x95, 0x88, /* U+C548 */ - 0xEB, 0x85, 0x95, /* U+B155 */ - 0xED, 0x95, 0x98, /* U+D558 */ - 0xEC, 0x84, 0xB8, /* U+C138 */ - 0xEC, 0x9A, 0x94, /* U+C694 */ - 0x00 /* NULL */ + '\xEC', '\x95', '\x88', /* U+C548 */ + '\xEB', '\x85', '\x95', /* U+B155 */ + '\xED', '\x95', '\x98', /* U+D558 */ + '\xEC', '\x84', '\xB8', /* U+C138 */ + '\xEC', '\x9A', '\x94', /* U+C694 */ + '\x00' /* NULL */ }; /** @@ -24,7 +24,7 @@ const webvtt_uint16 UTF16AnNyungHaSeYo[] = 0xC548, 0xB155, 0xD558, 0xC138, 0xC694, 0x0000 }; -const char UTF8ReplacementChar[] = { 0xEF, 0xBF, 0xBD }; +const char UTF8ReplacementChar[] = { '\xEF', '\xBF', '\xBD' }; TEST(String,CreateWithTextStrlen) { @@ -339,7 +339,7 @@ TEST(String,IsEmptyCXX) TEST(String,Replace) { char conststr[3] = {0,0,0}; - char expectedOutput[] = { 0xEF, 0xBF, 0xBD, 0, 0, 0 }; + char expectedOutput[] = { '\xEF', '\xBF', '\xBD', 0, 0, 0 }; webvtt_string str; ASSERT_EQ( WEBVTT_SUCCESS, webvtt_create_string_with_text( &str, conststr, 3 ) ); @@ -373,7 +373,7 @@ TEST(String,ReplaceStrlen) TEST(String,ReplaceAll) { char conststr[] = "\0a\0b"; - char expectedOutput[] = { 0xEF, 0xBF, 0xBD, 'a', 0xEF, 0xBF, 0xBD, 'b', 0 }; + char expectedOutput[] = { '\xEF', '\xBF', '\xBD', 'a', '\xEF', '\xBF', '\xBD', 'b', 0 }; webvtt_string str; ASSERT_EQ( WEBVTT_SUCCESS, webvtt_create_string_with_text( &str, conststr, 4 ) ); From 3c2c977cf62b0516f9cfdcf3f2aed21c663c0fd7 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 12:01:25 +0300 Subject: [PATCH 13/19] Fix ptrdiff_t to webvtt_uint implicit conversion warning. 
--- test/unit/cuetexttokenizer_fixture | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/unit/cuetexttokenizer_fixture b/test/unit/cuetexttokenizer_fixture index e13bdf05..89d7d2c1 100644 --- a/test/unit/cuetexttokenizer_fixture +++ b/test/unit/cuetexttokenizer_fixture @@ -1,6 +1,7 @@ #ifndef __WEBVTT_TEST_CUETOKENIZER_FIXTURE__ # define __WEBVTT_TEST_CUETOKENIZER_FIXTURE__ #include +#include extern "C" { #include } @@ -43,7 +44,9 @@ class CueTextTokenizerTest : public ::testing::Test } webvtt_uint currentCharPos() { - return pos - start; + assert(start <= pos); + auto result = pos - start; + return static_cast(result); } protected: From 2b4456bc7c87e2dccd005e664cb37fb8d03b7a10 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 17:49:38 +0300 Subject: [PATCH 14/19] Check lower array boundary --- src/webvtt/error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/webvtt/error.c b/src/webvtt/error.c index 4eba355c..ab7aeb0a 100644 --- a/src/webvtt/error.c +++ b/src/webvtt/error.c @@ -68,7 +68,7 @@ static const char *errstr[] = { WEBVTT_EXPORT const char * webvtt_strerror( webvtt_error err ) { - if( err >= (sizeof(errstr) / sizeof(*errstr)) ) { + if( err >= (sizeof(errstr) / sizeof(*errstr)) || (err < 0)) { return ""; } return errstr[ err ]; From 7c899115a63ca8ac3e01859b10100a45d9494ed1 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 17:50:15 +0300 Subject: [PATCH 15/19] Replace reinterpret_cast with static_cast --- src/webvttxx/abstract_parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webvttxx/abstract_parser.cpp b/src/webvttxx/abstract_parser.cpp index dcebcf4e..6c908143 100644 --- a/src/webvttxx/abstract_parser.cpp +++ b/src/webvttxx/abstract_parser.cpp @@ -68,7 +68,7 @@ AbstractParser::__parsedCue( void *userdata, webvtt_cue *pcue ) */ webvtt_release_cue( &pcue ); - AbstractParser *self = reinterpret_cast( userdata ); + AbstractParser *self = 
static_cast( userdata ); self->parsedCue( cue ); } @@ -76,7 +76,7 @@ int WEBVTT_CALLBACK AbstractParser::__reportError( void *userdata, webvtt_uint line, webvtt_uint col, webvtt_error error ) { - AbstractParser *self = reinterpret_cast( userdata ); + AbstractParser *self = static_cast( userdata ); Error err( line, col, error ); if( !self->reportError( err ) ) { return -1; From e673bcea9d62e41303b827cddbc5e98df572abbb Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Wed, 15 Nov 2023 17:51:23 +0300 Subject: [PATCH 16/19] Fix some MSVC code analyzer warnings - Calling fstream::close is not noexcept. Wrapping it in try-catch block helps static code analyzer to understand that destructor will not throw. - Replaced c-style cast with static_cast. - C++ core guidelines recommend to initialize local variables. Moving reading cycle and buffer into immediately invoked lambda expression reduces number of variables in outer scope and allows to initialize `status` and `finishStatus`. --- src/webvttxx/file_parser.cpp | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/webvttxx/file_parser.cpp b/src/webvttxx/file_parser.cpp index dfc34b8d..96e52e3a 100644 --- a/src/webvttxx/file_parser.cpp +++ b/src/webvttxx/file_parser.cpp @@ -43,32 +43,37 @@ FileParser::FileParser( const char *fPath ) FileParser::~FileParser() { - if( reader.is_open() ) { - reader.close(); + try { + if( reader.is_open() ) { + reader.close(); + } + } + catch (...) 
{ } } bool FileParser::parse() { - bool final = false; - ::webvtt_status status; - ::webvtt_status finishStatus; - char buffer[0x1000]; if( !reader.good() ) { return false; } - do { - reader.read( buffer, sizeof buffer ); - uint len = (uint)reader.gcount(); - final = reader.eof(); - status = parseChunk( buffer, len ); - } while( !final && !WEBVTT_FAILED(status) ); - if( status == WEBVTT_UNFINISHED ) { - status = WEBVTT_SUCCESS; - } - finishStatus = finishParsing(); + const ::webvtt_status status = [this] { + char buffer[0x1000]; + bool final = false; + ::webvtt_status status = WEBVTT_FAIL; + do + { + reader.read( buffer, sizeof buffer ); + const uint len = static_cast(reader.gcount()); + final = reader.eof(); + status = parseChunk( buffer, len ); + } while( !final && !WEBVTT_FAILED(status) ); + return ( status == WEBVTT_UNFINISHED ) ? WEBVTT_SUCCESS : status; + }(); + + const ::webvtt_status finishStatus = finishParsing(); return !( WEBVTT_FAILED(status) || WEBVTT_FAILED(finishStatus) ); } From b650f1e51cb3ef47de8c908d18d80f01c6d4c125 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Thu, 16 Nov 2023 11:39:11 +0300 Subject: [PATCH 17/19] Remove c-style casts C++ code should use C++-style casts or list initialization --- include/webvttxx/cue | 8 ++++---- include/webvttxx/node | 2 +- include/webvttxx/string | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/webvttxx/cue b/include/webvttxx/cue index 32a5133c..4c538b87 100644 --- a/include/webvttxx/cue +++ b/include/webvttxx/cue @@ -126,7 +126,7 @@ public: } inline Direction direction() const { - return (Direction)cue->settings.vertical; + return static_cast(cue->settings.vertical); } inline float relativeLinePosition() const { return ( 1.f / 100.f ) * static_cast( @@ -138,14 +138,14 @@ public: } inline int lineNumber() const { return cue->settings.line; } inline float textPosition() const { - return ( 1.f / 100.f ) * (float)cue->settings.position; + return static_cast(( 1.0 / 100.0 ) * 
cue->settings.position); } inline uint textPositionPercentage() const { return cue->settings.position; } inline float size() const { - return ( 1.f / 100.f ) * (float)cue->settings.size; + return static_cast(( 1.0 / 100.0 ) * cue->settings.size); } inline uint sizePercentage() const { return cue->settings.size; } - inline Align alignment() const { return (Align)(cue->settings.align); } + inline Align alignment() const { return static_cast(cue->settings.align); } inline bool isHorizontal() const { return orientation() == Horizontal; } inline bool isVertical() const { return orientation() == Vertical; } diff --git a/include/webvttxx/node b/include/webvttxx/node index 1f500146..b0c6c8c1 100644 --- a/include/webvttxx/node +++ b/include/webvttxx/node @@ -73,7 +73,7 @@ public: ~Node() { webvtt_release_node( &node ); } bool isEmpty() const { return kind() == Empty; } - NodeKind kind() const { return (NodeKind)node->kind; } + NodeKind kind() const { return static_cast(node->kind); } int childCount() const { return node->data.internal_data->length; } Node operator[]( int index ) diff --git a/include/webvttxx/string b/include/webvttxx/string index 79810665..14f6d7ef 100644 --- a/include/webvttxx/string +++ b/include/webvttxx/string @@ -115,12 +115,12 @@ public: } static uint32 toUtf32( uint16 low, uint16 high ) { - return (( uint32 )high << 10) + low - 0x35FDC00; + return ( high << uint32{ 10 }) + low - 0x35FDC00; } static uint32 toUtf32( uint16 ch ) { if( !isSurrogate( ch ) ) { - return ( uint32 )ch; + return ch; } return 0xFFFD; } @@ -139,7 +139,7 @@ public: /* Count of Unicode codepoints in string */ inline uint charCount() const { - return (uint)webvtt_utf8_chcount( utf8(), utf8() + length() ); + return static_cast(webvtt_utf8_chcount( utf8(), utf8() + length() )); } inline String &append( char ch, webvtt_status &result ) { From f309d5ec7edcd1b226915e461de3e1607bb576c3 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Mon, 4 Dec 2023 08:53:40 +0300 Subject: [PATCH 18/19] 
Fix WEBVTT tag comment parsing issue. --- src/webvtt/parser.c | 1 + .../filestructure/webvtt-space-text-eol.vtt | 1 + .../webvtt-space-text-eols-one-cue.vtt | 4 + .../filestructure/webvtt-space-text-eols.vtt | 2 + .../webvtt-space-text-no-eol-cue.vtt | 3 + test/unit/filestructure_unittest.cpp | 83 +++++++++++++++++++ 6 files changed, 94 insertions(+) create mode 100644 test/unit/filestructure/webvtt-space-text-eol.vtt create mode 100644 test/unit/filestructure/webvtt-space-text-eols-one-cue.vtt create mode 100644 test/unit/filestructure/webvtt-space-text-eols.vtt create mode 100644 test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt diff --git a/src/webvtt/parser.c b/src/webvtt/parser.c index 0b72a819..ef609d6d 100644 --- a/src/webvtt/parser.c +++ b/src/webvtt/parser.c @@ -713,6 +713,7 @@ parse_webvtt( webvtt_parser self, const char *buffer, webvtt_uint *ppos, * T_BODY state. */ POPBACK(); + SP->state = T_BODY; PUSH0( T_EOL, 1, V_INTEGER ); break; default: diff --git a/test/unit/filestructure/webvtt-space-text-eol.vtt b/test/unit/filestructure/webvtt-space-text-eol.vtt new file mode 100644 index 00000000..c8a3e6c7 --- /dev/null +++ b/test/unit/filestructure/webvtt-space-text-eol.vtt @@ -0,0 +1 @@ +WEBVTT text diff --git a/test/unit/filestructure/webvtt-space-text-eols-one-cue.vtt b/test/unit/filestructure/webvtt-space-text-eols-one-cue.vtt new file mode 100644 index 00000000..eb1e3fc3 --- /dev/null +++ b/test/unit/filestructure/webvtt-space-text-eols-one-cue.vtt @@ -0,0 +1,4 @@ +WEBVTT text + +00:13.000 --> 00:16.000 +This is payload \ No newline at end of file diff --git a/test/unit/filestructure/webvtt-space-text-eols.vtt b/test/unit/filestructure/webvtt-space-text-eols.vtt new file mode 100644 index 00000000..24975be1 --- /dev/null +++ b/test/unit/filestructure/webvtt-space-text-eols.vtt @@ -0,0 +1,2 @@ +WEBVTT text + diff --git a/test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt b/test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt new file 
mode 100644 index 00000000..e694d99a --- /dev/null +++ b/test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt @@ -0,0 +1,3 @@ +WEBVTT text +00:13.000 --> 00:16.000 +This is payload \ No newline at end of file diff --git a/test/unit/filestructure_unittest.cpp b/test/unit/filestructure_unittest.cpp index 3be09588..8ebb44f7 100644 --- a/test/unit/filestructure_unittest.cpp +++ b/test/unit/filestructure_unittest.cpp @@ -115,6 +115,89 @@ TEST_F(FileStructure, WebVTTSpaceText) ASSERT_EQ( 0, errorCount() ) << "This file should contain no errors."; } +/* + * Verifies that a file with the WebVTT signature, a space, non-line terminating + * characters followed by EOL will parse correctly. + * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): + * + * A WebVTT file body consists of the following components, in the following order: + * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. + * 2. The string "WEBVTT". + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 4. Two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 6. Zero or more WebVTT line terminators. + */ +TEST_F(FileStructure, WebVTTSpaceTextEOL) +{ + loadVtt( "filestructure/webvtt-space-text-eol.vtt", 0 ); + ASSERT_EQ( 0, errorCount() ) << "This file should contain no errors."; +} + +/* + * Verifies that a file with the WebVTT signature, a space, non-line terminating + * characters followed by two EOL characters will parse correctly. + * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): + * + * A WebVTT file body consists of the following components, in the following order: + * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. + * 2. The string "WEBVTT". + * 3. 
Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 4. Two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 6. Zero or more WebVTT line terminators. + */ +TEST_F(FileStructure, WebVTTSpaceTextEOLs) +{ + loadVtt( "filestructure/webvtt-space-text-eols.vtt", 0 ); + ASSERT_EQ( 0, errorCount() ) << "This file should contain no errors."; +} + +/* + * Verifies that a file with the WebVTT signature, a space, non-line terminating + * characters followed by two EOL characters and cue will parse correctly. + * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): + * + * A WebVTT file body consists of the following components, in the following order: + * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. + * 2. The string "WEBVTT". + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 4. Two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 6. Zero or more WebVTT line terminators. + */ +TEST_F(FileStructure, WebVTTSpaceTextEOLsOneCue) +{ + loadVtt("filestructure/webvtt-space-text-eols-one-cue.vtt", 1); + ASSERT_EQ(0, errorCount()) << "This file should contain no errors."; +} + +/* + * Verifies that a file with the WebVTT signature, a space, non-line terminating + * characters followed by two EOL characters and cue will parse correctly. + * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): + * + * A WebVTT file body consists of the following components, in the following + * order: + * 1. 
An optional U+FEFF BYTE ORDER MARK (BOM) character. + * 2. The string "WEBVTT". + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER + * TABULATION (tab) character followed by any number of characters that are not + * U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 4. Two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. + * 6. Zero or more WebVTT line terminators. + */ +TEST_F(FileStructure, WebVTTSpaceTextNoEOLsOneCue) { + loadVtt("filestructure/webvtt-space-text-no-eol-cue.vtt", 1); + ASSERT_LE(1, errorCount()); + EXPECT_EQ(1, errorCount()); + expectEquals(getError(0), WEBVTT_EXPECTED_EOL, 2, 1); + expectEquals(getCue(0).startTime(), 0, 13, 0); + expectEquals(getCue(0).endTime(), 0, 16, 0); +} + /* * Verifies that a file with text before the WebVTT signature will fail parsing and finish gracefully. * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): From ac4304332dbb20e872a757da46a8cad5121f1525 Mon Sep 17 00:00:00 2001 From: Alexey Malov Date: Tue, 5 Dec 2023 14:12:14 +0300 Subject: [PATCH 19/19] Renamed test vtt file and reformatted test. 
--- ...-cue.vtt => webvtt-space-text-eol-cue.vtt} | 0 test/unit/filestructure_unittest.cpp | 177 ++++++++++++------ 2 files changed, 119 insertions(+), 58 deletions(-) rename test/unit/filestructure/{webvtt-space-text-no-eol-cue.vtt => webvtt-space-text-eol-cue.vtt} (100%) diff --git a/test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt b/test/unit/filestructure/webvtt-space-text-eol-cue.vtt similarity index 100% rename from test/unit/filestructure/webvtt-space-text-no-eol-cue.vtt rename to test/unit/filestructure/webvtt-space-text-eol-cue.vtt diff --git a/test/unit/filestructure_unittest.cpp b/test/unit/filestructure_unittest.cpp index 8ebb44f7..8d9e0adf 100644 --- a/test/unit/filestructure_unittest.cpp +++ b/test/unit/filestructure_unittest.cpp @@ -12,9 +12,12 @@ class FileStructure : public CueTest { }; * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTNoBOM) @@ -30,9 +33,12 @@ TEST_F(FileStructure, WebVTTNoBOM) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. 
The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTWithBOM) @@ -48,7 +54,9 @@ TEST_F(FileStructure, WebVTTWithBOM) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. @@ -66,9 +74,12 @@ TEST_F(FileStructure, WebVTTSpace) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. 
Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTTab) @@ -85,9 +96,12 @@ TEST_F(FileStructure, WebVTTTab) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTTabText) @@ -104,9 +118,12 @@ TEST_F(FileStructure, WebVTTTabText) * A WebVTT file body consists of the following components, in the following order: * 1. 
An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTSpaceText) @@ -123,9 +140,12 @@ TEST_F(FileStructure, WebVTTSpaceText) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. 
*/ TEST_F(FileStructure, WebVTTSpaceTextEOL) @@ -142,9 +162,12 @@ TEST_F(FileStructure, WebVTTSpaceTextEOL) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTSpaceTextEOLs) @@ -161,9 +184,12 @@ TEST_F(FileStructure, WebVTTSpaceTextEOLs) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. 
Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, WebVTTSpaceTextEOLsOneCue) @@ -174,25 +200,24 @@ TEST_F(FileStructure, WebVTTSpaceTextEOLsOneCue) /* * Verifies that a file with the WebVTT signature, a space, non-line terminating - * characters followed by two EOL characters and cue will parse correctly. + * characters followed by single EOL character and cue will report error. * From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): * * A WebVTT file body consists of the following components, in the following * order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER - * TABULATION (tab) character followed by any number of characters that are not - * U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other - * by two or more WebVTT line terminators. + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. 
*/ TEST_F(FileStructure, WebVTTSpaceTextNoEOLsOneCue) { - loadVtt("filestructure/webvtt-space-text-no-eol-cue.vtt", 1); - ASSERT_LE(1, errorCount()); - EXPECT_EQ(1, errorCount()); + loadVtt("filestructure/webvtt-space-text-eol-cue.vtt", 1); + ASSERT_EQ(1, errorCount()); expectEquals(getError(0), WEBVTT_EXPECTED_EOL, 2, 1); expectEquals(getCue(0).startTime(), 0, 13, 0); expectEquals(getCue(0).endTime(), 0, 16, 0); @@ -205,9 +230,12 @@ TEST_F(FileStructure, WebVTTSpaceTextNoEOLsOneCue) { * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, TextBeforeHeader) @@ -224,9 +252,12 @@ TEST_F(FileStructure, TextBeforeHeader) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. 
Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, BlankFile) @@ -242,9 +273,12 @@ TEST_F(FileStructure, BlankFile) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, BlankFileWithBOM) @@ -254,16 +288,19 @@ TEST_F(FileStructure, BlankFileWithBOM) } /* - * Verifies that a file with a BOM character, a tab, and the WebVTT signature will fail parsing and - * finish gracefully. + * Verifies that a file with a BOM character, a tab, and the WebVTT signature + * will fail parsing and finish gracefully. 
* From http://dev.w3.org/html5/webvtt/#webvtt-file-body (12/02/2012): * * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, TabAfterBOMBeforeHeader) @@ -280,17 +317,24 @@ TEST_F(FileStructure, TabAfterBOMBeforeHeader) * From http://dev.w3.org/html5/webvtt/#parsing (12/10/2012): * * ... - * 8. If line is more than six characters long but the first six characters do not exactly equal "WEBVTT", or the seventh - * character is neither a U+0020 SPACE character nor a U+0009 CHARACTER TABULATION (tab) character, then abort these steps. - * The file does not start with the correct WebVTT file signature and was therefore not successfully processed. - * 9. If position is past the end of input, then abort these steps. The file was successfully processed, but it contains - * no useful data and so no text track cues where added to output. - * 10. The character indicated by position is a U+000A LINE FEED (LF) character. Advance position to the next character in input. - * 11. Header: Collect a sequence of characters that are not U+000A LINE FEED (LF) characters. 
Let line be those characters, if any. + * 8. If line is more than six characters long but the first six characters do not + * exactly equal "WEBVTT", or the seventh character is neither a U+0020 SPACE + * character nor a U+0009 CHARACTER TABULATION (tab) character, then abort these steps. + * The file does not start with the correct WebVTT file signature and was + * therefore not successfully processed. + * 9. If position is past the end of input, then abort these steps. The file was + * successfully processed, but it contains no useful data and so no text track + * cues where added to output. + * 10. The character indicated by position is a U+000A LINE FEED (LF) character. + * Advance position to the next character in input. + * 11. Header: Collect a sequence of characters that are not U+000A LINE FEED (LF) + * characters. Let line be those characters, if any. * 12. If position is past the end of input, then jump to the step labeled end. - * 13. The character indicated by position is a U+000A LINE FEED (LF) character. Advance position to the next character in input. - * 14. If line contains the three-character substring "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN), - * then set the already collected line flag and jump to the step labeled cue loop. + * 13. The character indicated by position is a U+000A LINE FEED (LF) character. + * Advance position to the next character in input. + * 14. If line contains the three-character substring "-->" (U+002D HYPHEN-MINUS, + * U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN), then set the already + * collected line flag and jump to the step labeled cue loop. * 15. If line is not the empty string, then jump back to the step labeled header. */ TEST_F(FileStructure, HeaderNoNewLine) @@ -382,9 +426,12 @@ TEST_F(FileStructure, BOMGarbageNoWebVTT) * A WebVTT file body consists of the following components, in the following order: * 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. * 2. 
The string "WEBVTT". - * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + * 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + * character followed by any number of characters that are not U+000A LINE FEED (LF) + * or U+000D CARRIAGE RETURN (CR) characters. * 4. Two or more WebVTT line terminators. - * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + * 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + * by two or more WebVTT line terminators. * 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, ExtraNewlinesAfterCue) @@ -464,15 +511,19 @@ TEST_F(FileStructure, NewlineBetweenPayloadText) Test expecting parser to succeed if a bunch of new lines at the bottom of file exists. From http://dev.w3.org/html5/webvtt/#the-webvtt-file-format - A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled with the MIME type text/vtt. [RFC3629] + A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled + with the MIME type text/vtt. [RFC3629] A WebVTT file body consists of the following components, in the following order: 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. 2. The string "WEBVTT". - 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + character followed by any number of characters that are not U+000A LINE FEED (LF) + or U+000D CARRIAGE RETURN (CR) characters. 4. Two or more WebVTT line terminators. - 5. 
Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + by two or more WebVTT line terminators. 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, NewLinesAtTheEnd) @@ -483,18 +534,23 @@ TEST_F(FileStructure, NewLinesAtTheEnd) /* - This test checks for a bom charecter followed by garbage data to make sure that a WEBVTT header follows the bom character and nothing else (garbage data). + This test checks for a bom character followed by garbage data to make sure that + a WEBVTT header follows the bom character and nothing else (garbage data). From http://dev.w3.org/html5/webvtt/#the-webvtt-file-format - A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled with the MIME type text/vtt. [RFC3629] + A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled + with the MIME type text/vtt. [RFC3629] A WebVTT file body consists of the following components, in the following order: 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. 2. The string "WEBVTT". - 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + character followed by any number of characters that are not U+000A LINE FEED (LF) + or U+000D CARRIAGE RETURN (CR) characters. 4. Two or more WebVTT line terminators. - 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other + by two or more WebVTT line terminators. 6. Zero or more WebVTT line terminators.
*/ TEST_F(FileStructure, BOMGarbageData) @@ -506,18 +562,23 @@ TEST_F(FileStructure, BOMGarbageData) } /* - This test checks for a tab after the bom charecter to make sure that trailing spaces after bom do not make it pass. + This test checks for a tab after the bom character to make sure that trailing + spaces after bom do not make it pass. From http://dev.w3.org/html5/webvtt/#the-webvtt-file-format - A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled with the MIME type text/vtt. [RFC3629] + A WebVTT file must consist of a WebVTT file body encoded as UTF-8 and labeled + with the MIME type text/vtt. [RFC3629] A WebVTT file body consists of the following components, in the following order: 1. An optional U+FEFF BYTE ORDER MARK (BOM) character. 2. The string "WEBVTT". - 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) character followed by any number of characters that are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters. + 3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER TABULATION (tab) + character followed by any number of characters that are not U+000A LINE FEED (LF) + or U+000D CARRIAGE RETURN (CR) characters. 4. Two or more WebVTT line terminators. - 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by two or more WebVTT line terminators. + 5. Zero or more WebVTT cues and/or WebVTT comments separated from each other by + two or more WebVTT line terminators. 6. Zero or more WebVTT line terminators. */ TEST_F(FileStructure, BOMTabWebvtt)