diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 1162a267f9c9b..c760009ce0c28 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -308,7 +308,20 @@ public function has_element_in_scope( string $tag_name ): bool { 'MARQUEE', 'OBJECT', 'TEMPLATE', - // @todo: Support SVG and MathML nodes when support for foreign content is added. + + /* + * @todo Support SVG and MathML nodes when support for foreign content is added. + * + * - MathML mi + * - MathML mo + * - MathML mn + * - MathML ms + * - MathML mtext + * - MathML annotation-xml + * - SVG foreignObject + * - SVG desc + * - SVG title + */ ) ); } @@ -349,7 +362,20 @@ public function has_element_in_list_item_scope( string $tag_name ): bool { 'OL', 'TEMPLATE', 'UL', - // @todo: Support SVG and MathML nodes when support for foreign content is added. + + /* + * @todo Support SVG and MathML nodes when support for foreign content is added. + * + * - MathML mi + * - MathML mo + * - MathML mn + * - MathML ms + * - MathML mtext + * - MathML annotation-xml + * - SVG foreignObject + * - SVG desc + * - SVG title + */ ) ); } @@ -386,7 +412,20 @@ public function has_element_in_button_scope( string $tag_name ): bool { 'MARQUEE', 'OBJECT', 'TEMPLATE', - // @todo: Support SVG and MathML nodes when support for foreign content is added. + + /* + * @todo Support SVG and MathML nodes when support for foreign content is added. 
+ * + * - MathML mi + * - MathML mo + * - MathML mn + * - MathML ms + * - MathML mtext + * - MathML annotation-xml + * - SVG foreignObject + * - SVG desc + * - SVG title + */ ) ); } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f8653022454b6..9f2662c9e4c48 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1040,7 +1040,7 @@ private function step_before_head(): bool { * This internal function performs the 'in head' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -1050,7 +1050,211 @@ private function step_before_head(): bool { * @return bool Whether an element was found. */ private function step_in_head(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD . ' state.' ); + $token_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $is_closer = parent::is_tag_closer(); + $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$token_name}"; + + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, + * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), + * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + */ + if ( '#text' === $op ) { + $text = $this->get_modifiable_text(); + if ( '' === $text ) { + /* + * If the text is empty after processing HTML entities and stripping + * U+0000 NULL bytes then ignore the token. + */ + return $this->step(); + } + + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + // Insert the character. 
+ $this->insert_html_element( $this->state->current_token ); + return true; + } + } + + switch ( $op ) { + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "html" + */ + case '+HTML': + return $this->step_in_body(); + + /* + * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link" + */ + case '+BASE': + case '+BASEFONT': + case '+BGSOUND': + case '+LINK': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "meta" + */ + case '+META': + $this->insert_html_element( $this->state->current_token ); + + /* + * > If the active speculative HTML parser is null, then: + * > - If the element has a charset attribute, and getting an encoding from + * > its value results in an encoding, and the confidence is currently + * > tentative, then change the encoding to the resulting encoding. + */ + $charset = $this->get_attribute( 'charset' ); + if ( is_string( $charset ) ) { + $this->bail( 'Cannot yet process META tags with charset to determine encoding.' ); + } + + /* + * > - Otherwise, if the element has an http-equiv attribute whose value is + * > an ASCII case-insensitive match for the string "Content-Type", and + * > the element has a content attribute, and applying the algorithm for + * > extracting a character encoding from a meta element to that attribute's + * > value returns an encoding, and the confidence is currently tentative, + * > then change the encoding to the extracted encoding. 
+ */ + $http_equiv = $this->get_attribute( 'http-equiv' ); + $content = $this->get_attribute( 'content' ); + if ( + is_string( $http_equiv ) && + is_string( $content ) && + 0 === strcasecmp( $http_equiv, 'Content-Type' ) + ) { + $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' ); + } + + return true; + + /* + * > A start tag whose tag name is "title" + */ + case '+TITLE': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "noscript", if the scripting flag is enabled + * > A start tag whose tag name is one of: "noframes", "style" + * + * The scripting flag is never enabled in this parser. + */ + case '+NOFRAMES': + case '+STYLE': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "noscript", if the scripting flag is disabled + */ + case '+NOSCRIPT': + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT; + return true; + + /* + * > A start tag whose tag name is "script" + * + * @todo Could the adjusted insertion location be anything other than the current location? + */ + case '+SCRIPT': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > An end tag whose tag name is "head" + */ + case '-HEAD': + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; + return true; + + /* + * > An end tag whose tag name is one of: "body", "html", "br" + */ + case '-BODY': + case '-HTML': + case '-BR': + /* + * > Act as described in the "anything else" entry below. + */ + goto in_head_anything_else; + break; + + /* + * > A start tag whose tag name is "template" + * + * @todo Could the adjusted insertion location be anything other than the current location? 
+ */ + case '+TEMPLATE': + $this->state->active_formatting_elements->insert_marker(); + $this->state->frameset_ok = false; + + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > An end tag whose tag name is "template" + */ + case '-TEMPLATE': + if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { + // @todo Indicate a parse error once it's possible. + return $this->step(); + } + + $this->generate_implied_end_tags_thoroughly(); + if ( ! $this->state->stack_of_open_elements->current_node_is( 'TEMPLATE' ) ) { + // @todo Indicate a parse error once it's possible. + } + + $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); + $this->state->active_formatting_elements->clear_up_to_last_marker(); + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->reset_insertion_mode(); + return true; + } + + /* + * > A start tag whose tag name is "head" + * > Any other end tag + */ + if ( '+HEAD' === $op || $is_closer ) { + // Parse error: ignore the token. + return $this->step(); + } + + /* + * > Anything else + */ + in_head_anything_else: + $this->state->stack_of_open_elements->pop(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; + return $this->step( self::REPROCESS_CURRENT_NODE ); } /** @@ -2991,7 +3195,117 @@ private function step_in_select_in_table(): bool { * @return bool Whether an element was found. */ private function step_in_template(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE . ' state.' ); + $token_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $is_closer = $this->is_tag_closer(); + $op_sigil = '#tag' === $token_type ? ( $is_closer ? 
'-' : '+' ) : ''; + $op = "{$op_sigil}{$token_name}"; + + switch ( $op ) { + /* + * > A character token + * > A comment token + * > A DOCTYPE token + */ + case '#text': + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + case 'html': + return $this->step_in_body(); + + /* + * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", + * > "meta", "noframes", "script", "style", "template", "title" + * > An end tag whose tag name is "template" + */ + case '+BASE': + case '+BASEFONT': + case '+BGSOUND': + case '+LINK': + case '+META': + case '+NOFRAMES': + case '+SCRIPT': + case '+STYLE': + case '+TEMPLATE': + case '+TITLE': + case '-TEMPLATE': + return $this->step_in_head(); + + /* + * > A start tag whose tag name is one of: "caption", "colgroup", "tbody", "tfoot", "thead" + */ + case '+CAPTION': + case '+COLGROUP': + case '+TBODY': + case '+TFOOT': + case '+THEAD': + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is "col" + */ + case '+COL': + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is "tr" + */ + case '+TR': + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + 
/* + * > A start tag whose tag name is one of: "td", "th" + */ + case '+TD': + case '+TH': + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + + /* + * > Any other start tag + */ + if ( ! $is_closer ) { + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + + /* + * > Any other end tag + */ + if ( $is_closer ) { + // Parse error: ignore the token. + return $this->step(); + } + + /* + * > An end-of-file token + */ + if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { + // Stop parsing. + return false; + } + + // @todo Indicate a parse error once it's possible. + $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); + $this->state->active_formatting_elements->clear_up_to_last_marker(); + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->reset_insertion_mode(); + return $this->step( self::REPROCESS_CURRENT_NODE ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 7dd94747fd8e8..0dbd45cfa0ead 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -46,8 +46,10 @@ public static function data_single_tag_of_supported_elements() { 'ASIDE', 'AUDIO', 'B', + 'BASE', 'BDI', 'BDO', + 'BGSOUND', // Deprecated. 'BIG', 'BLINK', // Deprecated. 
'BR', @@ -93,12 +95,14 @@ public static function data_single_tag_of_supported_elements() { 'KEYGEN', // Deprecated. 'LABEL', 'LEGEND', + 'LINK', 'LISTING', // Deprecated. 'MAIN', 'MAP', 'MARK', 'MARQUEE', // Deprecated. 'MENU', + 'META', 'METER', 'MULTICOL', // Deprecated. 'NAV', @@ -178,24 +182,19 @@ public function test_fails_when_encountering_unsupported_tag( $html ) { */ public static function data_unsupported_elements() { $unsupported_elements = array( - 'BASE', - 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal. 'BODY', 'FRAME', 'FRAMESET', 'HEAD', 'HTML', 'IFRAME', - 'LINK', 'MATH', - 'META', 'NOEMBED', // Neutralized. 'NOFRAMES', // Neutralized. 'PLAINTEXT', // Neutralized. 'SCRIPT', 'STYLE', 'SVG', - 'TEMPLATE', 'TEXTAREA', 'TITLE', 'XMP', // Deprecated, use PRE instead. diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 8487df26c99dc..69329f51321ba 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -34,6 +34,7 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase { 'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.', 'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.', 'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.', + 'template/line0885' => 'Unimplemented: no parsing of attributes on context node.', 'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.', 'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.', 'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.', @@ -163,25 +164,34 @@ private static function build_tree_representation( ?string $fragment_context, st return null; } - if ( $was_text && '#text' !== $processor->get_token_name() ) { + 
$token_name = $processor->get_token_name(); + $token_type = $processor->get_token_type(); + $is_closer = $processor->is_tag_closer(); + + if ( $was_text && '#text' !== $token_name ) { $output .= "{$text_node}\"\n"; $was_text = false; $text_node = ''; } - switch ( $processor->get_token_type() ) { + switch ( $token_type ) { case '#tag': - $tag_name = strtolower( $processor->get_tag() ); + $tag_name = strtolower( $token_name ); - if ( $processor->is_tag_closer() ) { + if ( $is_closer ) { --$indent_level; + + if ( 'TEMPLATE' === $token_name ) { + --$indent_level; + } + break; } - $tag_indent = count( $processor->get_breadcrumbs() ) - 1; + $tag_indent = $indent_level; if ( ! WP_HTML_Processor::is_void( $tag_name ) ) { - $indent_level = $tag_indent + 1; + ++$indent_level; } $output .= str_repeat( $indent, $tag_indent ) . "<{$tag_name}>\n"; @@ -209,6 +219,11 @@ private static function build_tree_representation( ?string $fragment_context, st $output .= str_repeat( $indent, $indent_level ) . "\"{$modifiable_text}\"\n"; } + if ( 'TEMPLATE' === $token_name ) { + $output .= str_repeat( $indent, $indent_level ) . "content\n"; + ++$indent_level; + } + if ( ! $processor->is_void( $tag_name ) && ! $processor->expects_closer() ) { --$indent_level; }