diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php
index 1162a267f9c9b..c760009ce0c28 100644
--- a/src/wp-includes/html-api/class-wp-html-open-elements.php
+++ b/src/wp-includes/html-api/class-wp-html-open-elements.php
@@ -308,7 +308,20 @@ public function has_element_in_scope( string $tag_name ): bool {
'MARQUEE',
'OBJECT',
'TEMPLATE',
- // @todo: Support SVG and MathML nodes when support for foreign content is added.
+
+ /*
+ * @todo Support SVG and MathML nodes when support for foreign content is added.
+ *
+ * - MathML mi
+ * - MathML mo
+ * - MathML mn
+ * - MathML ms
+ * - MathML mtext
+ * - MathML annotation-xml
+ * - SVG foreignObject
+ * - SVG desc
+ * - SVG title
+ */
)
);
}
@@ -349,7 +362,20 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
'OL',
'TEMPLATE',
'UL',
- // @todo: Support SVG and MathML nodes when support for foreign content is added.
+
+ /*
+ * @todo Support SVG and MathML nodes when support for foreign content is added.
+ *
+ * - MathML mi
+ * - MathML mo
+ * - MathML mn
+ * - MathML ms
+ * - MathML mtext
+ * - MathML annotation-xml
+ * - SVG foreignObject
+ * - SVG desc
+ * - SVG title
+ */
)
);
}
@@ -386,7 +412,20 @@ public function has_element_in_button_scope( string $tag_name ): bool {
'MARQUEE',
'OBJECT',
'TEMPLATE',
- // @todo: Support SVG and MathML nodes when support for foreign content is added.
+
+ /*
+ * @todo Support SVG and MathML nodes when support for foreign content is added.
+ *
+ * - MathML mi
+ * - MathML mo
+ * - MathML mn
+ * - MathML ms
+ * - MathML mtext
+ * - MathML annotation-xml
+ * - SVG foreignObject
+ * - SVG desc
+ * - SVG title
+ */
)
);
}
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index f8653022454b6..9f2662c9e4c48 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -1040,7 +1040,7 @@ private function step_before_head(): bool {
* This internal function performs the 'in head' insertion mode
* logic for the generalized WP_HTML_Processor::step() function.
*
- * @since 6.7.0 Stub implementation.
+ * @since 6.7.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
@@ -1050,7 +1050,211 @@ private function step_before_head(): bool {
* @return bool Whether an element was found.
*/
private function step_in_head(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD . ' state.' );
+ $token_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $is_closer = parent::is_tag_closer();
+ $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$token_name}";
+
+ /*
+ * `$op` encodes the token: "+NAME" for an opening tag, "-NAME" for a closing tag, otherwise the bare token name.
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ */
+ if ( '#text' === $op ) {
+ $text = $this->get_modifiable_text();
+ if ( '' === $text ) {
+ /*
+ * If the text is empty after processing HTML entities and stripping
+ * U+0000 NULL bytes then ignore the token.
+ */
+ return $this->step();
+ }
+
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ // The text is entirely whitespace: insert the character(s).
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+ }
+ }
+
+ switch ( $op ) {
+ /*
+ * > A comment token
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A DOCTYPE token (a non-tag token named "html"; it carries no "+"/"-" sigil, unlike "+HTML" below)
+ */
+ case 'html':
+ // Parse error: ignore the token.
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is "html"
+ */
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link"
+ */
+ case '+BASE':
+ case '+BASEFONT':
+ case '+BGSOUND':
+ case '+LINK':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A start tag whose tag name is "meta"
+ */
+ case '+META':
+ $this->insert_html_element( $this->state->current_token );
+
+ /*
+ * > If the active speculative HTML parser is null, then:
+ * > - If the element has a charset attribute, and getting an encoding from
+ * > its value results in an encoding, and the confidence is currently
+ * > tentative, then change the encoding to the resulting encoding.
+ */
+ $charset = $this->get_attribute( 'charset' );
+ if ( is_string( $charset ) ) {
+ $this->bail( 'Cannot yet process META tags with charset to determine encoding.' );
+ }
+
+ /*
+ * > - Otherwise, if the element has an http-equiv attribute whose value is
+ * > an ASCII case-insensitive match for the string "Content-Type", and
+ * > the element has a content attribute, and applying the algorithm for
+ * > extracting a character encoding from a meta element to that attribute's
+ * > value returns an encoding, and the confidence is currently tentative,
+ * > then change the encoding to the extracted encoding.
+ */
+ $http_equiv = $this->get_attribute( 'http-equiv' );
+ $content = $this->get_attribute( 'content' );
+ if (
+ is_string( $http_equiv ) &&
+ is_string( $content ) &&
+ 0 === strcasecmp( $http_equiv, 'Content-Type' )
+ ) {
+ $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' );
+ }
+
+ return true;
+
+ /*
+ * > A start tag whose tag name is "title"
+ */
+ case '+TITLE':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A start tag whose tag name is "noscript", if the scripting flag is enabled
+ * > A start tag whose tag name is one of: "noframes", "style"
+ *
+ * The scripting flag is never enabled in this parser.
+ */
+ case '+NOFRAMES':
+ case '+STYLE':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A start tag whose tag name is "noscript", if the scripting flag is disabled
+ */
+ case '+NOSCRIPT':
+ $this->insert_html_element( $this->state->current_token );
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT;
+ return true;
+
+ /*
+ * > A start tag whose tag name is "script"
+ *
+ * @todo Could the adjusted insertion location be anything other than the current location?
+ */
+ case '+SCRIPT':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > An end tag whose tag name is "head"
+ */
+ case '-HEAD':
+ $this->state->stack_of_open_elements->pop();
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD;
+ return true;
+
+ /*
+ * > An end tag whose tag name is one of: "body", "html", "br"
+ */
+ case '-BODY':
+ case '-HTML':
+ case '-BR':
+ /*
+ * > Act as described in the "anything else" entry below. (The `break` after the `goto` is never reached.)
+ */
+ goto in_head_anything_else;
+ break;
+
+ /*
+ * > A start tag whose tag name is "template"
+ *
+ * @todo Could the adjusted insertion location be anything other than the current location?
+ */
+ case '+TEMPLATE':
+ $this->state->active_formatting_elements->insert_marker();
+ $this->state->frameset_ok = false;
+
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
+
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > An end tag whose tag name is "template"
+ */
+ case '-TEMPLATE':
+ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+ // @todo Indicate a parse error once it's possible.
+ return $this->step();
+ }
+
+ $this->generate_implied_end_tags_thoroughly();
+ if ( ! $this->state->stack_of_open_elements->current_node_is( 'TEMPLATE' ) ) {
+ // Intentionally empty. @todo Indicate a parse error once it's possible.
+ }
+
+ $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
+ $this->state->active_formatting_elements->clear_up_to_last_marker();
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->reset_insertion_mode();
+ return true;
+ }
+
+ /*
+ * > A start tag whose tag name is "head"
+ * > Any other end tag (closers matched in the switch above have already returned or jumped)
+ */
+ if ( '+HEAD' === $op || $is_closer ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ /*
+ * > Anything else (also the `goto` target for the "body", "html", and "br" end tags above)
+ */
+ in_head_anything_else:
+ $this->state->stack_of_open_elements->pop();
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
@@ -2991,7 +3195,117 @@ private function step_in_select_in_table(): bool {
* @return bool Whether an element was found.
*/
private function step_in_template(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE . ' state.' );
+ $token_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $is_closer = $this->is_tag_closer();
+ $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$token_name}";
+
+ switch ( $op ) {
+ /*
+ * > A character token
+ * > A comment token
+ * > A DOCTYPE token (the sigil-less "html" case; all of these are handed to the in-body step)
+ */
+ case '#text':
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ case 'html':
+ return $this->step_in_body();
+
+ /*
+ * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
+ * > "meta", "noframes", "script", "style", "template", "title"
+ * > An end tag whose tag name is "template" (all of these are handed to the in-head step)
+ */
+ case '+BASE':
+ case '+BASEFONT':
+ case '+BGSOUND':
+ case '+LINK':
+ case '+META':
+ case '+NOFRAMES':
+ case '+SCRIPT':
+ case '+STYLE':
+ case '+TEMPLATE':
+ case '+TITLE':
+ case '-TEMPLATE':
+ return $this->step_in_head();
+
+ /*
+ * > A start tag whose tag name is one of: "caption", "colgroup", "tbody", "tfoot", "thead"
+ */
+ case '+CAPTION':
+ case '+COLGROUP':
+ case '+TBODY':
+ case '+TFOOT':
+ case '+THEAD':
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+
+ /*
+ * > A start tag whose tag name is "col"
+ */
+ case '+COL':
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+
+ /*
+ * > A start tag whose tag name is "tr"
+ */
+ case '+TR':
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+
+ /*
+ * > A start tag whose tag name is one of: "td", "th"
+ */
+ case '+TD':
+ case '+TH':
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+ }
+
+ /*
+ * > Any other start tag (as written, taken by every remaining token that is not a closer)
+ */
+ if ( ! $is_closer ) {
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+ }
+
+ /*
+ * > Any other end tag (always true at this point, because non-closers returned just above)
+ */
+ if ( $is_closer ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ /*
+ * > An end-of-file token (NOTE(review): unreachable as written, since both checks above return — confirm how EOF should arrive)
+ */
+ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+ // Stop parsing.
+ return false;
+ }
+
+ // @todo Indicate a parse error once it's possible.
+ $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
+ $this->state->active_formatting_elements->clear_up_to_last_marker();
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->reset_insertion_mode();
+ return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
index 7dd94747fd8e8..0dbd45cfa0ead 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
@@ -46,8 +46,10 @@ public static function data_single_tag_of_supported_elements() {
'ASIDE',
'AUDIO',
'B',
+ 'BASE',
'BDI',
'BDO',
+ 'BGSOUND', // Deprecated.
'BIG',
'BLINK', // Deprecated.
'BR',
@@ -93,12 +95,14 @@ public static function data_single_tag_of_supported_elements() {
'KEYGEN', // Deprecated.
'LABEL',
'LEGEND',
+ 'LINK',
'LISTING', // Deprecated.
'MAIN',
'MAP',
'MARK',
'MARQUEE', // Deprecated.
'MENU',
+ 'META',
'METER',
'MULTICOL', // Deprecated.
'NAV',
@@ -178,24 +182,19 @@ public function test_fails_when_encountering_unsupported_tag( $html ) {
*/
public static function data_unsupported_elements() {
$unsupported_elements = array(
- 'BASE',
- 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
'BODY',
'FRAME',
'FRAMESET',
'HEAD',
'HTML',
'IFRAME',
- 'LINK',
'MATH',
- 'META',
'NOEMBED', // Neutralized.
'NOFRAMES', // Neutralized.
'PLAINTEXT', // Neutralized.
'SCRIPT',
'STYLE',
'SVG',
- 'TEMPLATE',
'TEXTAREA',
'TITLE',
'XMP', // Deprecated, use PRE instead.
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index 8487df26c99dc..69329f51321ba 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -34,6 +34,7 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.',
'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.',
'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.',
+ 'template/line0885' => 'Unimplemented: no parsing of attributes on context node.',
'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.',
@@ -163,25 +164,34 @@ private static function build_tree_representation( ?string $fragment_context, st
return null;
}
- if ( $was_text && '#text' !== $processor->get_token_name() ) {
+ $token_name = $processor->get_token_name();
+ $token_type = $processor->get_token_type();
+ $is_closer = $processor->is_tag_closer();
+
+ if ( $was_text && '#text' !== $token_name ) {
$output .= "{$text_node}\"\n";
$was_text = false;
$text_node = '';
}
- switch ( $processor->get_token_type() ) {
+ switch ( $token_type ) {
case '#tag':
- $tag_name = strtolower( $processor->get_tag() );
+ $tag_name = strtolower( $token_name );
- if ( $processor->is_tag_closer() ) {
+ if ( $is_closer ) {
--$indent_level;
+
+ if ( 'TEMPLATE' === $token_name ) {
+ --$indent_level;
+ }
+
break;
}
- $tag_indent = count( $processor->get_breadcrumbs() ) - 1;
+ $tag_indent = $indent_level;
if ( ! WP_HTML_Processor::is_void( $tag_name ) ) {
- $indent_level = $tag_indent + 1;
+ ++$indent_level;
}
$output .= str_repeat( $indent, $tag_indent ) . "<{$tag_name}>\n";
@@ -209,6 +219,11 @@ private static function build_tree_representation( ?string $fragment_context, st
$output .= str_repeat( $indent, $indent_level ) . "\"{$modifiable_text}\"\n";
}
+ if ( 'TEMPLATE' === $token_name ) {
+ $output .= str_repeat( $indent, $indent_level ) . "content\n";
+ ++$indent_level;
+ }
+
if ( ! $processor->is_void( $tag_name ) && ! $processor->expects_closer() ) {
--$indent_level;
}