From 1e21ecedf19f1b97360949a9e509a3c04ac1f34e Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 27 Sep 2024 00:42:47 +0000 Subject: [PATCH] HTML API: Switch to HTML namespace when entering Integration Points. When encountering inline SVG and MathML content in an HTML document, there are certain "integration points" which transition back into the HTML parsing ruleset. Previously, the HTML API was incorrectly switching into the namespace of the element transitioning into that ruleset. In this patch, the correct transition is made, where all integration points refer to HTML rules, while non-integration points refer to the rules of the namespace corresponding to the token itself. Developed in https://github.com/wordpress/wordpress-develop/pull/7425 Discussed in https://core.trac.wordpress.org/ticket/61576 Props dmsnell, jonsurrell. See #61576. git-svn-id: https://develop.svn.wordpress.org/trunk@59099 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 14 ++- .../tests/html-api/wpHtmlProcessor.php | 118 ++++++++++++++++++ 2 files changed, 126 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ba8be1e7d4004..ed6ac0299b3c3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -393,7 +393,7 @@ function ( WP_HTML_Token $token ): void { $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); - $this->change_parsing_namespace( $token->namespace ); + $this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace ); } ); @@ -403,12 +403,14 @@ function ( WP_HTML_Token $token ): void { $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; $provenance = ( ! 
$same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); + $adjusted_current_node = $this->get_adjusted_current_node(); - $this->change_parsing_namespace( - $adjusted_current_node - ? $adjusted_current_node->namespace - : 'html' - ); + + if ( $adjusted_current_node ) { + $this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace ); + } else { + $this->change_parsing_namespace( 'html' ); + } } ); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index bd6607935d220..ba3407ff84cbf 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -745,4 +745,122 @@ public function test_class_list_quirks_mode() { $class_list ); } + + /** + * Ensures that the processor correctly adjusts the namespace + * for elements inside HTML integration points. + * + * @ticket 61576 + */ + public function test_adjusts_for_html_integration_points_in_svg() { + $processor = WP_HTML_Processor::create_full_parser( + '' + ); + + // At the foreignObject, the processor is in the SVG namespace. + $this->assertTrue( + $processor->next_tag( 'foreignObject' ), + 'Failed to find "foreignObject" under test: check test setup.' + ); + + $this->assertSame( + 'svg', + $processor->get_namespace(), + 'Found the wrong namespace for the "foreignObject" element.' + ); + + /* + * The IMAGE tag should be handled according to HTML processing rules + * and transformed to an IMG tag because `foreignObject` is an HTML + * integration point. At this point, the processor is entering the HTML + * integration point. + */ + $this->assertTrue( + $processor->next_tag( 'IMG' ), + 'Failed to find expected "IMG" tag from "" source tag.' 
+ ); + + $this->assertSame( + 'html', + $processor->get_namespace(), + 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' + ); + + /* + * Again, the IMAGE tag should be handled according to HTML processing + * rules and transformed to an IMG tag because `foreignObject` is an + * HTML integration point. At this point, the processor has entered + * SVG and is returning to an HTML integration point. + */ + $this->assertTrue( + $processor->next_tag( 'IMG' ), + 'Failed to find expected "IMG" tag from "" source tag.' + ); + + $this->assertSame( + 'html', + $processor->get_namespace(), + 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' + ); + } + + /** + * Ensures that the processor correctly adjusts the namespace + * for elements inside MathML integration points. + * + * @ticket 61576 + */ + public function test_adjusts_for_mathml_integration_points() { + $processor = WP_HTML_Processor::create_fragment( + '' + ); + + // Advance token-by-token to ensure matching the right raw "" token. + $processor->next_token(); // Advance past the +MO. + $processor->next_token(); // Advance into the +IMG. + + $this->assertSame( + 'IMG', + $processor->get_tag(), + 'Failed to find expected "IMG" tag from "" source tag.' + ); + + $this->assertSame( + 'html', + $processor->get_namespace(), + 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' + ); + + // Advance token-by-token to ensure matching the right raw "" token. + $processor->next_token(); // Advance past the -MO. + $processor->next_token(); // Advance past the +MATH. + $processor->next_token(); // Advance into the +IMAGE. + + $this->assertSame( + 'IMAGE', + $processor->get_tag(), + 'Failed to find the un-transformed "" tag.' + ); + + $this->assertSame( + 'math', + $processor->get_namespace(), + 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' + ); + + $processor->next_token(); // Advance past the +MO. 
+ $processor->next_token(); // Advance into the +IMG. + + $this->assertSame( + 'IMG', + $processor->get_tag(), + 'Failed to find expected "IMG" tag from "" source tag.' + ); + + $this->assertSame( + 'html', + $processor->get_namespace(), + 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' + ); + } }