Skip to content

Commit

Permalink
HTML API: Switch to HTML namespace when entering Integration Points.
Browse files Browse the repository at this point in the history
When encountering inline SVG and MathML content in an HTML document, there are certain "integration points" which transition back into the HTML parsing ruleset. Previously, the HTML API was incorrectly switching into the namespace of the element transitioning into that ruleset.

In this patch, the correct transition is made, where all integration points refer to HTML rules, while non-integration points refer to the rules of the namespace corresponding to the token itself.

Developed in WordPress#7425
Discussed in https://core.trac.wordpress.org/ticket/61576

Props dmsnell, jonsurrell.
See #61576.


git-svn-id: https://develop.svn.wordpress.org/trunk@59099 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
dmsnell authored and aslamdoctor committed Dec 28, 2024
1 parent dd07b9d commit 5ee5e0c
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 6 deletions.
14 changes: 8 additions & 6 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ function ( WP_HTML_Token $token ): void {
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );

$this->change_parsing_namespace( $token->namespace );
$this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
}
);

Expand All @@ -403,12 +403,14 @@ function ( WP_HTML_Token $token ): void {
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );

$adjusted_current_node = $this->get_adjusted_current_node();
$this->change_parsing_namespace(
$adjusted_current_node
? $adjusted_current_node->namespace
: 'html'
);

if ( $adjusted_current_node ) {
$this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
} else {
$this->change_parsing_namespace( 'html' );
}
}
);

Expand Down
118 changes: 118 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -745,4 +745,122 @@ public function test_class_list_quirks_mode() {
$class_list
);
}

/**
* Ensures that the processor correctly adjusts the namespace
* for elements inside HTML integration points.
*
* @ticket 61576
*/
public function test_adjusts_for_html_integration_points_in_svg() {
/*
 * The input nests two IMAGE tags inside an SVG `foreignObject`, separated
 * by a self-closing nested SVG element, so the namespace is checked both
 * when first entering the integration point and when returning to it.
 */
$processor = WP_HTML_Processor::create_full_parser(
'<svg><foreignobject><image /><svg /><image />'
);

// At the foreignObject, the processor is in the SVG namespace.
$this->assertTrue(
$processor->next_tag( 'foreignObject' ),
'Failed to find "foreignObject" under test: check test setup.'
);

// The foreignObject element itself is reported in the SVG namespace.
$this->assertSame(
'svg',
$processor->get_namespace(),
'Found the wrong namespace for the "foreignObject" element.'
);

/*
 * The IMAGE tag should be handled according to HTML processing rules
 * and transformed to an IMG tag because `foreignObject` is an HTML
 * integration point. At this point, the processor is entering the HTML
 * integration point.
 */
$this->assertTrue(
$processor->next_tag( 'IMG' ),
'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
);

// The transformed IMG is expected to be reported in the HTML namespace.
$this->assertSame(
'html',
$processor->get_namespace(),
'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
);

/*
 * Again, the IMAGE tag should be handled according to HTML processing
 * rules and transformed to an IMG tag because `foreignObject` is an
 * HTML integration point. At this point, the processor has passed the
 * nested self-closing SVG element and is returning to the HTML
 * integration point.
 */
$this->assertTrue(
$processor->next_tag( 'IMG' ),
'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
);

// After the nested SVG element, the namespace should be HTML again.
$this->assertSame(
'html',
$processor->get_namespace(),
'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
);
}

/**
* Ensures that the processor correctly adjusts the namespace
* for elements inside MathML integration points.
*
* @ticket 61576
*/
public function test_adjusts_for_mathml_integration_points() {
	/*
	 * The same raw "<image />" tag appears three times in the input:
	 *
	 *  1. Inside an MO element that is outside of any MATH element.
	 *  2. Directly inside a MATH element.
	 *  3. Inside an MO element that is nested inside the MATH element.
	 *
	 * This test expects the first and third to be transformed into IMG
	 * elements in the HTML namespace, and the second to remain an IMAGE
	 * element in the MathML namespace.
	 */
	$processor = WP_HTML_Processor::create_fragment(
		'<mo><image /></mo><math><image /><mo><image /></mo></math>'
	);

	// Advance token-by-token to ensure matching the right raw "<image />" token.
	$processor->next_token(); // Advance past the +MO.
	$processor->next_token(); // Advance into the +IMG.

	$this->assertSame(
		'IMG',
		$processor->get_tag(),
		'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
	);

	$this->assertSame(
		'html',
		$processor->get_namespace(),
		'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
	);

	// Advance token-by-token to ensure matching the right raw "<image />" token.
	$processor->next_token(); // Advance past the -MO.
	$processor->next_token(); // Advance past the +MATH.
	$processor->next_token(); // Advance into the +IMAGE.

	/*
	 * Directly inside MATH there is no integration point, so the tag
	 * is expected to keep its IMAGE name and the MathML namespace.
	 */
	$this->assertSame(
		'IMAGE',
		$processor->get_tag(),
		'Failed to find the un-transformed "<image />" tag.'
	);

	$this->assertSame(
		'math',
		$processor->get_namespace(),
		'Found the wrong namespace for the un-transformed "IMAGE" element.'
	);

	$processor->next_token(); // Advance past the +MO.
	$processor->next_token(); // Advance into the +IMG.

	/*
	 * Inside the MO element nested in MATH, the tag is expected to be
	 * handled by HTML rules again: transformed to IMG in the HTML namespace.
	 */
	$this->assertSame(
		'IMG',
		$processor->get_tag(),
		'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
	);

	$this->assertSame(
		'html',
		$processor->get_namespace(),
		'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
	);
}
}

0 comments on commit 5ee5e0c

Please sign in to comment.