Skip to content

Commit

Permalink
HTML API: Add support for list elements.
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell and sirreal committed Jan 9, 2024
1 parent 18cacc0 commit 38cb836
Show file tree
Hide file tree
Showing 9 changed files with 618 additions and 43 deletions.
9 changes: 9 additions & 0 deletions phpcs.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@
<exclude-pattern>/wp-tests-config-sample\.php</exclude-pattern>
</rule>

<!-- Allow goto in the HTML Processor. It mirrors algorithms that the HTML specification
itself writes as jumps between labeled steps, and these particular algorithms are complex
and highly imperative. Avoiding goto there introduces a number of risks: it could make it
more difficult to maintain the relationship to the standard, lead to subtle differences
in the parsing, and distance the code from the specification it implements. -->
<rule ref="Generic.PHP.DiscourageGoto.Found">
<exclude-pattern>/wp-includes/html-api/class-wp-html-processor\.php</exclude-pattern>
</rule>

<!-- Exclude sample config from modernization to prevent breaking CI workflows based on WP-CLI scaffold.
See: https://core.trac.wordpress.org/ticket/48082#comment:16 -->
<rule ref="Modernize.FunctionCalls.Dirname.FileConstant">
Expand Down
32 changes: 24 additions & 8 deletions src/wp-includes/html-api/class-wp-html-open-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
}

if ( in_array( $node->node_name, $termination_list, true ) ) {
return true;
return false;
}
}

Expand Down Expand Up @@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) {
* Returns whether a particular element is in list item scope.
*
* @since 6.4.0
* @since 6.5.0 Implemented: no longer throws on every invocation.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
*
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
*
* @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
*/
public function has_element_in_list_item_scope( $tag_name ) {
	/*
	 * The check is delegated to the generic scope search, passing the list
	 * containers below as the termination list for that search.
	 */
	return $this->has_element_in_specific_scope(
		$tag_name,
		array(
			// There are more elements that belong here which aren't currently supported.
			'OL',
			'UL',
		)
	);
}

/**
Expand Down Expand Up @@ -375,10 +379,22 @@ public function walk_down() {
* see WP_HTML_Open_Elements::walk_down().
*
* @since 6.4.0
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
*
* @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists.
*/
public function walk_up( $above_this_node = null ) {
	// When no starting node is given, nothing is skipped and every entry is yielded.
	$has_found_node = null === $above_this_node;

	// Visit the stack from its last entry back to its first.
	for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
		$node = $this->stack[ $i ];

		/*
		 * Skip entries until the starting node has been seen. The starting node
		 * itself is skipped as well, so traversal begins with the entry after it
		 * in this iteration order. If the node is never found, nothing is yielded.
		 */
		if ( ! $has_found_node ) {
			$has_found_node = $node === $above_this_node;
			continue;
		}

		yield $node;
	}
}

Expand Down
113 changes: 113 additions & 0 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -648,10 +648,12 @@ private function step_in_body() {
case '+MAIN':
case '+MENU':
case '+NAV':
case '+OL':
case '+P':
case '+SEARCH':
case '+SECTION':
case '+SUMMARY':
case '+UL':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
Expand Down Expand Up @@ -685,9 +687,11 @@ private function step_in_body() {
case '-MAIN':
case '-MENU':
case '-NAV':
case '-OL':
case '-SEARCH':
case '-SECTION':
case '-SUMMARY':
case '-UL':
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
// @todo Report parse error.
// Ignore the token.
Expand Down Expand Up @@ -755,6 +759,109 @@ private function step_in_body() {
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
return true;

/*
* > A start tag whose tag name is "li"
* > A start tag whose tag name is one of: "dd", "dt"
*/
case '+DD':
case '+DT':
case '+LI':
$this->state->frameset_ok = false;
$node = $this->state->stack_of_open_elements->current_node();
$is_li = 'LI' === $tag_name;

in_body_list_loop:
/*
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
*/
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
$node_name = $is_li ? 'LI' : $node->node_name;
$this->generate_implied_end_tags( $node_name );
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}

$this->state->stack_of_open_elements->pop_until( $node_name );
goto in_body_list_done;
}

if (
'ADDRESS' !== $node->node_name &&
'DIV' !== $node->node_name &&
'P' !== $node->node_name &&
$this->is_special( $node->node_name )
) {
/*
* > If node is in the special category, but is not an address, div,
* > or p element, then jump to the step labeled done below.
*/
goto in_body_list_done;
} else {
/*
* > Otherwise, set node to the previous entry in the stack of open elements
* > and return to the step labeled loop.
*/
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
$node = $item;
break;
}
goto in_body_list_loop;
}

in_body_list_done:
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}

$this->insert_html_element( $this->state->current_token );
return true;

/*
* > An end tag whose tag name is "li"
* > An end tag whose tag name is one of: "dd", "dt"
*/
case '-DD':
case '-DT':
case '-LI':
if (
/*
* An end tag whose tag name is "li":
* If the stack of open elements does not have an li element in list item scope,
* then this is a parse error; ignore the token.
*/
(
'LI' === $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
) ||
/*
* An end tag whose tag name is one of: "dd", "dt":
* If the stack of open elements does not have an element in scope that is an
* HTML element with the same tag name as that of the token, then this is a
* parse error; ignore the token.
*/
(
'LI' !== $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
)
) {
/*
* This is a parse error, ignore the token.
*
* @todo Indicate a parse error once it's possible.
*/
return $this->step();
}

$this->generate_implied_end_tags( $tag_name );

if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}

$this->state->stack_of_open_elements->pop_until( $tag_name );
return true;

/*
* > An end tag whose tag name is "p"
*/
Expand Down Expand Up @@ -1223,6 +1330,9 @@ private function close_a_p_element() {
*/
private function generate_implied_end_tags( $except_for_this_element = null ) {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);

Expand All @@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) {
*/
private function generate_implied_end_tags_thoroughly() {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);

Expand Down
5 changes: 0 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() {
'CAPTION' => array( 'CAPTION' ),
'COL' => array( 'COL' ),
'COLGROUP' => array( 'COLGROUP' ),
'DD' => array( 'DD' ),
'DT' => array( 'DT' ),
'EMBED' => array( 'EMBED' ),
'FORM' => array( 'FORM' ),
'FRAME' => array( 'FRAME' ),
Expand All @@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() {
'IFRAME' => array( 'IFRAME' ),
'INPUT' => array( 'INPUT' ),
'KEYGEN' => array( 'KEYGEN' ),
'LI' => array( 'LI' ),
'LINK' => array( 'LINK' ),
'LISTING' => array( 'LISTING' ),
'MARQUEE' => array( 'MARQUEE' ),
Expand All @@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() {
'NOFRAMES' => array( 'NOFRAMES' ),
'NOSCRIPT' => array( 'NOSCRIPT' ),
'OBJECT' => array( 'OBJECT' ),
'OL' => array( 'OL' ),
'OPTGROUP' => array( 'OPTGROUP' ),
'OPTION' => array( 'OPTION' ),
'PARAM' => array( 'PARAM' ),
Expand All @@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() {
'TITLE' => array( 'TITLE' ),
'TR' => array( 'TR' ),
'TRACK' => array( 'TRACK' ),
'UL' => array( 'UL' ),
'WBR' => array( 'WBR' ),
'XMP' => array( 'XMP' ),
);
Expand Down
40 changes: 23 additions & 17 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public function data_single_tag_of_supported_elements() {
$supported_elements = array(
'A',
'ABBR',
'ACRONYM', // Neutralized
'ACRONYM', // Neutralized.
'ADDRESS',
'ARTICLE',
'ASIDE',
Expand All @@ -47,13 +47,14 @@ public function data_single_tag_of_supported_elements() {
'BDI',
'BDO',
'BIG',
'BLINK', // Deprecated
'BLINK', // Deprecated.
'BUTTON',
'CANVAS',
'CENTER', // Neutralized
'CENTER', // Neutralized.
'CITE',
'CODE',
'DATA',
'DD',
'DATALIST',
'DFN',
'DEL',
Expand All @@ -62,6 +63,7 @@ public function data_single_tag_of_supported_elements() {
'DIR',
'DIV',
'DL',
'DT',
'EM',
'FIELDSET',
'FIGCAPTION',
Expand All @@ -79,6 +81,7 @@ public function data_single_tag_of_supported_elements() {
'I',
'IMG',
'INS',
'LI',
'ISINDEX', // Deprecated
'KBD',
'LABEL',
Expand All @@ -91,6 +94,7 @@ public function data_single_tag_of_supported_elements() {
'MULTICOL', // Deprecated
'NAV',
'NEXTID', // Deprecated
'OL',
'OUTPUT',
'P',
'PICTURE',
Expand All @@ -112,6 +116,7 @@ public function data_single_tag_of_supported_elements() {
'TIME',
'TT',
'U',
'UL',
'VAR',
'VIDEO',
);
Expand Down Expand Up @@ -156,7 +161,7 @@ public function test_fails_when_encountering_unsupported_tag( $html ) {
*/
public function data_unsupported_elements() {
$unsupported_elements = array(
'APPLET', // Deprecated
'APPLET', // Deprecated.
'AREA',
'BASE',
'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
Expand All @@ -165,8 +170,6 @@ public function data_unsupported_elements() {
'CAPTION',
'COL',
'COLGROUP',
'DD',
'DT',
'EMBED',
'FORM',
'FRAME',
Expand All @@ -176,27 +179,25 @@ public function data_unsupported_elements() {
'HTML',
'IFRAME',
'INPUT',
'KEYGEN', // Deprecated; void
'LI',
'KEYGEN', // Deprecated; void.
'LINK',
'LISTING', // Deprecated, use PRE instead.
'MARQUEE', // Deprecated
'MARQUEE', // Deprecated.
'MATH',
'META',
'NOBR', // Neutralized
'NOEMBED', // Neutralized
'NOFRAMES', // Neutralized
'NOBR', // Neutralized.
'NOEMBED', // Neutralized.
'NOFRAMES', // Neutralized.
'NOSCRIPT',
'OBJECT',
'OL',
'OPTGROUP',
'OPTION',
'PLAINTEXT', // Neutralized
'PLAINTEXT', // Neutralized.
'PRE',
'RB', // Neutralized
'RB', // Neutralized.
'RP',
'RT',
'RTC', // Neutralized
'RTC', // Neutralized.
'SCRIPT',
'SELECT',
'SOURCE',
Expand All @@ -213,7 +214,6 @@ public function data_unsupported_elements() {
'TITLE',
'TR',
'TRACK',
'UL',
'WBR',
'XMP', // Deprecated, use PRE instead.
);
Expand Down Expand Up @@ -348,6 +348,12 @@ public function data_html_target_with_breadcrumbs() {
),
'MAIN inside MAIN inside SPAN' => array( '<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ),
'MAIN next to unclosed P' => array( '<p><main target>', array( 'HTML', 'BODY', 'MAIN' ), 1 ),
'LI after unclosed LI' => array( '<li>one<li>two<li target>three', array( 'HTML', 'BODY', 'LI' ), 3 ),
'LI in UL in LI' => array( '<ul><li>one<ul><li target>two', array( 'HTML', 'BODY', 'UL', 'LI', 'UL', 'LI' ), 1 ),
'DD and DT mutually close, LI self-closes (dt 2)' => array( '<dd><dd><dt><dt target><dd><li><li>', array( 'HTML', 'BODY', 'DT' ), 2 ),
'DD and DT mutually close, LI self-closes (dd 3)' => array( '<dd><dd><dt><dt><dd target><li><li>', array( 'HTML', 'BODY', 'DD' ), 3 ),
'DD and DT mutually close, LI self-closes (li 1)' => array( '<dd><dd><dt><dt><dd><li target><li>', array( 'HTML', 'BODY', 'DD', 'LI' ), 1 ),
'DD and DT mutually close, LI self-closes (li 2)' => array( '<dd><dd><dt><dt><dd><li><li target>', array( 'HTML', 'BODY', 'DD', 'LI' ), 2 ),

// H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising.
'EM inside H3 after unclosed P' => array( '<p><h3><em target>Important Message</em></h3>', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ),
Expand Down
Loading

0 comments on commit 38cb836

Please sign in to comment.