diff --git a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php new file mode 100644 index 0000000000000..21147e30bfe1f --- /dev/null +++ b/src/wp-includes/class-wp-html-attribute-token.php @@ -0,0 +1,93 @@ +name = $name; + $this->value_starts_at = $value_start; + $this->value_length = $value_length; + $this->start = $start; + $this->end = $end; + $this->is_true = $is_true; + } +} + +endif; diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php new file mode 100644 index 0000000000000..376e391dc1c44 --- /dev/null +++ b/src/wp-includes/class-wp-html-span.php @@ -0,0 +1,56 @@ +start = $start; + $this->end = $end; + } +} + +endif; diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php new file mode 100644 index 0000000000000..24e67a3adc83f --- /dev/null +++ b/src/wp-includes/class-wp-html-tag-processor.php @@ -0,0 +1,2047 @@ + "c" not " c" + * @TODO: Skip over `/` in attributes area, split attribute names by `/` + * @TODO: Decode HTML references/entities in class names when matching. + * E.g. match having class `1<"2` needs to recognize `class="1<"2"`. + * @TODO: Decode character references in `get_attribute()` + * @TODO: Properly escape attribute value in `set_attribute()` + * @TODO: Add slow mode to escape character entities in CSS class names? + * (This requires a custom decoder since `html_entity_decode()` + * doesn't handle attribute character reference decoding rules. + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) : + +/** + * Processes an input HTML document by applying a specified set + * of patches to that input. Tokenizes HTML but does not fully + * parse the input document. + * + * ## Usage + * + * Use of this class requires three steps: + * + * 1. Create a new class instance with your input HTML document. + * 2. 
Find the tag(s) you are looking for. + * 3. Request changes to the attributes in those tag(s). + * + * Example: + * ```php + * $tags = new WP_HTML_Tag_Processor( $html ); + * if ( $tags->next_tag( [ 'tag_name' => 'option' ] ) ) { + * $tags->set_attribute( 'selected', true ); + * } + * ``` + * + * ### Finding tags + * + * The `next_tag()` function moves the internal cursor through + * your input HTML document until it finds a tag meeting any of + * the supplied restrictions in the optional query argument. If + * no argument is provided then it will find the next HTML tag, + * regardless of what kind it is. + * + * If you want to _find whatever the next tag is_: + * ```php + * $tags->next_tag(); + * ``` + * + * | Goal | Query | + * |-----------------------------------------------------------|----------------------------------------------------------------------------| + * | Find any tag. | `$tags->next_tag();` | + * | Find next image tag. | `$tags->next_tag( [ 'tag_name' => 'img' ] );` | + * | Find next tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'class_name' => 'fullwidth' ] );` | + * | Find next image tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'tag_name' => 'img', 'class_name' => 'fullwidth' ] );` | + * + * If a tag was found meeting your criteria then `next_tag()` + * will return `true` and you can proceed to modify it. If it + * returns `false`, however, it failed to find the tag and + * moved the cursor to the end of the file. + * + * Once the cursor reaches the end of the file the processor + * is done and if you want to reach an earlier tag you will + * need to recreate the processor and start over. The internal + * cursor can only proceed forward, never backing up. + * + * #### Custom queries + * + * Sometimes it's necessary to further inspect an HTML tag than + * the query syntax here permits. 
In these cases one may further + * inspect the search results using the read-only functions + * provided by the processor or external state or variables. + * + * Example: + * ```php + * // Paint up to the first five DIV or SPAN tags marked with the "jazzy" style. + * $remaining_count = 5; + * while ( $remaining_count > 0 && $tags->next_tag() ) { + * if ( + * ( 'DIV' === $tags->get_tag() || 'SPAN' === $tags->get_tag() ) && + * 'jazzy' === $tags->get_attribute( 'data-style' ) + * ) { + * $tags->add_class( 'theme-style-everest-jazz' ); + * $remaining_count--; + * } + * } + * ``` + * + * `get_attribute()` will return `null` if the attribute wasn't present + * on the tag when it was called. It may return `""` (the empty string) + * in cases where the attribute was present but its value was empty. + * For boolean attributes, those whose name is present but no value is + * given, it will return `true` (the only way to set `false` for an + * attribute is to remove it). + * + * ### Modifying HTML attributes for a found tag + * + * Once you've found the start of an opening tag you can modify + * any number of the attributes on that tag. You can set a new + * value for an attribute, remove the entire attribute, or do + * nothing and move on to the next opening tag. + * + * Example: + * ```php + * if ( $tags->next_tag( [ 'class_name' => 'wp-group-block' ] ) ) { + * $tags->set_attribute( 'title', 'This groups the contained content.' ); + * $tags->remove_attribute( 'data-test-id' ); + * } + * ``` + * + * If `set_attribute()` is called for an existing attribute it will + * overwrite the existing value. Similarly, calling `remove_attribute()` + * for a non-existing attribute has no effect on the document. Both + * of these methods are safe to call without knowing if a given attribute + * exists beforehand. + * + * ### Modifying CSS classes for a found tag + * + * The tag processor treats the `class` attribute as a special case.
+ * Because it's a common operation to add or remove CSS classes you + * can do so using this interface. + * + * As with attribute values, adding or removing CSS classes is a safe + * operation that doesn't require checking if the attribute or class + * exists before making changes. If removing the only class then the + * entire `class` attribute will be removed. + * + * Example: + * ```php + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * ``` + * + * ## Design limitations + * + * @TODO: Expand this section + * + * - No nesting: cannot match open and close tag. + * - Class names are not decoded if they contain character references. + * + * @since 6.2.0 + */ +class WP_HTML_Tag_Processor { + /** + * The maximum number of bookmarks allowed to exist at + * any given time. + * + * @see set_bookmark(); + * @since 6.2.0 + * @var int + */ + const MAX_BOOKMARKS = 10; + + /** + * Maximum number of times seek() can be called. + * Prevents accidental infinite loops. + * + * @see seek() + * @since 6.2.0 + * @var int + */ + const MAX_SEEK_OPS = 1000; + + /** + * The HTML document to parse. + * + * @since 6.2.0 + * @var string + */ + private $html; + + /** + * The last query passed to next_tag(). + * + * @since 6.2.0 + * @var array|null + */ + private $last_query; + + /** + * The tag name this processor currently scans for. + * + * @since 6.2.0 + * @var string|null + */ + private $sought_tag_name; + + /** + * The CSS class name this processor currently scans for. 
+ * + * @since 6.2.0 + * @var string|null + */ + private $sought_class_name; + + /** + * The match offset this processor currently scans for. + * + * @since 6.2.0 + * @var int|null + */ + private $sought_match_offset; + + /** + * Whether to visit tag closers, e.g. , when walking an input document. + * + * @since 6.2.0 + * @var boolean + */ + private $stop_on_tag_closers; + + /** + * The updated HTML document. + * + * @since 6.2.0 + * @var string + */ + private $updated_html = ''; + + /** + * How many bytes from the original HTML document were already read. + * + * @since 6.2.0 + * @var int + */ + private $parsed_bytes = 0; + + /** + * How many bytes from the original HTML document were already treated + * with the requested replacements. + * + * @since 6.2.0 + * @var int + */ + private $updated_bytes = 0; + + /** + * Byte offset in input document where current tag name starts. + * + * Example: + * ``` + *
... + * 01234 + * - tag name starts at 1 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_starts_at; + + /** + * Byte length of current tag name. + * + * Example: + * ``` + *
... + * 01234 + * --- tag name length is 3 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_length; + + /** + * Byte offset in input document where current tag token ends. + * + * Example: + * ``` + *
... + * 0 1 | + * 01234567890123456 + * --- tag ends at 14 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_ends_at; + + /** + * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. + * + * @since 6.2.0 + * @var boolean + */ + private $is_closing_tag; + + /** + * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. + * + * Example: + * + * // supposing the parser is working through this content + * // and stops after recognizing the `id` attribute + * //
+ * // ^ parsing will continue from this point + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ) + * ]; + * + * // when picking up parsing again, or when asking to find the + * // `class` attribute we will continue and add to this array + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ), + * 'class' => new WP_HTML_Attribute_Match( 'class', 'outline', 18, 32 ) + * ]; + * + * // Note that only the `class` attribute value is stored in the index. + * // That's because it is the only value used by this class at the moment. + * + * + * @since 6.2.0 + * @var WP_HTML_Attribute_Token[] + */ + private $attributes = array(); + + /** + * Which class names to add or remove from a tag. + * + * These are tracked separately from attribute updates because they are + * semantically distinct, whereas this interface exists for the common + * case of adding and removing class names while other attributes are + * generally modified as with DOM `setAttribute` calls. + * + * When modifying an HTML document these will eventually be collapsed + * into a single lexical update to replace the `class` attribute. + * + * Example: + * + * // Add the `wp-block-group` class, remove the `wp-group` class. + * $classname_updates = [ + * // Indexed by a comparable class name + * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, + * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS + * ]; + * + * + * @since 6.2.0 + * @var bool[] + */ + private $classname_updates = array(); + + /** + * Tracks a semantic location in the original HTML which + * shifts with updates as they are applied to the document. + * + * @since 6.2.0 + * @var WP_HTML_Span[] + */ + private $bookmarks = array(); + + const ADD_CLASS = true; + const REMOVE_CLASS = false; + const SKIP_CLASS = null; + + /** + * Lexical replacements to apply to input HTML document. 
+ * + * HTML modifications collapse into lexical replacements in order to + * provide an efficient mechanism to update documents lazily and in + * order to support a variety of semantic modifications without + * building a complicated parsing machinery. That is, it's up to + * the calling class to generate the lexical modification from the + * semantic change requested. + * + * Example: + * + * // Replace an attribute stored with a new value, indices + * // sourced from the lazily-parsed HTML recognizer. + * $start = $attributes['src']->start; + * $end = $attributes['src']->end; + * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() ); + * + * // Correspondingly, something like this + * // will appear in the replacements array. + * $replacements = [ + * WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' ) + * ]; + * + * + * @since 6.2.0 + * @var WP_HTML_Text_Replacement[] + */ + private $lexical_updates = array(); + + /** + * Tracks how many times we've performed a `seek()` + * so that we can prevent accidental infinite loops. + * + * @see seek + * @since 6.2.0 + * @var int + */ + private $seek_count = 0; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param string $html HTML to process. + */ + public function __construct( $html ) { + $this->html = $html; + } + + /** + * Finds the next tag matching the $query. + * + * @since 6.2.0 + * + * @param array|string $query { + * Which tag name to find, having which class, etc. + * + * @type string|null $tag_name Which tag to find, or `null` for "any tag." + * @type int|null $match_offset Find the Nth tag matching all search criteria. + * 0 for "first" tag, 2 for "third," etc. + * Defaults to first tag. + * @type string|null $class_name Tag must contain this whole class name to match. + * } + * @return boolean Whether a tag was matched. 
+ */ + public function next_tag( $query = null ) { + $this->parse_query( $query ); + $already_found = 0; + + do { + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + /* + * Unfortunately we can't try to search for only the tag name we want because that might + * lead us to skip over other tags and lose track of our place. So we need to search for + * _every_ tag and then check after we find one if it's the one we are looking for. + */ + if ( false === $this->parse_next_tag() ) { + $this->parsed_bytes = strlen( $this->html ); + + return false; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + + $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes ); + if ( false === $tag_ends_at ) { + return false; + } + $this->tag_ends_at = $tag_ends_at; + $this->parsed_bytes = $tag_ends_at; + + if ( $this->matches() ) { + ++$already_found; + } + + // Avoid copying the tag name string when possible. + $t = $this->html[ $this->tag_name_starts_at ]; + if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) { + $tag_name = $this->get_tag(); + + if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } elseif ( + ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && + ! $this->skip_rcdata( $tag_name ) + ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } + } + } while ( $already_found < $this->sought_match_offset ); + + return true; + } + + + /** + * Sets a bookmark in the HTML document. + * + * Bookmarks represent specific places or tokens in the HTML + * document, such as a tag opener or closer. When applying + * edits to a document, such as setting an attribute, the + * text offsets of that token may shift; the bookmark is + * kept updated with those shifts and remains stable unless + * the entire span of text in which the token sits is removed. + * + * Release bookmarks when they are no longer needed. + * + * Example: + * ``` + *

Surprising fact you may not know!

+ * ^ ^ + * \-|-- this `H2` opener bookmark tracks the token + * + *

Surprising fact you may no… + * ^ ^ + * \-|-- it shifts with edits + * ``` + * + * Bookmarks provide the ability to seek to a previously-scanned + * place in the HTML document. This avoids the need to re-scan + * the entire thing. + * + * Example: + * ``` + *
  • One
  • Two
  • Three
+ * ^^^^ + * want to note this last item + * + * $p = new WP_HTML_Tag_Processor( $html ); + * $in_list = false; + * while ( $p->next_tag( [ 'tag_closers' => $in_list ? 'visit' : 'skip' ] ) ) { + * if ( 'UL' === $p->get_tag() ) { + * if ( $p->is_tag_closer() ) { + * $in_list = false; + * $p->set_bookmark( 'resume' ); + * if ( $p->seek( 'last-li' ) ) { + * $p->add_class( 'last-li' ); + * } + * $p->seek( 'resume' ); + * $p->release_bookmark( 'last-li' ); + * $p->release_bookmark( 'resume' ); + * } else { + * $in_list = true; + * } + * } + * + * if ( 'LI' === $p->get_tag() ) { + * $p->set_bookmark( 'last-li' ); + * } + * } + * ``` + * + * Because bookmarks maintain their position they don't + * expose any internal offsets for the HTML document + * and can't be used with normal string functions. + * + * Because bookmarks allocate memory and require processing + * for every applied update they are limited and require + * a name. They should not be created inside a loop. + * + * Bookmarks are a powerful tool to enable complicated behavior; + * consider double-checking that you need this tool if you are + * reaching for it, as inappropriate use could lead to broken + * HTML structure or unwanted processing overhead. + * + * @since 6.2.0 + * + * @param string $name Identifies this particular bookmark. + * @return bool Whether the bookmark was set; false when no tag is currently matched + * or when the bookmark limit has been reached. + * @throws Exception Throws when the bookmark limit is reached and WP_DEBUG is set. + */ + public function set_bookmark( $name ) { + // Nothing to bookmark until a tag has been matched. + if ( null === $this->tag_name_starts_at ) { + return false; + } + + // Re-setting an existing name replaces it and doesn't count against the limit. + if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( "Too many HTML bookmarks: cannot create bookmark {$name}." ); + } + return false; + } + + // The span starts one byte before the tag name and ends at the recorded tag end offset. + $this->bookmarks[ $name ] = new WP_HTML_Span( + $this->tag_name_starts_at - 1, + $this->tag_ends_at + ); + + return true; + } + + + /** + * Removes a bookmark if you no longer need to use it.
+ * + * Releasing a bookmark frees up the small performance + * overhead they require, mainly in the form of compute + * costs when modifying the document. + * + * @param string $name Name of the bookmark to remove. + * @return bool + */ + public function release_bookmark( $name ) { + if ( ! array_key_exists( $name, $this->bookmarks ) ) { + return false; + } + + unset( $this->bookmarks[ $name ] ); + + return true; + } + + + /** + * Skips the contents of the title and textarea tags until an appropriate + * tag closer is found. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state + * @param string $tag_name – the uppercase tag name which will close the RCDATA region. + * @return bool Whether an appropriate tag closer was found. + * @since 6.2.0 + */ + private function skip_rcdata( $tag_name ) { + $html = $this->html; + $doc_length = strlen( $html ); + $tag_length = strlen( $tag_name ); + + $at = $this->parsed_bytes; + + while ( false !== $at && $at < $doc_length ) { + $at = strpos( $this->html, '= $doc_length ) { + $this->parsed_bytes = $doc_length; + return false; + } + + $at += 2; + + /* + * We have to find a case-insensitive match to the tag name. + * Note also that since tag names are limited to US-ASCII + * characters we can ignore any kind of Unicode normalizing + * forms when comparing. If we get a non-ASCII character it + * will never be a match. + */ + for ( $i = 0; $i < $tag_length; $i++ ) { + $tag_char = $tag_name[ $i ]; + $html_char = $html[ $at + $i ]; + + if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { + $at += $i; + continue 2; + } + } + + $at += $tag_length; + $this->parsed_bytes = $at; + + /* + * Ensure we terminate the tag name, otherwise we might, + * for example, accidentally match the sequence + * "" for "".
+ */ + $c = $html[ $at ]; + if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { + continue; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + $at = $this->parsed_bytes; + if ( $at >= strlen( $this->html ) ) { + return false; + } + + if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { + ++$this->parsed_bytes; + return true; + } + } + + return false; + } + + /** + * Skips the contents of ' ), + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called after get_updated_html()) did not return attribute added via set_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_returns_updated_values_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + } + + /** + * Sets an attribute using a mixed-case name and reads it back using the lowercase name. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-ATTribute', 'test-value' ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'my-class' ); + $this->assertSame( + 'my-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include class name added via add_class()' + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'my-class' ); + $this->assertSame( + 'my-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' + ); + $p->add_class( 'my-other-class' ); + $this->assertSame( + 'my-class my-other-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class names added via subsequent add_class() calls' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include class names added via subsequent add_class() calls' + ); + } + + /** + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertNull( + $p->get_attribute( 'id' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was removed by remove_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML includes attribute that was removed by remove_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $p->remove_attribute( 'test-attribute' ); + $this->assertNull( + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' + ); + $this->assertSame( + self::HTML_SIMPLE, + $p->get_updated_html(), + 'Updated HTML includes attribute that was added via set_attribute() and then removed by remove_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'id', 'test-value' ); + $p->remove_attribute( 'id' ); + $this->assertNull( + $p->get_attribute( 'id' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'with-border' ); + $this->assertSame( + 'main', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned the wrong attribute after calling remove_attribute()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML includes wrong attribute after calling remove_attribute()' + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers remove_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->remove_class( 'foo-class' ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned class name that was added via add_class() and then removed by remove_class()' + ); + $this->assertSame( + self::HTML_WITH_CLASSES, + $p->get_updated_html(), + 'Updated HTML includes class that was added via add_class() and then removed by remove_class()' + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers remove_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'with-border' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + 'main', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned class name that was duplicated via add_class() and then removed by remove_class()' + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML includes class that was duplicated via add_class() and then removed by remove_class()' + ); + } + + /** + * According to HTML spec, only the first instance of an attribute counts. + * The other ones are ignored. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_update_first_when_duplicated_attribute() { + $p = new WP_HTML_Tag_Processor( '
Text
' ); + $p->next_tag(); + $p->set_attribute( 'id', 'updated-id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'id', 'new-id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + while ( $p->next_tag() ) { + $p->set_attribute( 'data-foo', 'bar' ); + } + + $this->assertSame( '
Text
', $p->get_updated_html() ); + } + + /** + * Removing an attribute that's listed many times, e.g. `
` should remove + * all its instances and output just `
`. + * + * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case + * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it. + * Let's revisit if and when this becomes a problem. + * + * This test is in place to confirm this behavior, while incorrect, is well-defined. + * + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_first_when_duplicated_attribute() { + $p = new WP_HTML_Tag_Processor( '
Text
' ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'no-such-attribute' ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_creates_a_class_attribute_when_there_is_none() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include class name added via add_class()' + ); + $this->assertSame( + 'foo-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return class name added via add_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->add_class( 'bar-class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not include class names added via subsequent add_class() calls' + ); + $this->assertSame( + 'foo-class bar-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return class names added via subsequent add_class() calls" + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_class( 'foo-class' ); + $this->assertSame( + self::HTML_SIMPLE, + $p->get_updated_html(), + 'Updated HTML includes class name that was removed by remove_class()' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null for class name that was removed by remove_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->add_class( 'bar-class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect class names added to existing class attribute via subsequent add_class() calls' + ); + $this->assertSame( + 'main with-border foo-class bar-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class names added to existing class attribute via subsequent add_class() calls" + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'main' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect class name removed from existing class attribute via remove_class()' + ); + $this->assertSame( + ' with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class name removed from existing class attribute via remove_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'main' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect class attribute removed via subesequent remove_class() calls' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null for class attribute removed via subesequent remove_class() calls" + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_does_not_add_duplicate_class_names() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'with-border' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect deduplicated class name added via add_class()' + ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect deduplicated class name added via add_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'main' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect class name order after adding duplicated class name via add_class()' + ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class name order after adding duplicated class name added via add_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { + $p = new WP_HTML_Tag_Processor( + '
Text
' + ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect existing excessive whitespace after adding class name via add_class()' + ); + $this->assertSame( + ' main with-border foo-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect existing excessive whitespace after adding class name via add_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { + $p = new WP_HTML_Tag_Processor( + '
Text
' + ); + $p->next_tag(); + $p->remove_class( 'with-border' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect existing excessive whitespace after removing class name via remove_class()' + ); + $this->assertSame( + ' main', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect existing excessive whitespace after removing class name via removing_class()" + ); + } + + /** + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { + $p = new WP_HTML_Tag_Processor( + '
Text
' + ); + $p->next_tag(); + $p->remove_class( 'main' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + 'Updated HTML does not reflect removed class attribute after removing all class names via remove_class()' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null after removing all class names via remove_class()" + ); + } + + /** + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the + * final class name should be "$value $different_value". In other words, the `add_class` call + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. + * + * @ticket 56299 + * + * @covers add_class + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_set_attribute_takes_priority_over_add_class() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'add_class' ); + $p->set_attribute( 'class', 'set_attribute' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + $this->assertSame( + 'set_attribute', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->set_attribute( 'class', 'set_attribute' ); + $p->add_class( 'add_class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + $this->assertSame( + 'set_attribute add_class', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + } + + /** + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the + * final class name should be "$value $different_value". In other words, the `add_class` call + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. + * + * This is still true if we read enqueued updates before calling `get_updated_html()`. + * + * @ticket 56299 + * + * @covers add_class + * @covers set_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'add_class' ); + $p->set_attribute( 'class', 'set_attribute' ); + $this->assertSame( + 'set_attribute', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->set_attribute( 'class', 'set_attribute' ); + $p->add_class( 'add_class' ); + $this->assertSame( + 'set_attribute add_class', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers add_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_add_class_overrides_boolean_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'class', true ); + $p->add_class( 'add_class' ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + $this->assertSame( + 'add_class', + $p->get_attribute( 'class' ), + "get_attribute (called after get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers add_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'class', true ); + $p->add_class( 'add_class' ); + $this->assertSame( + 'add_class', + $p->get_attribute( 'class' ), + "get_attribute (called before get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + $this->assertSame( + '
Text
', + $p->get_updated_html(), + "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers add_class + * @covers remove_class + * @covers get_updated_html + */ + public function test_advanced_use_case() { + $input = << +
+
+
+ + + + + + + +
+
+
+HTML; + + $expected_output = << +
+
+
+ + + + + + + +
+
+
+HTML; + + $p = new WP_HTML_Tag_Processor( $input ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + $p->set_attribute( 'data-details', '{ "key": "value" }' ); + $p->add_class( 'is-processed' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'div', + 'class_name' => 'BtnGroup', + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_class( 'BtnGroup' ); + $p->add_class( 'button-group' ); + $p->add_class( 'Another-Mixed-Case' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'div', + 'class_name' => 'BtnGroup', + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_class( 'BtnGroup' ); + $p->add_class( 'button-group' ); + $p->add_class( 'Another-Mixed-Case' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'button', + 'class_name' => 'btn', + 'match_offset' => 3, + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_attribute( 'class' ); + $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' ); + $p->set_attribute( 'class', 'test' ); + $this->assertSame( $expected_output, $p->get_updated_html(), 'Calling get_updated_html after updating the attributes did not return the expected HTML' ); + } + + /** + * @ticket 56299 + * + * @covers remove_attribute + * @covers set_attribute + * @covers get_updated_html + */ + public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() { + $p = new WP_HTML_Tag_Processor( + '
Text
' + ); + $p->next_tag( + array( + 'tag_name' => 'div', + 'id' => 'first', + ) + ); + $p->remove_attribute( 'id' ); + $p->next_tag( + array( + 'tag_name' => 'span', + 'id' => 'second', + ) + ); + $p->set_attribute( 'id', 'single-quote' ); + $this->assertSame( + '
Text
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() { + $p = new WP_HTML_Tag_Processor( + '
' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', true ); + $this->assertSame( + '
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { + $p = new WP_HTML_Tag_Processor( + '
' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', false ); + $this->assertSame( + '
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { + $html_input = '
'; + $p = new WP_HTML_Tag_Processor( $html_input ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', false ); + $this->assertSame( $html_input, $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { + $p = new WP_HTML_Tag_Processor( + '
' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', 'checked' ); + $this->assertSame( + '
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers get_tag + * @covers next_tag + */ + public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { + $p = new WP_HTML_Tag_Processor( '
', + ); + + $examples['Simple uppercase script tag'] = array( + '
', + ); + + $examples['Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)'] = array( + '
-->', + ); + + $examples['Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)'] = array( + '
-->', + ); + + $examples['Double escaped script with a tricky opener'] = array( + '">
', + ); + + $examples['Double escaped script with a tricky closer'] = array( + '">
', + ); + + $examples['Double escaped, then escaped, then double escaped'] = array( + '
', + ); + + $examples['Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)'] = array( + '
-->', + ); + + $examples['Script closer with another script tag in closer attributes'] = array( + '
', + ); + + $examples['Script closer with attributes'] = array( + '
', + ); + + $examples['Script opener with title closer inside'] = array( + '
', + ); + + $examples['Complex script with many parsing states'] = array( + '-->
-->', + ); + return $examples; + } + + /** + * @ticket 56299 + * + * @covers next_tag + * + * @dataProvider data_rcdata_state + */ + public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) { + $p = new WP_HTML_Tag_Processor( $rcdata_then_div ); + $p->next_tag(); + $this->assertSame( strtoupper( $rcdata_tag ), $p->get_tag(), "The first found tag was not '$rcdata_tag'" ); + $p->next_tag(); + $this->assertSame( 'DIV', $p->get_tag(), "The second found tag was not 'div'" ); + } + + /** + * Data provider for test_next_tag_ignores_the_contents_of_a_rcdata_tag(). + * + * @return array { + * @type array { + * @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags. + * @type string $rcdata_tag The RCDATA tag. + * } + * } + */ + public function data_rcdata_state() { + $examples = array(); + $examples['Simple textarea'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Simple title'] = array( + '<span class="d-none d-md-inline">Back to notifications</title</span>
', + 'TITLE', + ); + + $examples['Comment opener inside a textarea tag should be ignored'] = array( + '
-->', + 'TEXTAREA', + ); + + $examples['Textarea closer with another textarea tag in closer attributes'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Textarea closer with attributes'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Textarea opener with title closer inside'] = array( + '
', + 'TEXTAREA', + ); + return $examples; + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + * @covers get_updated_html + */ + public function test_can_query_and_update_wrongly_nested_tags() { + $p = new WP_HTML_Tag_Processor( + '123

456789

' + ); + $p->next_tag( 'span' ); + $p->set_attribute( 'class', 'span-class' ); + $p->next_tag( 'p' ); + $p->set_attribute( 'class', 'p-class' ); + $this->assertSame( + '123

456789

', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_removing_attributes_works_even_in_malformed_html() { + $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); + $p->next_tag( 'span' ); + $p->remove_attribute( 'Notifications<' ); + $this->assertSame( + '
Back to notifications
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + * @covers get_updated_html + */ + public function test_updating_attributes_works_even_in_malformed_html_1() { + $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); + $p->next_tag( 'span' ); + $p->set_attribute( 'id', 'first' ); + $p->next_tag( 'span' ); + $p->set_attribute( 'id', 'second' ); + $this->assertSame( + '
Back to notifications
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + * @covers add_class + * @covers get_updated_html + * + * @dataProvider data_malformed_tag + */ + public function test_updating_attributes_works_even_in_malformed_html_2( $html_input, $html_expected ) { + $p = new WP_HTML_Tag_Processor( $html_input ); + $p->next_tag(); + $p->set_attribute( 'foo', 'bar' ); + $p->add_class( 'firstTag' ); + $p->next_tag(); + $p->add_class( 'secondTag' ); + $this->assertSame( + $html_expected, + $p->get_updated_html() + ); + } + + /** + * Data provider for test_updating_attributes_works_even_in_malformed_html_2(). + * + * @return array { + * @type array { + * @type string $html_input The input HTML snippet. + * @type string $html_expected The expected HTML snippet after processing. + * } + * } + */ + public function data_malformed_tag() { + $null_byte = chr( 0 ); + $examples = array(); + $examples['Invalid entity inside attribute value'] = array( + 'test', + 'test', + ); + + $examples['HTML tag opening inside attribute value'] = array( + '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', + ); + + $examples['HTML tag brackets in attribute values and data markup'] = array( + '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', + ); + + $examples['Single and double quotes in attribute value'] = array( + '

test', + '

test', + ); + + $examples['Unquoted attribute values'] = array( + '


test', + '
test', + ); + + $examples['Double-quotes escaped in double-quote attribute value'] = array( + '
test', + '
test', + ); + + $examples['Unquoted attribute value'] = array( + '
test', + '
test', + ); + + $examples['Unquoted attribute value with tag-like value'] = array( + '
>test', + '
>test', + ); + + $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array( + '
>test', + '
>test', + ); + + $examples['1'] = array( + '
test', + '
test', + ); + + $examples['2'] = array( + '
test', + '
test', + ); + + $examples['4'] = array( + '
test', + '
test', + ); + + $examples['5'] = array( + '
code>test', + '
code>test', + ); + + $examples['6'] = array( + '
test', + '
test', + ); + + $examples['7'] = array( + '
test', + '
test', + ); + + $examples['8'] = array( + '
id="test">test', + '
id="test">test', + ); + + $examples['9'] = array( + '
test', + '
test', + ); + + $examples['10'] = array( + 'test', + 'test', + ); + + $examples['11'] = array( + 'The applicative operator <* works well in Haskell; is what?test', + 'The applicative operator <* works well in Haskell; is what?test', + ); + + $examples['12'] = array( + '<3 is a heart but is a tag.test', + '<3 is a heart but is a tag.test', + ); + + $examples['13'] = array( + 'test', + 'test', + ); + + $examples['14'] = array( + 'test', + 'test', + ); + + $examples['15'] = array( + ' a HTML Tag]]>test', + ' a HTML Tag]]>test', + ); + + $examples['16'] = array( + '
test', + '
test', + ); + + $examples['17'] = array( + '
test', + '
test', + ); + + $examples['18'] = array( + '
test', + '
test', + ); + + $examples['19'] = array( + '
test', + '
test', + ); + + $examples['20'] = array( + '
test', + '
test', + ); + + $examples['21'] = array( + '
test', + '
test', + ); + + $examples['22'] = array( + '
test', + '
test', + ); + + $examples['23'] = array( + '
test', + '
test', + ); + + $examples['24'] = array( + '
test', + '
test', + ); + + $examples['25'] = array( + '
test', + '
test', + ); + + $examples['Multiple unclosed tags treated as a single tag'] = array( + << + test +HTML + , + << + test +HTML + , + ); + + $examples['27'] = array( + '
test', + '
test', + ); + + $examples['28'] = array( + '
test', + '
test', + ); + + return $examples; + } +}