diff --git a/.travis.yml b/.travis.yml index 66fc489ed79..30bd83eaa27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,9 +43,9 @@ cache: matrix: include: - php: "5.3" - env: WP_VERSION=latest DEV_LIB_SKIP=phpcs + env: WP_VERSION=latest DEV_LIB_SKIP=composer,phpcs - php: "5.3" - env: WP_VERSION=4.7 DEV_LIB_SKIP=phpcs + env: WP_VERSION=4.7 DEV_LIB_SKIP=composer,phpcs - php: "5.4" env: WP_VERSION=latest - php: "5.4" diff --git a/dev-lib b/dev-lib index 4ac4bd20b5a..223f238eb74 160000 --- a/dev-lib +++ b/dev-lib @@ -1 +1 @@ -Subproject commit 4ac4bd20b5a4b3953eb81a8d2536baef2bf60dcb +Subproject commit 223f238eb74dd2c65892b8fb6c1c06ae04e95234 diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php index 18f4412882e..ee558c2864e 100644 --- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php +++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php @@ -699,6 +699,19 @@ private function validate_attr_spec_list_for_node( $node, $attr_spec_list ) { } } + /* + * If given attribute's value is a URL with a host, the host must + * be valid + */ + if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) ) { + $result = $this->check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ); + if ( AMP_Rule_Spec::PASS === $result ) { + $score++; + } elseif ( AMP_Rule_Spec::FAIL === $result ) { + return 0; + } + } + /* * If the given attribute's value is *not* a relative path, and the rule's * value is `false`, then pass. @@ -877,6 +890,9 @@ private function delegated_sanitize_disallowed_attribute_values_in_node( $node, } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) && AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr_spec_rule ) ) { $should_remove_node = true; + } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) && + AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ) ) { + $should_remove_node = true; } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) && AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_disallowed_relative( $node, $attr_name, $attr_spec_rule ) ) { $should_remove_node = true; @@ -1122,6 +1138,47 @@ private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $att return AMP_Rule_Spec::NOT_APPLICABLE; } + /** + * Check if attribute has a valid host value + * + * @since 0.7 + * + * @param DOMElement $node Node. + * @param string $attr_name Attribute name. + * @param array[]|string[] $attr_spec_rule Attribute spec rule. + * + * @return string: + * - AMP_Rule_Spec::PASS - $attr_name has a value that matches the rule. + * - AMP_Rule_Spec::FAIL - $attr_name has a value that does *not* match rule. + * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there + * is no rule for this attribute. + */ + private function check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ) { + if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) ) { + if ( $node->hasAttribute( $attr_name ) ) { + $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) ); + foreach ( $urls_to_test as $url ) { + $url = urldecode( $url ); + // Check if the host contains invalid chars. + $url_host = wp_parse_url( $url, PHP_URL_HOST ); + if ( $url_host && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/i', $url_host ) ) { + return AMP_Rule_Spec::FAIL; + } + + // Check if the protocol contains invalid chars. + $dots_pos = strpos( $url, ':' ); + if ( false !== $dots_pos && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/i', substr( $url, 0, $dots_pos ) ) ) { + return AMP_Rule_Spec::FAIL; + } + } + + return AMP_Rule_Spec::PASS; + } + } + + return AMP_Rule_Spec::NOT_APPLICABLE; + } + /** * Check if attribute has a protocol value rule determine if it matches. * @@ -1138,9 +1195,7 @@ private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $att private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) ) { if ( $node->hasAttribute( $attr_name ) ) { - $attr_value = $node->getAttribute( $attr_name ); - $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value ); - $urls_to_test = explode( ',', $attr_value ); + $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) ); foreach ( $urls_to_test as $url ) { /* * This seems to be an acceptable check since the AMP validator @@ -1157,9 +1212,7 @@ private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) { foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) { if ( $node->hasAttribute( $alternative_name ) ) { - $attr_value = $node->getAttribute( $alternative_name ); - $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value ); - $urls_to_test = explode( ',', $attr_value ); + $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $alternative_name ) ); foreach ( $urls_to_test as $url ) { /* * This seems to be an acceptable check since the AMP validator @@ -1196,9 +1249,7 @@ private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $attr_spec_rule ) { if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) && ! ( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) ) { if ( $node->hasAttribute( $attr_name ) ) { - $attr_value = $node->getAttribute( $attr_name ); - $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value ); - $urls_to_test = explode( ',', $attr_value ); + $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) ); foreach ( $urls_to_test as $url ) { $parsed_url = AMP_WP_Utils::parse_url( $url ); @@ -1217,9 +1268,7 @@ private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $a } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) { foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) { if ( $node->hasAttribute( $alternative_name ) ) { - $attr_value = $node->getAttribute( $alternative_name ); - $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value ); - $urls_to_test = explode( ',', $attr_value ); + $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $alternative_name ) ); foreach ( $urls_to_test as $url ) { $parsed_url = AMP_WP_Utils::parse_url( $url ); if ( empty( $parsed_url['scheme'] ) ) { diff --git a/readme.md b/readme.md index 8cdfb6070e5..f5693b78884 100644 --- a/readme.md +++ b/readme.md @@ -64,6 +64,7 @@ Follow along with or [contribute](https://github.com/Automattic/amp-wp/blob/deve - Creation of AMP-related notifications, on entering invalid content in the 'classic' editor. See [#912](https://github.com/Automattic/amp-wp/pull/912/). Props kienstra, westonruter, ThierryA. - Add output buffering, ensuring the entire page is valid AMP. See [#929](https://github.com/Automattic/amp-wp/pull/929), [#857](https://github.com/Automattic/amp-wp/pull/857). Props westonruter, ThierryA. - Update the generated sanitizer file to the AMP spec, and simplify the file that generates it. See [#929](https://github.com/Automattic/amp-wp/pull/929). Props westonruter. +- Add validation of host names in URLs. See [#983](https://github.com/Automattic/amp-wp/pull/983). Props rubengonzalezmrf. See [0.7 milestone](https://github.com/Automattic/amp-wp/milestone/6?closed=1). diff --git a/readme.txt b/readme.txt index fe723ec86f8..21a2be07b52 100644 --- a/readme.txt +++ b/readme.txt @@ -47,6 +47,7 @@ Follow along with or [contribute](https://github.com/Automattic/amp-wp/blob/deve - Creation of AMP-related notifications, on entering invalid content in the 'classic' editor. See [#912](https://github.com/Automattic/amp-wp/pull/912/). Props kienstra, westonruter, ThierryA. - Add output buffering, ensuring the entire page is valid AMP. See [#929](https://github.com/Automattic/amp-wp/pull/929), [#857](https://github.com/Automattic/amp-wp/pull/857). Props westonruter, ThierryA. - Update the generated sanitizer file to the AMP spec, and simplify the file that generates it. See [#929](https://github.com/Automattic/amp-wp/pull/929). Props westonruter. +- Add validation of host names in URLs. See [#983](https://github.com/Automattic/amp-wp/pull/983). Props rubengonzalezmrf. See [0.7 milestone](https://github.com/Automattic/amp-wp/milestone/6?closed=1). diff --git a/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php b/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php index a4746c4b743..a2115bc7e36 100644 --- a/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php +++ b/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php @@ -996,7 +996,7 @@ public function get_validate_attr_spec_list_for_node_data() { ), ), ), - 1, + 2, ), 'attributes_allow_relative_false_fail' => array( array( @@ -1024,7 +1024,7 @@ public function get_validate_attr_spec_list_for_node_data() { ), ), ), - 1, + 2, ), 'attributes_allow_empty_false_fail' => array( array( diff --git a/tests/test-tag-and-attribute-sanitizer.php b/tests/test-tag-and-attribute-sanitizer.php index f579f8e8426..c99416d81b1 100644 --- a/tests/test-tag-and-attribute-sanitizer.php +++ b/tests/test-tag-and-attribute-sanitizer.php @@ -680,6 +680,23 @@ public function get_body_data() { 'value', ), + 'a_with_wrong_host' => array( + 'value', + 'value', + ), + 'a_with_encoded_host' => array( + 'value', + null, + ), + 'a_with_wrong_schemeless_host' => array( + 'value', + 'value', + ), + 'a_with_mail_host' => array( + 'value', + 'value', + ), + // font is removed so we should check that other elements are checked as well. 'font_with_other_bad_elements' => array( 'HeadlineSpan',