diff --git a/.travis.yml b/.travis.yml
index 66fc489ed79..30bd83eaa27 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,9 +43,9 @@ cache:
matrix:
include:
- php: "5.3"
- env: WP_VERSION=latest DEV_LIB_SKIP=phpcs
+ env: WP_VERSION=latest DEV_LIB_SKIP=composer,phpcs
- php: "5.3"
- env: WP_VERSION=4.7 DEV_LIB_SKIP=phpcs
+ env: WP_VERSION=4.7 DEV_LIB_SKIP=composer,phpcs
- php: "5.4"
env: WP_VERSION=latest
- php: "5.4"
diff --git a/dev-lib b/dev-lib
index 4ac4bd20b5a..223f238eb74 160000
--- a/dev-lib
+++ b/dev-lib
@@ -1 +1 @@
-Subproject commit 4ac4bd20b5a4b3953eb81a8d2536baef2bf60dcb
+Subproject commit 223f238eb74dd2c65892b8fb6c1c06ae04e95234
diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
index 18f4412882e..ee558c2864e 100644
--- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
+++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
@@ -699,6 +699,19 @@ private function validate_attr_spec_list_for_node( $node, $attr_spec_list ) {
}
}
+ /*
+ * If given attribute's value is a URL with a host, the host must
+ * be valid
+ */
+ if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) ) {
+ $result = $this->check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule );
+ if ( AMP_Rule_Spec::PASS === $result ) {
+ $score++;
+ } elseif ( AMP_Rule_Spec::FAIL === $result ) {
+ return 0;
+ }
+ }
+
/*
* If the given attribute's value is *not* a relative path, and the rule's
* value is `false`, then pass.
@@ -877,6 +890,9 @@ private function delegated_sanitize_disallowed_attribute_values_in_node( $node,
} elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) &&
AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr_spec_rule ) ) {
$should_remove_node = true;
+ } elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) &&
+ AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ) ) {
+ $should_remove_node = true;
} elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) &&
AMP_Rule_Spec::FAIL === $this->check_attr_spec_rule_disallowed_relative( $node, $attr_name, $attr_spec_rule ) ) {
$should_remove_node = true;
@@ -1122,6 +1138,47 @@ private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $att
return AMP_Rule_Spec::NOT_APPLICABLE;
}
+ /**
+ * Check if attribute has a valid host value
+ *
+ * @since 0.7
+ *
+ * @param DOMElement $node Node.
+ * @param string $attr_name Attribute name.
+ * @param array[]|string[] $attr_spec_rule Attribute spec rule.
+ *
+ * @return string:
+ * - AMP_Rule_Spec::PASS - $attr_name has a value that matches the rule.
+ * - AMP_Rule_Spec::FAIL - $attr_name has a value that does *not* match rule.
+ * - AMP_Rule_Spec::NOT_APPLICABLE - $attr_name does not exist or there
+ * is no rule for this attribute.
+ */
+ private function check_attr_spec_rule_valid_url( $node, $attr_name, $attr_spec_rule ) {
+ if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ] ) ) {
+ if ( $node->hasAttribute( $attr_name ) ) {
+ $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) );
+ foreach ( $urls_to_test as $url ) {
+ $url = urldecode( $url );
+ // Check if the host contains invalid chars.
+ $url_host = wp_parse_url( $url, PHP_URL_HOST );
+ if ( $url_host && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/i', $url_host ) ) {
+ return AMP_Rule_Spec::FAIL;
+ }
+
+ // Check if the protocol contains invalid chars.
+ $dots_pos = strpos( $url, ':' );
+ if ( false !== $dots_pos && preg_match( '/[!"#$%&\'()*+,\/:;<=>?@[\]^`{|}~\s]/i', substr( $url, 0, $dots_pos ) ) ) {
+ return AMP_Rule_Spec::FAIL;
+ }
+ }
+
+ return AMP_Rule_Spec::PASS;
+ }
+ }
+
+ return AMP_Rule_Spec::NOT_APPLICABLE;
+ }
+
/**
* Check if attribute has a protocol value rule determine if it matches.
*
@@ -1138,9 +1195,7 @@ private function check_attr_spec_rule_value_regex_casei( $node, $attr_name, $att
private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr_spec_rule ) {
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOWED_PROTOCOL ] ) ) {
if ( $node->hasAttribute( $attr_name ) ) {
- $attr_value = $node->getAttribute( $attr_name );
- $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value );
- $urls_to_test = explode( ',', $attr_value );
+ $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) );
foreach ( $urls_to_test as $url ) {
/*
* This seems to be an acceptable check since the AMP validator
@@ -1157,9 +1212,7 @@ private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr
} elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) {
foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) {
if ( $node->hasAttribute( $alternative_name ) ) {
- $attr_value = $node->getAttribute( $alternative_name );
- $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value );
- $urls_to_test = explode( ',', $attr_value );
+ $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $alternative_name ) );
foreach ( $urls_to_test as $url ) {
/*
* This seems to be an acceptable check since the AMP validator
@@ -1196,9 +1249,7 @@ private function check_attr_spec_rule_allowed_protocol( $node, $attr_name, $attr
private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $attr_spec_rule ) {
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) && ! ( $attr_spec_rule[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_RELATIVE ] ) ) {
if ( $node->hasAttribute( $attr_name ) ) {
- $attr_value = $node->getAttribute( $attr_name );
- $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value );
- $urls_to_test = explode( ',', $attr_value );
+ $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $attr_name ) );
foreach ( $urls_to_test as $url ) {
$parsed_url = AMP_WP_Utils::parse_url( $url );
@@ -1217,9 +1268,7 @@ private function check_attr_spec_rule_disallowed_relative( $node, $attr_name, $a
} elseif ( isset( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) {
foreach ( $attr_spec_rule[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) {
if ( $node->hasAttribute( $alternative_name ) ) {
- $attr_value = $node->getAttribute( $alternative_name );
- $attr_value = preg_replace( '/\s*,\s*/', ',', $attr_value );
- $urls_to_test = explode( ',', $attr_value );
+ $urls_to_test = preg_split( '/\s*,\s*/', $node->getAttribute( $alternative_name ) );
foreach ( $urls_to_test as $url ) {
$parsed_url = AMP_WP_Utils::parse_url( $url );
if ( empty( $parsed_url['scheme'] ) ) {
diff --git a/readme.md b/readme.md
index 8cdfb6070e5..f5693b78884 100644
--- a/readme.md
+++ b/readme.md
@@ -64,6 +64,7 @@ Follow along with or [contribute](https://github.com/Automattic/amp-wp/blob/deve
- Creation of AMP-related notifications, on entering invalid content in the 'classic' editor. See [#912](https://github.com/Automattic/amp-wp/pull/912/). Props kienstra, westonruter, ThierryA.
- Add output buffering, ensuring the entire page is valid AMP. See [#929](https://github.com/Automattic/amp-wp/pull/929), [#857](https://github.com/Automattic/amp-wp/pull/857). Props westonruter, ThierryA.
- Update the generated sanitizer file to the AMP spec, and simplify the file that generates it. See [#929](https://github.com/Automattic/amp-wp/pull/929). Props westonruter.
+- Add validation of host names in URLs. See [#983](https://github.com/Automattic/amp-wp/pull/983). Props rubengonzalezmrf.
See [0.7 milestone](https://github.com/Automattic/amp-wp/milestone/6?closed=1).
diff --git a/readme.txt b/readme.txt
index fe723ec86f8..21a2be07b52 100644
--- a/readme.txt
+++ b/readme.txt
@@ -47,6 +47,7 @@ Follow along with or [contribute](https://github.com/Automattic/amp-wp/blob/deve
- Creation of AMP-related notifications, on entering invalid content in the 'classic' editor. See [#912](https://github.com/Automattic/amp-wp/pull/912/). Props kienstra, westonruter, ThierryA.
- Add output buffering, ensuring the entire page is valid AMP. See [#929](https://github.com/Automattic/amp-wp/pull/929), [#857](https://github.com/Automattic/amp-wp/pull/857). Props westonruter, ThierryA.
- Update the generated sanitizer file to the AMP spec, and simplify the file that generates it. See [#929](https://github.com/Automattic/amp-wp/pull/929). Props westonruter.
+- Add validation of host names in URLs. See [#983](https://github.com/Automattic/amp-wp/pull/983). Props rubengonzalezmrf.
See [0.7 milestone](https://github.com/Automattic/amp-wp/milestone/6?closed=1).
diff --git a/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php b/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php
index a4746c4b743..a2115bc7e36 100644
--- a/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php
+++ b/tests/amp-tag-and-attribute-sanitizer-private-methods-tests.php
@@ -996,7 +996,7 @@ public function get_validate_attr_spec_list_for_node_data() {
),
),
),
- 1,
+ 2,
),
'attributes_allow_relative_false_fail' => array(
array(
@@ -1024,7 +1024,7 @@ public function get_validate_attr_spec_list_for_node_data() {
),
),
),
- 1,
+ 2,
),
'attributes_allow_empty_false_fail' => array(
array(
diff --git a/tests/test-tag-and-attribute-sanitizer.php b/tests/test-tag-and-attribute-sanitizer.php
index f579f8e8426..c99416d81b1 100644
--- a/tests/test-tag-and-attribute-sanitizer.php
+++ b/tests/test-tag-and-attribute-sanitizer.php
@@ -680,6 +680,23 @@ public function get_body_data() {
'value',
),
+ 'a_with_wrong_host' => array(
+ 'value',
+ 'value',
+ ),
+ 'a_with_encoded_host' => array(
+ 'value',
+ null,
+ ),
+ 'a_with_wrong_schemeless_host' => array(
+ 'value',
+ 'value',
+ ),
+ 'a_with_mail_host' => array(
+ 'value',
+ 'value',
+ ),
+
// font is removed so we should check that other elements are checked as well.
'font_with_other_bad_elements' => array(
'HeadlineSpan',