-
Notifications
You must be signed in to change notification settings - Fork 384
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve validating sanitizer with context for why element/attribute is invalid #3780
Changes from 37 commits
bf28199
656ebb0
a7f2750
6475b70
ad440fa
3f2f50e
3bb3d8e
ad66b8f
35638c5
a2e20d6
ef80051
2d6ec3d
00eb55c
a6d0fc9
15fb6dd
c55c58c
7f4bf4e
df96667
7b32198
9808761
374e0d3
934bbd1
5da07dc
b0dfd17
2c42a07
10b0c29
6975bd0
e2db927
5e56cce
9067d6c
5d9c205
7470a24
038caa7
998c45c
22b1869
8550fd7
322035b
dc76b9f
b96e4e7
827659a
f7d00f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,7 @@ abstract class AMP_Base_Sanitizer { | |
* | ||
* @var array | ||
*/ | ||
private $should_not_removed_nodes = []; | ||
private $nodes_to_keep = []; | ||
|
||
/** | ||
* AMP_Base_Sanitizer constructor. | ||
|
@@ -439,15 +439,15 @@ public function remove_invalid_child( $node, $validation_error = [] ) { | |
} | ||
|
||
// Prevent double-reporting nodes that are rejected for sanitization. | ||
if ( isset( $this->should_not_removed_nodes[ $node->nodeName ] ) && in_array( $node, $this->should_not_removed_nodes[ $node->nodeName ], true ) ) { | ||
if ( isset( $this->nodes_to_keep[ $node->nodeName ] ) && in_array( $node, $this->nodes_to_keep[ $node->nodeName ], true ) ) { | ||
return false; | ||
} | ||
|
||
$should_remove = $this->should_sanitize_validation_error( $validation_error, compact( 'node' ) ); | ||
if ( $should_remove ) { | ||
$node->parentNode->removeChild( $node ); | ||
} else { | ||
$this->should_not_removed_nodes[ $node->nodeName ][] = $node; | ||
$this->nodes_to_keep[ $node->nodeName ][] = $node; | ||
} | ||
return $should_remove; | ||
} | ||
|
@@ -463,9 +463,10 @@ public function remove_invalid_child( $node, $validation_error = [] ) { | |
* @param DOMElement $element The node for which to remove the attribute. | ||
* @param DOMAttr|string $attribute The attribute to remove from the element. | ||
* @param array $validation_error Validation error details. | ||
* @param array $attr_spec Attribute spec. | ||
* @return bool Whether the node should have been removed, that is, that the node was sanitized for validity. | ||
*/ | ||
public function remove_invalid_attribute( $element, $attribute, $validation_error = [] ) { | ||
public function remove_invalid_attribute( $element, $attribute, $validation_error = [], $attr_spec = [] ) { | ||
if ( $this->is_exempt_from_validation( $element ) ) { | ||
return false; | ||
} | ||
|
@@ -475,10 +476,23 @@ public function remove_invalid_attribute( $element, $attribute, $validation_erro | |
} else { | ||
$node = $attribute; | ||
} | ||
|
||
// Catch edge condition (no known possible way to reach). | ||
if ( ! ( $node instanceof DOMAttr ) || $element !== $node->parentNode ) { | ||
return false; | ||
} | ||
|
||
$should_remove = $this->should_sanitize_validation_error( $validation_error, compact( 'node' ) ); | ||
if ( $should_remove ) { | ||
$element->removeAttributeNode( $node ); | ||
$allow_empty = ! empty( $attr_spec[ AMP_Rule_Spec::VALUE_URL ][ AMP_Rule_Spec::ALLOW_EMPTY ] ); | ||
$is_href_attr = ( isset( $attr_spec[ AMP_Rule_Spec::VALUE_URL ] ) && 'href' === $node->nodeName ); | ||
if ( $allow_empty && ! $is_href_attr ) { | ||
$node->nodeValue = ''; | ||
} else { | ||
$element->removeAttributeNode( $node ); | ||
} | ||
} | ||
|
||
return $should_remove; | ||
} | ||
|
||
|
@@ -528,13 +542,15 @@ public function prepare_validation_error( array $error = [], array $data = [] ) | |
|
||
if ( $node instanceof DOMElement ) { | ||
if ( ! isset( $error['code'] ) ) { | ||
$error['code'] = AMP_Validation_Error_Taxonomy::INVALID_ELEMENT_CODE; | ||
$error['code'] = AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_TAG; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The base class retrieves internals from one of its extending classes. Can we extract all of the validation error constants into a separate class? I tend to create an namespace Amp\AmpWP;
interface ValidationError {
const DISALLOWED_TAG = 'DISALLOWED_TAG';
const DISALLOWED_CHILD_TAG = 'DISALLOWED_CHILD_TAG';
const DISALLOWED_FIRST_CHILD_TAG = 'DISALLOWED_FIRST_CHILD_TAG';
// [...]
} Thi is separate of any inheritance chain, and provides one central place where you retrieve constants from. The interface name also makes the syntax nicer to read: $error['code'] = ValidationError::DISALLOWED_TAG; If we need to have more grouping, we can have multiple interfaces, like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. On one hand, I think this should just be removed entirely, and let the Otherwise, I think this should be scoped with what you discussed in #3780 (comment). It's bound up with generating PHP objects from the spec. So I think we should do it, but probably not in the scope of this PR. |
||
} | ||
|
||
if ( ! isset( $error['type'] ) ) { | ||
// @todo Also include javascript: protocol for URL errors. | ||
$error['type'] = 'script' === $node->nodeName ? AMP_Validation_Error_Taxonomy::JS_ERROR_TYPE : AMP_Validation_Error_Taxonomy::HTML_ELEMENT_ERROR_TYPE; | ||
} | ||
|
||
// @todo Change from node_attributes to element_attributes to harmonize the two. | ||
if ( ! isset( $error['node_attributes'] ) ) { | ||
$error['node_attributes'] = []; | ||
foreach ( $node->attributes as $attribute ) { | ||
|
@@ -555,7 +571,7 @@ public function prepare_validation_error( array $error = [], array $data = [] ) | |
} | ||
} elseif ( $node instanceof DOMAttr ) { | ||
if ( ! isset( $error['code'] ) ) { | ||
$error['code'] = AMP_Validation_Error_Taxonomy::INVALID_ATTRIBUTE_CODE; | ||
$error['code'] = AMP_Tag_And_Attribute_Sanitizer::DISALLOWED_ATTR; | ||
} | ||
if ( ! isset( $error['type'] ) ) { | ||
// If this is an attribute that begins with on, like onclick, it should be a js_error. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
😜
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, maybe I should mention I searched for usage first: