-
Notifications
You must be signed in to change notification settings - Fork 383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sanitize entire HTML output when theme support is present #888
Changes from 4 commits
82c274b
91ef386
a5629f9
a666289
70a444e
eededf8
876c22f
8257cb0
ef69f2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -191,8 +191,6 @@ public static function register_hooks() { | |
*/ | ||
add_action( 'template_redirect', array( __CLASS__, 'start_output_buffering' ), 0 ); | ||
|
||
add_filter( 'the_content', array( __CLASS__, 'filter_the_content' ), PHP_INT_MAX ); | ||
|
||
// @todo Add character conversion. | ||
} | ||
|
||
|
@@ -422,32 +420,12 @@ public static function get_amp_custom_styles() { | |
return $css; | ||
} | ||
|
||
/** | ||
* Filter the content to be valid AMP. | ||
* | ||
* @param string $content Content. | ||
* @return string Amplified content. | ||
*/ | ||
public static function filter_the_content( $content ) { | ||
$args = array( | ||
'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat. | ||
); | ||
|
||
list( $sanitized_content, $scripts, $styles ) = AMP_Content_Sanitizer::sanitize( $content, self::$sanitizer_classes, $args ); | ||
|
||
self::$amp_scripts = array_merge( self::$amp_scripts, $scripts ); | ||
self::$amp_styles = array_merge( self::$amp_styles, $styles ); | ||
|
||
return $sanitized_content; | ||
} | ||
|
||
/** | ||
* Determine required AMP scripts. | ||
* | ||
* @param string $html Output HTML. | ||
* @return string Scripts to inject into the HEAD. | ||
*/ | ||
public static function get_amp_component_scripts( $html ) { | ||
public static function get_amp_component_scripts() { | ||
$amp_scripts = self::$amp_scripts; | ||
|
||
foreach ( self::$embed_handlers as $embed_handler ) { | ||
|
@@ -492,10 +470,33 @@ public static function start_output_buffering() { | |
* Finish output buffering. | ||
* | ||
* @todo Do this in shutdown instead of output buffering callback? | ||
* @global int $content_width | ||
* @param string $output Buffered output. | ||
* @return string Finalized output. | ||
*/ | ||
public static function finish_output_buffering( $output ) { | ||
global $content_width; | ||
|
||
$dom = AMP_DOM_Utils::get_dom( $output ); | ||
$args = array( | ||
'content_max_width' => ! empty( $content_width ) ? $content_width : AMP_Post_Template::CONTENT_MAX_WIDTH, // Back-compat. | ||
); | ||
|
||
$assets = AMP_Content_Sanitizer::sanitize_document( $dom, self::$sanitizer_classes, $args ); | ||
|
||
self::$amp_scripts = array_merge( self::$amp_scripts, $assets['scripts'] ); | ||
self::$amp_styles = array_merge( self::$amp_styles, $assets['styles'] ); | ||
|
||
/* | ||
* @todo The sanitize method needs to be updated to sanitize the entire HTML element and not just the BODY. | ||
* This will require updating mandatory_parent_blacklist in amphtml-update.py to include elements that appear in the HEAD. | ||
* This will ensure that the scripts and styles that plugins output via wp_head() will be sanitized as well. However, | ||
* since the old paired mode is sending content from the *body* we'll need to be able to filter out the elements | ||
* from outside the body from being part of the whitelist sanitizer when it runs when theme support is not present, | ||
* as otherwise elements from the HEAD could get added to the BODY. | ||
*/ | ||
$sanitized_inner_body = AMP_DOM_Utils::get_content_from_dom( $dom ); | ||
$output = preg_replace( '#(<body.*?>)(.+)(</body>)#si', '$1' . $sanitized_inner_body . '$3', $output ); | ||
|
||
// Inject required scripts. | ||
$output = preg_replace( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @westonruter |
||
|
@@ -513,7 +514,6 @@ public static function finish_output_buffering( $output ) { | |
1 | ||
); | ||
|
||
// @todo Add more validation checking and potentially the whitelist sanitizer. | ||
return $output; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,4 +67,53 @@ public function test_is_paired_available() { | |
$this->assertTrue( is_search() ); | ||
$this->assertFalse( AMP_Theme_Support::is_paired_available() ); | ||
} | ||
|
||
/** | ||
* Test finish_output_buffering. | ||
* | ||
* @covers AMP_Theme_Support::finish_output_buffering() | ||
*/ | ||
public function test_finish_output_buffering() { | ||
add_theme_support( 'amp' ); | ||
AMP_Theme_Support::init(); | ||
ob_start(); | ||
?> | ||
<!DOCTYPE html> | ||
<html amp <?php language_attributes(); ?>> | ||
<head> | ||
<?php wp_head(); ?> | ||
<script data-head>document.write('TODO: This needs to be sanitized as well once.');</script> | ||
</head> | ||
<body> | ||
<img width="100" height="100" src="https://example.com/test.png"> | ||
<audio width="400" height="300" src="https://example.com/audios/myaudio.mp3"></audio> | ||
<amp-ad type="a9" | ||
width="300" | ||
height="250" | ||
data-aax_size="300x250" | ||
data-aax_pubname="test123" | ||
data-aax_src="302"></amp-ad> | ||
<?php wp_footer(); ?> | ||
</body> | ||
</html> | ||
<?php | ||
$original_html = trim( ob_get_clean() ); | ||
$sanitized_html = AMP_Theme_Support::finish_output_buffering( $original_html ); | ||
|
||
$this->assertContains( '<meta charset="utf-8">', $sanitized_html ); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @westonruter @DavidCramer these tests could use a bit more coverage here. I believe it is OK to add that at a later stage, since sanitization of the other components (forms, etc.) is still WIP. |
||
$this->assertContains( '<meta name="viewport" content="width=device-width,minimum-scale=1">', $sanitized_html ); | ||
$this->assertContains( '<style amp-boilerplate>', $sanitized_html ); | ||
$this->assertContains( '<style amp-custom>', $sanitized_html ); | ||
$this->assertContains( '<script async src="https://cdn.ampproject.org/v0.js"', $sanitized_html ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedScript | ||
$this->assertContains( '<meta name="generator" content="AMP Plugin', $sanitized_html ); | ||
|
||
$this->assertNotContains( '<img', $sanitized_html ); | ||
$this->assertContains( '<amp-img', $sanitized_html ); | ||
|
||
$this->assertNotContains( '<audio', $sanitized_html ); | ||
$this->assertContains( '<amp-audio', $sanitized_html ); | ||
$this->assertContains( '<script async custom-element="amp-audio"', $sanitized_html ); | ||
|
||
$this->assertContains( '<script async custom-element="amp-ad"', $sanitized_html ); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@westonruter here we're replacing the body with the sanitised body. This is because it's returning `AMP_DOM_Utils::get_content_from_dom( $dom )` from the sanitize method. Wouldn't this be easier if the method returned `$dom->saveHTML()` if the content passed is a complete document? This would remove the need to replace the body after it was sanitized.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, potentially. The `sanitize` method would be better called `sanitize_content`. Better handling of sanitizing entire documents should be done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@DavidCramer how about this: 876c22f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@westonruter That's nice, makes way for a full doc later on. I see that you made a comment about the `head` needing an update to `mandatory_parent_blacklist`, so that makes sense now.