Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache response status and headers when fetching external stylesheets #4509

Merged
merged 12 commits into from
Apr 3, 2020
4 changes: 4 additions & 0 deletions includes/validation/class-amp-validation-error-taxonomy.php
Original file line number Diff line number Diff line change
Expand Up @@ -3344,6 +3344,10 @@ public static function get_source_key_label( $key, $validation_error ) {
return __( 'Invalid attribute', 'amp' );
case 'required_attr_value':
return __( 'Required attribute value', 'amp' );
case 'url':
return __( 'URL', 'amp' );
case 'message':
return __( 'Message', 'amp' );
default:
return $key;
}
Expand Down
29 changes: 29 additions & 0 deletions lib/common/src/Exception/FailedToGetCachedResponseData.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php


pierlon marked this conversation as resolved.
Show resolved Hide resolved
namespace AmpProject\Exception;


pierlon marked this conversation as resolved.
Show resolved Hide resolved
use RuntimeException;

/**
* Exception thrown when a cached remote response could not be retrieved.
*
* @package ampproject/common
*/
final class FailedToGetCachedResponseData extends RuntimeException implements AmpException
{

/**
* Instantiate a FailedToGetCachedResponseData exception for a URL if the cached response data could not be retrieved.
*
* @param string $url URL that failed to be fetched.
* @return self
*/
public static function withUrl($url)
{
$message = "Failed to retrieve the cached response data for the URL '{$url}'.";

return new self($message);
}
}
23 changes: 16 additions & 7 deletions src/RemoteRequest/CachedRemoteGetRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

namespace AmpProject\AmpWP\RemoteRequest;

use AmpProject\Exception\FailedToGetCachedResponseData;
use AmpProject\Exception\FailedToGetFromRemoteUrl;
use AmpProject\RemoteGetRequest;
use AmpProject\RemoteRequest\RemoteGetRequestResponse;
Expand Down Expand Up @@ -101,13 +102,12 @@ public function __construct(
*
* @param string $url URL to get.
* @return Response Response for the executed request.
* @throws FailedToGetFromRemoteUrl If retrieving the contents from the URL failed.
* @throws FailedToGetFromRemoteUrl|FailedToGetCachedResponseData If retrieving the contents from the URL failed.
pierlon marked this conversation as resolved.
Show resolved Hide resolved
*/
public function get( $url ) {
$cache_key = self::TRANSIENT_PREFIX . md5( __CLASS__ . $url );
$cached_data = get_transient( $cache_key );
$headers = [];
$status = null;

if ( false !== $cached_data ) {
if ( PHP_MAJOR_VERSION >= 7 ) {
Expand All @@ -126,17 +126,26 @@ public function get( $url ) {
$headers = $response->getHeaders();
$body = $response->getBody();
} catch ( FailedToGetFromRemoteUrl $exception ) {
$status = $exception->getStatusCode();
$expiry = new DateTimeImmutable( "+ {$this->min_expiry} seconds" );
$body = $exception->getMessage();
$status = $exception->getStatusCode();
$expiry = new DateTimeImmutable( "+ {$this->min_expiry} seconds" );
$body = $exception->getMessage();
$response = new RemoteGetRequestResponse( $body, $headers, $status );
}

$cached_data = new CachedData( $body, $expiry );
$cached_data = new CachedData( compact( 'body', 'status', 'headers' ), $expiry );
pierlon marked this conversation as resolved.
Show resolved Hide resolved

set_transient( $cache_key, serialize( $cached_data ) ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.serialize_serialize

return $response;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this return needed?

Suggested change
return $response;

Would removing it make the logic more consistent between cached and non-cached code paths?

}

$cached_value = $cached_data->get_value();

if ( ! isset( $cached_value['body'], $cached_value['headers'], $cached_value['status'] ) ) {
throw new FailedToGetCachedResponseData( $url );
}

return new RemoteGetRequestResponse( $cached_data->get_value(), $headers, $status );
return new RemoteGetRequestResponse( $cached_value['body'], $cached_value['headers'], $cached_value['status'] );
}

/**
Expand Down
124 changes: 94 additions & 30 deletions tests/php/test-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1587,22 +1587,24 @@ public function test_external_stylesheet_cache_control() {
$request_count = 0;
$href = 'https://www.example.com/styles.css';
$response_body = 'body{color:red}';
$headers = [
'content-type' => 'text/css',
'cache-control' => 'max-age=' . ( YEAR_IN_SECONDS + MONTH_IN_SECONDS ),
];
$status_code = 200;

add_filter(
'pre_http_request',
function( $preempt, $request, $url ) use ( $href, &$request_count, $response_body ) {
function( $preempt, $request, $url ) use ( $href, &$request_count, $response_body, $headers, $status_code ) {
$this->assertRegExp( '#^https?://#', $url );
if ( set_url_scheme( $url, 'https' ) === set_url_scheme( $href, 'https' ) ) {
$request_count++;
$preempt = [
'response' => [
'code' => 200,
],
'headers' => [
'content-type' => 'text/css',
'cache-control' => 'max-age=' . ( YEAR_IN_SECONDS + MONTH_IN_SECONDS ),
'code' => $status_code,
],
'body' => $response_body,
'headers' => $headers,
'body' => $response_body,
];
}
return $preempt;
Expand Down Expand Up @@ -1652,7 +1654,14 @@ function( $preempt, $request, $url ) use ( $href, &$request_count, $response_bod
$cached_data = unserialize( $transient ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.serialize_unserialize
$this->assertInstanceOf( CachedData::class, $cached_data );

$this->assertEquals( $response_body, $cached_data->get_value() );
$this->assertEquals(
[
'body' => $response_body,
'headers' => $headers,
'status' => $status_code,
],
$cached_data->get_value()
);

$expiry = $cached_data->get_expiry();
$this->assertGreaterThan( ( new DateTimeImmutable( '+ 1 year' ) )->getTimestamp(), $expiry->getTimestamp() );
Expand All @@ -1662,28 +1671,86 @@ function( $preempt, $request, $url ) use ( $href, &$request_count, $response_bod
}

/**
* Test that external stylesheets fetches are cached when failures occur.
* Data for test_external_stylesheet()
*
* @return array
*/
public function get_external_stylesheet_data() {
return [
'successful' => [
'style_url' => 'https://www.example.com/styles.css',
'http_response' => [
'body' => 'body { color: #fff }',
'code' => 200,
'headers' => [
'cache-control' => 'max-age=1441',
'content-type' => 'text/css',
],
],
'expected_styles' => [ 'body{color:#fff}' ],
'expected_errors' => [],
'cached_data' => new CachedData(
[
'body' => 'body { color: #fff }',
'headers' => [
'cache-control' => 'max-age=1441',
'content-type' => 'text/css',
],
'status' => 200,
],
new DateTimeImmutable( '+ 1441 seconds' )
),
],
'failed' => [
'style_url' => 'https://www.example.com/not-found/styles.css',
'http_response' => [
'body' => 'Not Found!',
'code' => 404,
'headers' => [
'content-type' => 'text/html',
],
],
'expected_styles' => [],
'expected_errors' => [ AMP_Style_Sanitizer::STYLESHEET_FETCH_ERROR ],
'cached_data' => new CachedData(
[
'body' => FailedToGetFromRemoteUrl::withHttpStatus( 'https://www.example.com/not-found/styles.css', 404 )->getMessage(),
'headers' => [],
'status' => 404,
],
new DateTimeImmutable( '+ ' . DAY_IN_SECONDS . ' seconds' )
),
],
];
}

/**
* Test that external stylesheets fetches are cached.
*
* @dataProvider get_external_stylesheet_data
* @covers AMP_Style_Sanitizer::process_link_element()
*
* @param string $style_url Stylesheet URL.
* @param array $http_response Mocked HTTP response.
* @param array $expected_styles Expected minified stylesheets.
* @param array $expected_errors Expected error codes.
* @param CachedData $expected_cached_data Expected cache data.
*/
public function test_external_stylesheet_not_found() {
public function test_external_stylesheet( $style_url, $http_response, $expected_styles, $expected_errors, $expected_cached_data ) {
$request_count = 0;
$href = 'https://www.example.com/styles.css';

add_filter(
'pre_http_request',
function( $preempt, $request, $url ) use ( $href, &$request_count ) {
function( $preempt, $request, $url ) use ( $style_url, $http_response, &$request_count ) {
$this->assertRegExp( '#^https?://#', $url );
if ( set_url_scheme( $url, 'https' ) === set_url_scheme( $href, 'https' ) ) {
if ( set_url_scheme( $url, 'https' ) === set_url_scheme( $style_url, 'https' ) ) {
$request_count++;
$preempt = [
'response' => [
'code' => 404,
],
'headers' => [
'content-type' => 'text/html',
'code' => $http_response['code'],
],
'body' => 'Not Found!',
'headers' => $http_response['headers'],
'body' => $http_response['body'],
];
}
return $preempt;
Expand All @@ -1692,8 +1759,8 @@ function( $preempt, $request, $url ) use ( $href, &$request_count ) {
3
);

$sanitize_and_get_stylesheets = static function() use ( $href ) {
$html = sprintf( '<html amp><head><meta charset="utf-8"><link rel="stylesheet" href="%s"></head><body></body></html>', esc_url( $href ) ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet
$sanitize_and_get_stylesheets = static function( $css_url ) {
$html = sprintf( '<html amp><head><meta charset="utf-8"><link rel="stylesheet" href="%s"></head><body></body></html>', esc_url( $css_url ) ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet
$dom = Document::fromHtml( $html );

$found_error_codes = [];
Expand All @@ -1714,13 +1781,13 @@ function( $preempt, $request, $url ) use ( $href, &$request_count ) {

$this->assertEquals( 0, $request_count );

list( $found_error_codes, $actual_stylesheets ) = $sanitize_and_get_stylesheets();
list( $found_error_codes, $actual_stylesheets ) = $sanitize_and_get_stylesheets( $style_url );

$this->assertEquals( [ AMP_Style_Sanitizer::STYLESHEET_FETCH_ERROR ], $found_error_codes );
$this->assertEmpty( $actual_stylesheets );
$this->assertEquals( $expected_errors, $found_error_codes );
$this->assertEquals( $expected_styles, $actual_stylesheets );
$this->assertEquals( 1, $request_count, 'Expected HTTP request.' );

$cache_key = CachedRemoteGetRequest::TRANSIENT_PREFIX . md5( CachedRemoteGetRequest::class . $href );
$cache_key = CachedRemoteGetRequest::TRANSIENT_PREFIX . md5( CachedRemoteGetRequest::class . $style_url );
$transient = get_transient( $cache_key );
$this->assertNotFalse( $transient );

Expand All @@ -1732,15 +1799,12 @@ function( $preempt, $request, $url ) use ( $href, &$request_count ) {
$cached_data = unserialize( $transient ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.serialize_unserialize
$this->assertInstanceOf( CachedData::class, $cached_data );

$expected_exception = FailedToGetFromRemoteUrl::withHttpStatus( $href, 404 );
$this->assertEquals( $expected_exception->getMessage(), $cached_data->get_value() );
$this->assertEquals( $expected_cached_data->get_value(), $cached_data->get_value() );

// Verify that CachedRemoteGetRequest::$min_expiry is used for HTTP failures. It is set to 1 day.
$expiry = $cached_data->get_expiry();
$this->assertLessThanOrEqual( ( new DateTimeImmutable( '+ 1 day' ) )->getTimestamp(), $expiry->getTimestamp() );
$this->assertGreaterThan( ( new DateTimeImmutable( '+ 12 hours' ) )->getTimestamp(), $expiry->getTimestamp() );
$this->assertEquals( $cached_data->get_expiry()->getTimestamp(), $expiry->getTimestamp() );

$sanitize_and_get_stylesheets();
$sanitize_and_get_stylesheets( $style_url );
$this->assertEquals( 1, $request_count, 'Expected HTTP request to be cached.' );
}

Expand Down