-
Notifications
You must be signed in to change notification settings - Fork 384
/
class-amp-iframe-sanitizer.php
445 lines (392 loc) · 15.1 KB
/
class-amp-iframe-sanitizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
<?php
/**
* Class AMP_Iframe_Sanitizer
*
* @package AMP
*/
use AmpProject\AmpWP\ValidationExemption;
use AmpProject\DevMode;
use AmpProject\Html\Attribute;
use AmpProject\Layout;
/**
* Class AMP_Iframe_Sanitizer
*
* Converts <iframe> tags to <amp-iframe>
*
* @internal
*/
class AMP_Iframe_Sanitizer extends AMP_Base_Sanitizer {
use AMP_Noscript_Fallback;
/**
 * Default value for the `sandbox` attribute of a converted iframe.
 *
 * This value is used by normalize_attributes() when the original <iframe> has no `sandbox` attribute of its own.
 *
 * | Sandbox Token                             | Included | Rationale
 * |-------------------------------------------|----------|----------
 * | `allow-downloads`                         | Yes      | Useful for downloading documents, etc.
 * | `allow-downloads-without-user-activation` | No       | Experimental per MDN. Bad UX.
 * | `allow-forms`                             | Yes      | For embeds like polls.
 * | `allow-modals`                            | Yes      | For apps to show `confirm()`, etc.
 * | `allow-orientation-lock`                  | Yes      | Since we `allowfullscreen`, useful for games, etc.
 * | `allow-pointer-lock`                      | Yes      | Useful for games.
 * | `allow-popups`                            | Yes      | To open YouTube video in new window, for example.
 * | `allow-popups-to-escape-sandbox`          | Yes      | Useful for ads.
 * | `allow-presentation`                      | Yes      | To cast YouTube videos, for example.
 * | `allow-same-origin`                       | Yes      | Removed if the iframe is same origin and no alias origin is set.
 * | `allow-scripts`                           | Yes      | An iframe's primary use case is custom JS.
 * | `allow-storage-access-by-user-activation` | No       | Experimental per MDN.
 * | `allow-top-navigation`                    | No       | Poor user experience.
 * | `allow-top-navigation-by-user-activation` | Yes      | Key for clicking `target=_top` links in iframes.
 *
 * @since 0.2
 * @since 2.0.5 Updated to include the majority of the other sandbox values, which are included by default if sandbox is not provided.
 * @link https://html.spec.whatwg.org/multipage/iframe-embed-object.html#attr-iframe-sandbox
 *
 * @const string Space-separated list of sandbox tokens.
 */
const SANDBOX_DEFAULTS = 'allow-downloads allow-forms allow-modals allow-orientation-lock allow-pointer-lock allow-popups allow-popups-to-escape-sandbox allow-presentation allow-same-origin allow-scripts allow-top-navigation-by-user-activation';
/**
 * Tag.
 *
 * Name of the element which sanitize() looks up in the document and which is passed to
 * initialize_noscript_allowed_attributes() when the noscript fallback is enabled.
 *
 * @since 0.2
 *
 * @var string HTML <iframe> tag to identify and replace with AMP version.
 */
public static $tag = 'iframe';
/**
 * Default args.
 *
 * Note that sanitize() additionally reads an `align_wide_support` arg which has no default here; when it is
 * absent or empty, iframes inside an `alignwide`/`alignfull` figure get the intrinsic layout rather than the
 * responsive one.
 *
 * @var array {
 *     Default args.
 *
 *     @type bool   $add_placeholder       Whether to add a placeholder element.
 *     @type bool   $add_noscript_fallback Whether to add a noscript fallback.
 *     @type string $current_origin        The current origin serving the page. Normally this will be the $_SERVER[HTTP_HOST].
 *     @type string $alias_origin          An alternative origin which can be supplied which is used when encountering same-origin iframes.
 *     @type bool   $native_iframe_used    Whether an HTML5 iframe element should be used instead of amp-iframe.
 * }
 */
protected $DEFAULT_ARGS = [
'add_placeholder' => false,
'add_noscript_fallback' => true,
'current_origin' => null,
'alias_origin' => null,
'native_iframe_used' => false,
];
/**
 * Report which HTML selectors this sanitizer may convert into AMP component selectors.
 *
 * No conversion takes place when native iframes are in use, so the mapping is empty in that case.
 *
 * @return array Mapping of an HTML tag name to the list of AMP tag names it may become.
 */
public function get_selector_conversion_mapping() {
	$mapping = [];

	if ( ! $this->args['native_iframe_used'] ) {
		$mapping['iframe'] = [ 'amp-iframe' ];
	}

	return $mapping;
}
/**
 * Sanitize the <iframe> elements from the HTML contained in this instance's Dom\Document.
 *
 * The matched elements are visited from the last one to the first. For each element:
 *
 * - It is skipped when it is already inside an AMP noscript fallback or has a dev mode exemption.
 * - When the `native_iframe_used` arg is set, it is kept as-is and only marked as PX-verified.
 * - Otherwise its attributes are normalized; if no `src` remains, the element is removed as invalid.
 * - Otherwise it is replaced with an <amp-iframe> that receives the normalized attributes, any existing
 *   `placeholder`/`overflow` child elements (or generated ones where applicable), and, when the
 *   `add_noscript_fallback` arg is set, the original <iframe> as a noscript fallback.
 *
 * As a side effect, the `current_origin` and `alias_origin` args are overwritten with their normalized origins.
 *
 * @since 0.2
 */
public function sanitize() {
// Collect all iframes in the document and bail early when there are none.
$nodes = $this->dom->getElementsByTagName( self::$tag );
$num_nodes = $nodes->length;
if ( 0 === $num_nodes ) {
return;
}
if ( $this->args['add_noscript_fallback'] ) {
$this->initialize_noscript_allowed_attributes( self::$tag );
}
// Ensure origins are normalized.
$this->args['current_origin'] = $this->get_origin_from_url( $this->args['current_origin'] );
if ( ! empty( $this->args['alias_origin'] ) ) {
$this->args['alias_origin'] = $this->get_origin_from_url( $this->args['alias_origin'] );
}
// Iterate backwards; presumably so that replacing nodes does not shift the indices of nodes still to be visited.
for ( $i = $num_nodes - 1; $i >= 0; $i-- ) {
/**
* Iframe element.
*
* @var DOMElement $node
*/
$node = $nodes->item( $i );
// Skip element if already inside of an AMP element as a noscript fallback, or if it has a dev mode exemption.
if ( $this->is_inside_amp_noscript( $node ) || DevMode::hasExemptionForNode( $node ) ) {
continue;
}
// If using native <iframe> instead of converting to <amp-iframe>, just mark the element as being unvalidated.
if ( $this->args['native_iframe_used'] ) {
ValidationExemption::mark_node_as_px_verified( $node );
continue;
}
// Read the attributes, pass them through set_layout() (defined outside this class body), then apply the iframe-specific normalization.
$normalized_attributes = AMP_DOM_Utils::get_node_attributes_as_assoc_array( $node );
$normalized_attributes = $this->set_layout( $normalized_attributes );
$normalized_attributes = $this->normalize_attributes( $normalized_attributes );
/**
* If the src doesn't exist, remove the node. Either it never
* existed or was invalidated while filtering attributes above.
*
* @todo: add an arg to allow for a fallback element in this instance (note that filter cannot be used inside a sanitizer).
* @see: https://github.com/ampproject/amphtml/issues/2261
*/
if ( empty( $normalized_attributes['src'] ) ) {
$this->remove_invalid_child(
$node,
[
'code' => AMP_Tag_And_Attribute_Sanitizer::ATTR_REQUIRED_BUT_MISSING,
'attributes' => [ 'src' ],
'spec_name' => 'amp-iframe',
]
);
continue;
}
$this->did_convert_elements = true;
// Only pick a layout here when none was determined yet and both dimensions are known.
if ( empty( $normalized_attributes[ Attribute::LAYOUT ] ) && ! empty( $normalized_attributes[ Attribute::HEIGHT ] ) && ! empty( $normalized_attributes[ Attribute::WIDTH ] ) ) {
// Set layout to responsive if the iframe is aligned to full width.
// A <figure> is looked for as the parent or the grandparent; the grandparent takes precedence when both match.
$figure_node = null;
if ( $node->parentNode instanceof DOMElement && 'figure' === $node->parentNode->tagName ) {
$figure_node = $node->parentNode;
}
if ( $node->parentNode->parentNode instanceof DOMElement && 'figure' === $node->parentNode->parentNode->tagName ) {
$figure_node = $node->parentNode->parentNode;
}
// Responsive layout requires the `align_wide_support` arg and an `alignwide`/`alignfull` class on the figure; otherwise fall back to intrinsic.
if (
! empty( $this->args['align_wide_support'] )
&& $figure_node
&& preg_match( '/(^|\s)(alignwide|alignfull)(\s|$)/', $figure_node->getAttribute( Attribute::CLASS_ ) )
) {
$normalized_attributes[ Attribute::LAYOUT ] = Layout::RESPONSIVE;
} else {
$normalized_attributes[ Attribute::LAYOUT ] = Layout::INTRINSIC;
}
$this->add_or_append_attribute( $normalized_attributes, 'class', 'amp-wp-enforced-sizes' );
}
// Remove the ID from the original node so that PHP DOM doesn't fail to set it on the replacement element.
$node->removeAttribute( Attribute::ID );
$new_node = AMP_DOM_Utils::create_node( $this->dom, 'amp-iframe', $normalized_attributes );
// Find existing placeholder/overflow.
// The child list is copied to an array first because children are removed from the node while looping.
$placeholder_node = null;
$overflow_node = null;
foreach ( iterator_to_array( $node->childNodes ) as $child ) {
if ( ! ( $child instanceof DOMElement ) ) {
continue;
}
if ( $child->hasAttribute( 'placeholder' ) ) {
$placeholder_node = $node->removeChild( $child );
} elseif ( $child->hasAttribute( 'overflow' ) ) {
$overflow_node = $node->removeChild( $child );
}
}
// Add placeholder.
// An existing placeholder is always re-attached; a generated one is only added when `add_placeholder` is strictly true.
if ( $placeholder_node || true === $this->args['add_placeholder'] ) {
if ( ! $placeholder_node ) {
$placeholder_node = $this->build_placeholder(); // @todo Can a better placeholder default be devised?
}
$new_node->appendChild( $placeholder_node );
}
// Add overflow.
// A resizable amp-iframe without an existing overflow element gets a generated <button overflow>, labelled with
// the original iframe's `data-amp-overflow-text` attribute when present or with a translated default otherwise.
if ( $new_node->hasAttribute( 'resizable' ) && ! $overflow_node ) {
$overflow_node = $this->dom->createElement( 'button' );
$overflow_node->setAttribute( 'overflow', '' );
if ( $node->hasAttribute( 'data-amp-overflow-text' ) ) {
$overflow_text = $node->getAttribute( 'data-amp-overflow-text' );
} else {
$overflow_text = __( 'Show all', 'amp' );
}
$overflow_node->appendChild( $this->dom->createTextNode( $overflow_text ) );
}
if ( $overflow_node ) {
$new_node->appendChild( $overflow_node );
}
// Swap the original iframe for the amp-iframe in the document.
$node->parentNode->replaceChild( $new_node, $node );
if ( $this->args['add_noscript_fallback'] ) {
// Write the normalized src back onto the original iframe before it is reused as the fallback.
$node->setAttribute( 'src', $normalized_attributes['src'] );
// AMP is stricter than HTML5 for this attribute, so make sure we use a normalized value.
if ( $node->hasAttribute( 'frameborder' ) ) {
$node->setAttribute( 'frameborder', $normalized_attributes['frameborder'] );
}
// Preserve original node in noscript for no-JS environments.
$this->append_old_node_noscript( $new_node, $node, $this->dom );
}
}
}
/**
 * Normalize HTML attributes for <amp-iframe> elements.
 *
 * Attributes which are not listed below are passed through unchanged.
 *
 * @param string[] $attributes {
 *     Attributes.
 *
 *     @type string $src                    IFrame URL - Empty if HTTPS required per $this->args['require_https_src'].
 *                                          A root-relative URL is prefixed with the current origin. A URL on the
 *                                          current origin is rewritten to the alias origin when one is set.
 *     @type int    $width                  <iframe> width attribute - Set to numeric value if px or %
 *     @type int    $height                 <iframe> height attribute - Set to numeric value if px or %
 *     @type string $sandbox                <iframe> `sandbox` attribute - Pass along if found; default to value of
 *                                          self::SANDBOX_DEFAULTS. The `allow-same-origin` token is removed when the
 *                                          src is on the current origin and no alias origin is set.
 *     @type string $class                  <iframe> `class` attribute - Pass along if found
 *     @type string $sizes                  <iframe> `sizes` attribute - Pass along if found
 *     @type string $id                     <iframe> `id` attribute - Pass along if found
 *     @type int    $frameborder            <iframe> `frameborder` attribute - Filter to '0' or '1'; default to '0'
 *     @type bool   $allowfullscreen        <iframe> `allowfullscreen` attribute - Emitted as empty string '' unless
 *                                          the value is 'false', in which case it is dropped
 *     @type bool   $allowtransparency      <iframe> `allowtransparency` attribute - Same handling as `allowfullscreen`
 *     @type string $mozallowfullscreen     Always dropped
 *     @type string $webkitallowfullscreen  Always dropped
 *     @type string $loading                Dropped if the value is `lazy`; passed along otherwise
 *     @type string $security               Always dropped
 *     @type string $marginwidth            Dropped if the value is `0`; passed along otherwise
 *     @type string $marginheight           Dropped if the value is `0`; passed along otherwise
 *     @type string $data-amp-resizable     Converted into an empty `resizable` attribute
 *     @type string $data-amp-overflow-text Always dropped (read from the original element by sanitize())
 *     @type string $type                   <iframe> `type` attribute - Pass along if value is not `text/html`
 * }
 * @return array Returns HTML attributes; normalizes src, dimensions, frameborder, sandbox, allowtransparency and allowfullscreen
 */
private function normalize_attributes( $attributes ) {
	$out                      = [];
	$remove_allow_same_origin = false;

	foreach ( $attributes as $name => $value ) {
		switch ( $name ) {
			case 'src':
				// Make the URL absolute since relative URLs are not allowed in amp-iframe.
				// Protocol-relative URLs (starting with `//`) are deliberately left alone here.
				if ( '/' === substr( $value, 0, 1 ) && '/' !== substr( $value, 1, 1 ) ) {
					// The string cast avoids handing null to rtrim() (deprecated as of PHP 8.1) when no current origin is known.
					$value = untrailingslashit( (string) $this->args['current_origin'] ) . $value;
				}

				$value = $this->maybe_enforce_https_src( $value, true );

				// Handle case where iframe source origin is the same as the host page's origin.
				if ( $this->get_origin_from_url( $value ) === $this->args['current_origin'] ) {
					if ( ! empty( $this->args['alias_origin'] ) ) {
						// Serve the iframe from the alias origin instead so that it is no longer same-origin.
						$value = preg_replace( '#^\w+://[^/]+#', $this->args['alias_origin'], $value );
					} else {
						$remove_allow_same_origin = true;
					}
				}

				$out[ $name ] = $value;
				break;

			case 'width':
			case 'height':
				$out[ $name ] = $this->sanitize_dimension( $value, $name );
				break;

			case 'frameborder':
				$out[ $name ] = $this->sanitize_boolean_digit( $value );
				break;

			case 'allowfullscreen':
			case 'allowtransparency':
				if ( 'false' !== strtolower( $value ) ) {
					$out[ $name ] = '';
				}
				break;

			case 'mozallowfullscreen':
			case 'webkitallowfullscreen':
				// Omit these since amp-iframe will add them if needed if the `allowfullscreen` attribute is present.
				break;

			case 'loading':
				/*
				 * The `amp-iframe` component already does lazy-loading by default; trigger a validation error only
				 * if the value is not `lazy`.
				 */
				if ( 'lazy' !== strtolower( $value ) ) {
					$out[ $name ] = $value;
				}
				break;

			case 'security':
				/*
				 * Omit the `security` attribute as it has now been superseded by the `sandbox` attribute. It is
				 * (apparently) only supported by IE <https://stackoverflow.com/a/20071528>.
				 */
				break;

			case 'marginwidth':
			case 'marginheight':
				// These attributes have been obsolete since HTML5. If they have the value `0` they can be omitted.
				if ( '0' !== $value ) {
					$out[ $name ] = $value;
				}
				break;

			case 'data-amp-resizable':
				$out['resizable'] = '';
				break;

			case 'data-amp-overflow-text':
				// No need to copy.
				break;

			case 'type':
				/*
				 * Omit the `type` attribute if its value is `text/html`. Popular embed providers such as Amazon
				 * Kindle use this non-standard attribute, which is apparently a vestige from usage on <object>.
				 */
				if ( 'text/html' !== strtolower( $value ) ) {
					$out[ $name ] = $value;
				}
				break;

			default:
				$out[ $name ] = $value;
				break;
		}
	}

	if ( ! isset( $out['sandbox'] ) ) {
		$out['sandbox'] = self::SANDBOX_DEFAULTS;
	}

	// Remove allow-same-origin from sandbox if required.
	if ( $remove_allow_same_origin ) {
		/*
		 * Sandbox tokens are ASCII case-insensitive, hence the `i` flag. Lookarounds are used for the token
		 * boundaries so that the separating whitespace is not consumed by the boundary check; otherwise a
		 * directly repeated token would be left behind. The whitespace following the token is removed with it.
		 */
		$out['sandbox'] = trim( preg_replace( '/(?<!\S)allow-same-origin(?!\S)\s*/i', '', $out['sandbox'] ) );
	}

	return $out;
}
/**
 * Obtain the origin part of a given URL (scheme, host, port).
 *
 * The scheme and host are lower-cased since both are case-insensitive; this way two URLs which differ only
 * in the letter case of those parts yield identical origins when compared with `===`. A URL without a scheme
 * (e.g. a protocol-relative URL) takes the scheme of the `current_origin` arg. The port is only included when
 * it is explicitly present in the URL.
 *
 * @param string $url URL.
 * @return string|null Origin URL or null if parse failed or the URL has no host.
 */
private function get_origin_from_url( $url ) {
	$parsed_url = wp_parse_url( $url );
	if ( ! isset( $parsed_url['host'] ) ) {
		return null;
	}

	// Fall back to the scheme of the current origin when the URL does not specify one.
	if ( ! isset( $parsed_url['scheme'] ) ) {
		$parsed_url['scheme'] = wp_parse_url( $this->args['current_origin'], PHP_URL_SCHEME );
	}

	// Normalize the case-insensitive parts so that origin comparisons are reliable.
	$origin = strtolower( $parsed_url['scheme'] . '://' . $parsed_url['host'] );

	if ( isset( $parsed_url['port'] ) ) {
		$origin .= ':' . $parsed_url['port'];
	}

	return $origin;
}
/**
 * Create the default placeholder element for a converted <iframe>.
 *
 * Important: a <span> is used on purpose. A block-level element (e.g. div) would be moved out of
 * any containing paragraph by the PHP DOM parser.
 *
 * @since 0.2
 *
 * @return DOMElement|false Empty <span> carrying the `placeholder` attribute and the placeholder class.
 */
private function build_placeholder() {
	$placeholder_attributes = [
		'placeholder' => '',
		'class'       => 'amp-wp-iframe-placeholder',
	];

	return AMP_DOM_Utils::create_node( $this->dom, 'span', $placeholder_attributes );
}
/**
 * Reduce a boolean-like value to the character '0' or '1'.
 *
 * Only the first character of the value's string form is inspected: `1`, `y` and `Y` count as true.
 * Empty values and values that are neither strings nor numeric count as false.
 *
 * @param mixed $value A boolean character to sanitize. If a string containing more than a single
 *                     character is provided, only the first character is taken into account.
 *
 * @return string Returns either '0' or '1'.
 */
private function sanitize_boolean_digit( $value ) {
	$has_usable_type = is_string( $value ) || is_numeric( $value );

	// A forgotten (empty) value or a value of an unexpected type is treated as false.
	if ( empty( $value ) || ! $has_usable_type ) {
		return '0';
	}

	// See: https://github.com/ampproject/amp-wp/issues/2335#issuecomment-493209861.
	$first_character = substr( (string) $value, 0, 1 );

	return in_array( $first_character, [ '1', 'y', 'Y' ], true ) ? '1' : '0';
}
}