Skip to content

Commit

Permalink
Fix libxml issues with parsing character encoding (ampproject#1067)
Browse files Browse the repository at this point in the history
  • Loading branch information
douglyuckling committed Apr 12, 2018
1 parent 5ba9f5b commit f5f5855
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions includes/utils/class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,18 @@ function( $matches ) {
);
}

/*
* Add a pre-HTML5-style declaration of the encoding since libxml<2.8 doesn't recognize
* HTML5's meta charset. See <https://bugzilla.gnome.org/show_bug.cgi?id=655218>.
*/
if ( version_compare( LIBXML_DOTTED_VERSION, '2.8', '<' ) ) {
$document = preg_replace(
'#<meta[^>]+charset="([^"]+)"#i',
'<meta http-equiv="Content-Type" content="text/html; charset=$1" id="meta-http-equiv">$0',
$document
);
}

/*
* Wrap in dummy tags, since XML needs one parent node.
* It also makes it easier to loop through nodes.
Expand All @@ -123,6 +135,11 @@ function( $matches ) {
return false;
}

$metaHttpEquivElement = $dom->getElementById("meta-http-equiv");
if ( $metaHttpEquivElement ) {
$metaHttpEquivElement->parentNode->removeChild($metaHttpEquivElement);
}

return $dom;
}

Expand Down

0 comments on commit f5f5855

Please sign in to comment.