diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 5616d388a5a20..8bd82f4e79c47 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -2712,50 +2712,111 @@ function slugify( string $sluggee ): string { */ function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) { $title = strip_tags( $title ); + // Preserve escaped octets. + $title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title ); + // Remove percent signs that are not part of an octet. + $title = str_replace( '%', '', $title ); + // Restore octets. + $title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title ); if ( seems_utf8( $title ) ) { if ( function_exists( 'mb_strtolower' ) ) { $title = mb_strtolower( $title, 'UTF-8' ); } - $title = utf8_uri_encode( $title, 200 ); } $title = strtolower( $title ); if ( 'save' === $context ) { - $title = html_entity_decode( $title, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ); - $title = rawurldecode( $title ); - // Dash/hyphen symbols plus whitespace turn into a dash. - $title = preg_replace( '~[\p{Pd}\s/]+~u', '-', $title ); - // Control characters, combining marks, symbols, punctuation, and invisible characters are removed. - $title = preg_replace_callback( - '~-[_-]+|[\p{C}\p{M}\p{S}\p{P}\p{Z}]+~u', - static function ( $chunk ) { - switch ( $chunk[0] ) { - case '-': - return '-'; - - case '_': - return '_'; - - default: - return ''; - } - }, - $title - ); + // Convert  , &ndash, and &mdash to hyphens. + $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); + // Convert  , &ndash, and &mdash HTML entities to hyphens. + $title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title ); + // Convert forward slash to hyphen. + $title = str_replace( '/', '-', $title ); + + // Strip these characters entirely. $title = str_replace( array( - "\xD7", // Replace multiplication sign "×" with "x". - '�', // Remove invalid decoded characters. + // Soft hyphens. + '%c2%ad', + // ¡ and ¿. + '%c2%a1', + '%c2%bf', + // Angle quotes. + '%c2%ab', + '%c2%bb', + '%e2%80%b9', + '%e2%80%ba', + // Curly quotes. + '%e2%80%98', + '%e2%80%99', + '%e2%80%9c', + '%e2%80%9d', + '%e2%80%9a', + '%e2%80%9b', + '%e2%80%9e', + '%e2%80%9f', + // Bullet. + '%e2%80%a2', + // ©, ®, °, &hellip, and &trade. + '%c2%a9', + '%c2%ae', + '%c2%b0', + '%e2%80%a6', + '%e2%84%a2', + // Acute accents. + '%c2%b4', + '%cb%8a', + '%cc%81', + '%cd%81', + // Grave accent, macron, caron. + '%cc%80', + '%cc%84', + '%cc%8c', + // Non-visible characters that display without a width. + '%e2%80%8b', // Zero width space. + '%e2%80%8c', // Zero width non-joiner. + '%e2%80%8d', // Zero width joiner. + '%e2%80%8e', // Left-to-right mark. + '%e2%80%8f', // Right-to-left mark. + '%e2%80%aa', // Left-to-right embedding. + '%e2%80%ab', // Right-to-left embedding. + '%e2%80%ac', // Pop directional formatting. + '%e2%80%ad', // Left-to-right override. + '%e2%80%ae', // Right-to-left override. + '%ef%bb%bf', // Byte order mark. + '%ef%bf%bc', // Object replacement character. ), + '', + $title + ); + + // Convert non-visible characters that display with a width to hyphen. + $title = str_replace( array( - 'x', - '', + '%e2%80%80', // En quad. + '%e2%80%81', // Em quad. + '%e2%80%82', // En space. + '%e2%80%83', // Em space. + '%e2%80%84', // Three-per-em space. + '%e2%80%85', // Four-per-em space. + '%e2%80%86', // Six-per-em space. + '%e2%80%87', // Figure space. + '%e2%80%88', // Punctuation space. + '%e2%80%89', // Thin space. + '%e2%80%8a', // Hair space. + '%e2%80%a8', // Line separator. + '%e2%80%a9', // Paragraph separator. + '%e2%80%af', // Narrow no-break space. ), + '-', $title ); + + // Convert × to 'x'. + $title = str_replace( '%c3%97', 'x', $title ); } // Remove HTML entities.