split string module (#9987)

astral-sh · Feb 14, 2024 · fe79798 · fe79798
1 parent bb8d203
commit fe79798
Showing 4 changed files with 847 additions and 813 deletions.
diff --git a/crates/ruff_python_formatter/src/string/any.rs b/crates/ruff_python_formatter/src/string/any.rs
@@ -0,0 +1,212 @@
+use std::iter::FusedIterator;
+
+use memchr::memchr2;
+
+use ruff_python_ast::{
+    self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
+    StringLiteral,
+};
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextLen, TextRange};
+
+use crate::expression::expr_f_string::f_string_quoting;
+use crate::other::f_string::FormatFString;
+use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
+use crate::prelude::*;
+use crate::string::{Quoting, StringPrefix, StringQuotes};
+
+/// Represents any kind of string expression. This could be either a string,
+/// bytes or f-string.
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum AnyString<'a> {
+    /// A string literal expression.
+    String(&'a ExprStringLiteral),
+    /// A bytes literal expression.
+    Bytes(&'a ExprBytesLiteral),
+    /// An f-string expression.
+    FString(&'a ExprFString),
+}
+
+impl<'a> AnyString<'a> {
+    /// Wraps `expression` in an [`AnyString`] when it is a string-like literal.
+    ///
+    /// Returns `None` for every expression that is not a string, bytes or f-string.
+    pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
+        let any_string = match expression {
+            Expr::StringLiteral(literal) => AnyString::String(literal),
+            Expr::BytesLiteral(literal) => AnyString::Bytes(literal),
+            Expr::FString(literal) => AnyString::FString(literal),
+            _ => return None,
+        };
+
+        Some(any_string)
+    }
+
+    /// Returns `true` if the string is implicitly concatenated.
+    pub(crate) fn is_implicit_concatenated(self) -> bool {
+        match self {
+            Self::String(expr) => expr.value.is_implicit_concatenated(),
+            Self::Bytes(expr) => expr.value.is_implicit_concatenated(),
+            Self::FString(expr) => expr.value.is_implicit_concatenated(),
+        }
+    }
+
+    /// Returns the quoting to be used for this string.
+    ///
+    /// String and bytes literals can always change their quotes; for f-strings the
+    /// decision is delegated to [`f_string_quoting`].
+    pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
+        match self {
+            Self::FString(f_string) => f_string_quoting(f_string, locator),
+            Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
+        }
+    }
+
+    /// Returns an iterator over all the [`AnyStringPart`]s of this string.
+    ///
+    /// `quoting` is only carried along for the parts of an f-string.
+    pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
+        match self {
+            Self::String(expr) => AnyStringPartsIter::String(expr.value.iter()),
+            Self::Bytes(expr) => AnyStringPartsIter::Bytes(expr.value.iter()),
+            Self::FString(expr) => AnyStringPartsIter::FString(expr.value.iter(), quoting),
+        }
+    }
+
+    /// Returns `true` if the string's source text spans multiple lines.
+    ///
+    /// String and bytes literals only count as multiline when the quotes parsed after the
+    /// prefix are triple quotes and the source contains a `\n` or `\r`. For f-strings, any
+    /// `\n` or `\r` inside the expression's source range is sufficient.
+    pub(crate) fn is_multiline(self, source: &str) -> bool {
+        let contains_line_break = |text: &str| memchr2(b'\n', b'\r', text.as_bytes()).is_some();
+
+        match self {
+            Self::String(_) | Self::Bytes(_) => {
+                let contents = &source[self.range()];
+                let prefix_len = StringPrefix::parse(contents).text_len();
+                let after_prefix = &contents[TextRange::new(prefix_len, contents.text_len())];
+
+                StringQuotes::parse(after_prefix).is_some_and(StringQuotes::is_triple)
+                    && contains_line_break(contents)
+            }
+            Self::FString(fstring) => contains_line_break(&source[fstring.range]),
+        }
+    }
+}
+
+impl Ranged for AnyString<'_> {
+    /// Returns the source range of the wrapped expression.
+    fn range(&self) -> TextRange {
+        match *self {
+            AnyString::String(literal) => literal.range(),
+            AnyString::Bytes(literal) => literal.range(),
+            AnyString::FString(f_string) => f_string.range(),
+        }
+    }
+}
+
+impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
+    /// Converts the string into a node reference to the expression it wraps.
+    fn from(value: &AnyString<'a>) -> Self {
+        match *value {
+            AnyString::String(literal) => Self::ExprStringLiteral(literal),
+            AnyString::Bytes(literal) => Self::ExprBytesLiteral(literal),
+            AnyString::FString(f_string) => Self::ExprFString(f_string),
+        }
+    }
+}
+
+impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
+    /// By-value conversion; forwards to the conversion from `&AnyString`.
+    fn from(value: AnyString<'a>) -> Self {
+        (&value).into()
+    }
+}
+
+impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
+    /// Converts the string into an expression reference to the expression it wraps.
+    fn from(value: &AnyString<'a>) -> Self {
+        match *value {
+            AnyString::String(literal) => Self::StringLiteral(literal),
+            AnyString::Bytes(literal) => Self::BytesLiteral(literal),
+            AnyString::FString(f_string) => Self::FString(f_string),
+        }
+    }
+}
+
+/// Iterator over the parts of an [`AnyString`], yielding [`AnyStringPart`]s.
+///
+/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
+pub(super) enum AnyStringPartsIter<'a> {
+    /// Iterates over the literals of a string expression.
+    String(std::slice::Iter<'a, StringLiteral>),
+    /// Iterates over the literals of a bytes expression.
+    Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
+    /// Iterates over the parts of an f-string expression. The [`Quoting`] is copied
+    /// into every part that is yielded.
+    FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
+}
+
+impl<'a> Iterator for AnyStringPartsIter<'a> {
+    type Item = AnyStringPart<'a>;
+
+    /// Advances the wrapped slice iterator and wraps its item in the matching
+    /// [`AnyStringPart`] variant.
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Self::String(literals) => literals.next().map(|part| AnyStringPart::String {
+                part,
+                layout: StringLiteralKind::String,
+            }),
+            Self::Bytes(literals) => literals.next().map(AnyStringPart::Bytes),
+            Self::FString(parts, quoting) => {
+                let quoting = *quoting;
+                parts.next().map(|part| match part {
+                    // A plain string literal that takes part in an implicitly
+                    // concatenated f-string.
+                    ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
+                        part: string_literal,
+                        layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
+                    },
+                    ast::FStringPart::FString(f_string) => AnyStringPart::FString {
+                        part: f_string,
+                        quoting,
+                    },
+                })
+            }
+        }
+    }
+}
+
+// Every variant wraps a `std::slice::Iter`, which is itself fused, and `next` returns
+// `None` whenever the wrapped iterator does.
+impl FusedIterator for AnyStringPartsIter<'_> {}
+
+/// Represents any kind of string which is part of an implicitly concatenated
+/// string. This could be either a string, bytes or f-string.
+///
+/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
+#[derive(Clone, Debug)]
+pub(super) enum AnyStringPart<'a> {
+    /// A string literal, either from a string expression or a literal part of an f-string.
+    String {
+        /// The string literal node.
+        part: &'a ast::StringLiteral,
+        /// The kind passed to [`FormatStringLiteral`] when formatting the part.
+        layout: StringLiteralKind,
+    },
+    /// A bytes literal.
+    Bytes(&'a ast::BytesLiteral),
+    /// An f-string part of an f-string expression.
+    FString {
+        /// The f-string node.
+        part: &'a ast::FString,
+        /// The quoting passed to [`FormatFString`] when formatting the part.
+        quoting: Quoting,
+    },
+}
+
+impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
+    /// Converts the part into a node reference to the literal it wraps.
+    fn from(value: &AnyStringPart<'a>) -> Self {
+        match *value {
+            AnyStringPart::String { part: literal, .. } => Self::StringLiteral(literal),
+            AnyStringPart::Bytes(literal) => Self::BytesLiteral(literal),
+            AnyStringPart::FString { part: f_string, .. } => Self::FString(f_string),
+        }
+    }
+}
+
+impl Ranged for AnyStringPart<'_> {
+    /// Returns the source range of the wrapped literal.
+    fn range(&self) -> TextRange {
+        match *self {
+            AnyStringPart::String { part: literal, .. } => literal.range(),
+            AnyStringPart::Bytes(literal) => literal.range(),
+            AnyStringPart::FString { part: f_string, .. } => f_string.range(),
+        }
+    }
+}
+
+impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
+    /// Formats the part with the formatter that matches its kind.
+    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
+        match self {
+            Self::String {
+                part: string_literal,
+                layout: kind,
+            } => FormatStringLiteral::new(string_literal, *kind).fmt(f),
+            Self::Bytes(literal) => literal.format().fmt(f),
+            Self::FString {
+                part: f_string,
+                quoting: f_string_quoting,
+            } => FormatFString::new(f_string, *f_string_quoting).fmt(f),
+        }
+    }
+}
diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs
@@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
 /// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
 /// that use spaces for alignment.
 pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
-    let docstring = &normalized.text;
+    let docstring = &normalized.text();
 
     // Black doesn't change the indentation of docstrings that contain an escaped newline
     if contains_unescaped_newline(docstring) {
@@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
     let mut lines = docstring.split('\n').peekable();
 
     // Start the string
-    write!(f, [normalized.prefix, normalized.quotes])?;
+    write!(f, [normalized.prefix(), normalized.quotes()])?;
     // We track where in the source docstring we are (in source code byte offsets)
     let mut offset = normalized.start();
 
@@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
 
     // Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
     // inner quotes and closing quotes from getting to close to avoid `""""content`
-    if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
+    if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
         space().fmt(f)?;
     }
 
@@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         {
             space().fmt(f)?;
         }
-        normalized.quotes.fmt(f)?;
+        normalized.quotes().fmt(f)?;
         return Ok(());
     }
 
@@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         offset,
         stripped_indentation,
         already_normalized,
-        quote_char: normalized.quotes.quote_char,
+        quote_char: normalized.quotes().quote_char,
         code_example: CodeExample::default(),
     }
     .add_iter(lines)?;
@@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         space().fmt(f)?;
     }
 
-    write!(f, [normalized.quotes])
+    write!(f, [normalized.quotes()])
 }
 
 fn contains_unescaped_newline(haystack: &str) -> bool {
@@ -1569,7 +1569,7 @@ fn docstring_format_source(
 /// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
 /// so `content\\ """` doesn't need a space while `content\\\ """` does.
 fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
-    trim_end.ends_with(normalized.quotes.quote_char.as_char())
+    trim_end.ends_with(normalized.quotes().quote_char.as_char())
         || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
 }
 

diff --git a/crates/ruff_python_formatter/src/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs
diff --git a/crates/ruff_python_formatter/src/string/normalize.rs b/crates/ruff_python_formatter/src/string/normalize.rs
@@ -0,0 +1,622 @@
+use std::borrow::Cow;
+
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextRange};
+
+use crate::prelude::*;
+use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
+use crate::string::{QuoteChar, Quoting, StringPart, StringPrefix, StringQuotes};
+use crate::QuoteStyle;
+
+/// Chooses the quotes for a string part and normalizes its content accordingly.
+pub(crate) struct StringNormalizer {
+    /// Whether the string's quotes can be changed or must be preserved.
+    quoting: Quoting,
+    /// The quote style to prefer when the quotes can be changed.
+    preferred_quote_style: QuoteStyle,
+    /// The quote character reported by the formatting context's `docstring()`. When set,
+    /// triple-quoted strings prefer the inverted quote character.
+    parent_docstring_quote_char: Option<QuoteChar>,
+    /// Whether `\x`, `\u`, `\U` and `\N{..}` escape sequences get normalized.
+    normalize_hex: bool,
+}
+
+impl StringNormalizer {
+    /// Creates a normalizer with the default quoting and quote style. The parent docstring
+    /// quote character and the escape-sequence normalization setting are read from `context`.
+    pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
+        let parent_docstring_quote_char = context.docstring();
+        let normalize_hex = is_hex_codes_in_unicode_sequences_enabled(context);
+
+        Self {
+            quoting: Quoting::default(),
+            preferred_quote_style: QuoteStyle::default(),
+            parent_docstring_quote_char,
+            normalize_hex,
+        }
+    }
+
+    /// Sets the quote style to prefer when the quotes can be changed.
+    pub(crate) fn with_preferred_quote_style(self, quote_style: QuoteStyle) -> Self {
+        Self {
+            preferred_quote_style: quote_style,
+            ..self
+        }
+    }
+
+    /// Sets whether the quotes can be changed or must be preserved.
+    pub(crate) fn with_quoting(self, quoting: Quoting) -> Self {
+        Self { quoting, ..self }
+    }
+
+    /// Computes the string's preferred quotes.
+    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
+        // Per PEP 8, triple-quoted strings always prefer double quotes, except when using
+        // quote-style-preserve... and except when formatting a code snippet inside a
+        // docstring. In the latter case the quote style of the parent docstring is inverted
+        // to avoid writing out invalid Python.
+        //
+        // This can end up somewhat out of sync with PEP 8. Consider:
+        //
+        //     def foo():
+        //         '''
+        //         Something.
+        //
+        //         >>> """tricksy"""
+        //         '''
+        //         pass
+        //
+        // Ideally, this would be reformatted as:
+        //
+        //     def foo():
+        //         """
+        //         Something.
+        //
+        //         >>> '''tricksy'''
+        //         """
+        //         pass
+        //
+        // Instead, the original quoting is preserved. The quoting style of the outer
+        // docstring is determined, in part, by looking at its contents: it contains a `"""`,
+        // so `'''` is inferred to read better because the interior `"""` then needs no
+        // escaping. Except that this `"""` belongs to a code snippet that could itself be
+        // reformatted to use a different quoting style.
+        //
+        // Fixing this would likely require fairly seismic changes to how strings are
+        // formatted: code snippets would need to be found before normalizing the docstring,
+        // and the quoting style chosen more holistically from the kinds of quotes used in
+        // the snippets and what reformatting them might look like.
+        //
+        // Overall this is a bit of a corner case, and inverting the style the parent
+        // ultimately decided upon works, even if it isn't perfectly aligned with PEP 8.
+        let preferred_style = if !string.quotes().triple {
+            self.preferred_quote_style
+        } else if let Some(quote) = self.parent_docstring_quote_char {
+            QuoteStyle::from(quote.invert())
+        } else if self.preferred_quote_style.is_preserve() {
+            QuoteStyle::Preserve
+        } else {
+            QuoteStyle::Double
+        };
+
+        match self.quoting {
+            Quoting::Preserve => string.quotes(),
+            Quoting::CanChange => match QuoteChar::from_style(preferred_style) {
+                // The style has no quote character to switch to, keep the existing quotes.
+                None => string.quotes(),
+                Some(preferred_quote) => {
+                    let raw_content = locator.slice(string.content_range());
+                    let existing_quotes = string.quotes();
+
+                    if string.prefix().is_raw_string() {
+                        choose_quotes_for_raw_string(raw_content, existing_quotes, preferred_quote)
+                    } else {
+                        choose_quotes_impl(raw_content, existing_quotes, preferred_quote)
+                    }
+                }
+            },
+        }
+    }
+
+    /// Computes the string's preferred quotes and normalizes its content.
+    pub(crate) fn normalize<'a>(
+        &self,
+        string: &StringPart,
+        locator: &'a Locator,
+    ) -> NormalizedString<'a> {
+        let prefix = string.prefix();
+        let content_range = string.content_range();
+        let quotes = self.choose_quotes(string, locator);
+
+        let text = normalize_string(
+            locator.slice(content_range),
+            quotes,
+            prefix,
+            self.normalize_hex,
+        );
+
+        NormalizedString {
+            prefix,
+            content_range,
+            text,
+            quotes,
+        }
+    }
+}
+
+/// A string part after its quotes have been chosen and its content normalized.
+#[derive(Debug)]
+pub(crate) struct NormalizedString<'a> {
+    /// The prefix of the string, taken over unchanged from the string part.
+    prefix: crate::string::StringPrefix,
+
+    /// The quotes of the normalized string (preferred quotes)
+    quotes: StringQuotes,
+
+    /// The range of the string's content in the source (minus prefix and quotes).
+    content_range: TextRange,
+
+    /// The normalized text
+    text: Cow<'a, str>,
+}
+
+impl<'a> NormalizedString<'a> {
+    /// Returns the normalized text (without prefix and quotes).
+    pub(crate) fn text(&self) -> &Cow<'a, str> {
+        &self.text
+    }
+
+    /// Returns the quotes of the normalized string (preferred quotes).
+    pub(crate) fn quotes(&self) -> StringQuotes {
+        self.quotes
+    }
+
+    /// Returns the prefix of the string.
+    pub(crate) fn prefix(&self) -> StringPrefix {
+        self.prefix
+    }
+}
+
+impl Ranged for NormalizedString<'_> {
+    /// Returns the range of the string's content in the source (minus prefix and quotes).
+    fn range(&self) -> TextRange {
+        self.content_range
+    }
+}
+
+impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
+    /// Writes the prefix, the opening quotes, the string's content and the closing quotes.
+    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
+        ruff_formatter::write!(f, [self.prefix, self.quotes])?;
+        match &self.text {
+            // Borrowed text: write the content as a slice of the source covering the
+            // content range.
+            Cow::Borrowed(_) => {
+                source_text_slice(self.range()).fmt(f)?;
+            }
+            // Owned text: write the normalized text itself.
+            Cow::Owned(normalized) => {
+                text(normalized).fmt(f)?;
+            }
+        }
+        self.quotes.fmt(f)
+    }
+}
+
+/// Picks the quotes for a raw string.
+///
+/// Raw strings cannot gain or lose escapes, so the preferred quote is only used when the
+/// content contains no unescaped quote of the preferred style that would conflict with it.
+/// For example, `r"foo"` is chosen over `r'foo'` if the preferred quote style is double
+/// quotes. Whether the string is triple-quoted is never changed.
+fn choose_quotes_for_raw_string(
+    input: &str,
+    quotes: StringQuotes,
+    preferred_quote: QuoteChar,
+) -> StringQuotes {
+    let preferred_quote_char = preferred_quote.as_char();
+    let mut chars = input.chars().peekable();
+    // Set as soon as a preferred quote is found that rules out switching quotes.
+    let mut must_keep_quotes = false;
+
+    while let Some(c) = chars.next() {
+        if c == '\\' {
+            // Ignore escaped characters
+            chars.next();
+            continue;
+        }
+
+        // Only `"` or `'` (whichever is preferred) can conflict with the new quotes.
+        if c != preferred_quote_char {
+            continue;
+        }
+
+        if !quotes.triple {
+            must_keep_quotes = true;
+            break;
+        }
+
+        match chars.peek().copied() {
+            // We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
+            // about where the closing triple quotes start
+            None => {
+                must_keep_quotes = true;
+                break;
+            }
+            Some(next) if next == preferred_quote_char => {
+                // `""` or `''`
+                chars.next();
+
+                // We can't turn `r'''""'''` into `r""""""""`, nor can we have
+                // `"""` or `'''` respectively inside the string
+                if chars
+                    .peek()
+                    .map_or(true, |&after| after == preferred_quote_char)
+                {
+                    must_keep_quotes = true;
+                    break;
+                }
+            }
+            Some(_) => {}
+        }
+    }
+
+    let quote_char = if must_keep_quotes {
+        quotes.quote_char
+    } else {
+        preferred_quote
+    };
+
+    StringQuotes {
+        triple: quotes.triple,
+        quote_char,
+    }
+}
+
+/// Choose the appropriate quote style for a string.
+///
+/// For single quoted strings, the preferred quote style is used, unless the alternative quote style
+/// would require fewer escapes.
+///
+/// For triple quoted strings, the preferred quote style is always used, unless the string contains
+/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
+/// used unless the string contains `"""`) or ends with the quote character.
+///
+/// `input` is the string's content without prefix and quotes, `quotes` are the string's existing
+/// quotes. Whether the string is triple-quoted is never changed.
+fn choose_quotes_impl(
+    input: &str,
+    quotes: StringQuotes,
+    preferred_quote: QuoteChar,
+) -> StringQuotes {
+    let quote = if quotes.triple {
+        let preferred_quote_char = preferred_quote.as_char();
+        // True if the string contains a triple quote sequence of the configured quote style
+        // or ends with a quote of the configured quote style.
+        let mut uses_triple_quotes = false;
+        let mut chars = input.chars().peekable();
+
+        while let Some(c) = chars.next() {
+            match c {
+                '\\' => {
+                    // Skip over an escaped backslash or an escaped quote of the *preferred*
+                    // style: an escaped quote can neither be part of a closing triple quote
+                    // nor terminate the string.
+                    let escapes_quote_or_backslash = chars
+                        .peek()
+                        .is_some_and(|&next| next == preferred_quote_char || next == '\\');
+                    if escapes_quote_or_backslash {
+                        chars.next();
+                    }
+                }
+                // `"` or `'`
+                c if c == preferred_quote_char => {
+                    match chars.peek().copied() {
+                        Some(c) if c == preferred_quote_char => {
+                            // `""` or `''`
+                            chars.next();
+
+                            match chars.peek().copied() {
+                                Some(c) if c == preferred_quote_char => {
+                                    // `"""` or `'''`
+                                    chars.next();
+                                    uses_triple_quotes = true;
+                                    break;
+                                }
+                                Some(_) => {}
+                                None => {
+                                    // Handle `''' ""'''`. At this point we have consumed both
+                                    // double quotes, so on the next iteration the iterator is empty
+                                    // and we'd miss the string ending with a preferred quote
+                                    uses_triple_quotes = true;
+                                    break;
+                                }
+                            }
+                        }
+                        Some(_) => {
+                            // A single quote char, this is ok
+                        }
+                        None => {
+                            // Trailing quote at the end of the string
+                            uses_triple_quotes = true;
+                            break;
+                        }
+                    }
+                }
+                _ => continue,
+            }
+        }
+
+        if uses_triple_quotes {
+            // String contains a triple quote sequence of the configured quote style.
+            // Keep the existing quote style.
+            quotes.quote_char
+        } else {
+            preferred_quote
+        }
+    } else {
+        let mut single_quotes = 0u32;
+        let mut double_quotes = 0u32;
+
+        for c in input.chars() {
+            match c {
+                '\'' => {
+                    single_quotes += 1;
+                }
+
+                '"' => {
+                    double_quotes += 1;
+                }
+
+                _ => continue,
+            }
+        }
+
+        // Only deviate from the preferred quote if it occurs more often in the content than
+        // the alternative, i.e. if using the alternative requires fewer escapes.
+        match preferred_quote {
+            QuoteChar::Single => {
+                if single_quotes > double_quotes {
+                    QuoteChar::Double
+                } else {
+                    QuoteChar::Single
+                }
+            }
+            QuoteChar::Double => {
+                if double_quotes > single_quotes {
+                    QuoteChar::Single
+                } else {
+                    QuoteChar::Double
+                }
+            }
+        }
+    };
+
+    StringQuotes {
+        triple: quotes.triple,
+        quote_char: quote,
+    }
+}
+
+/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
+/// with the provided [`StringQuotes`] style.
+///
+/// * `input`: The string's content without prefix and quotes.
+/// * `quotes`: The quotes that the normalized string will be written with.
+/// * `prefix`: The string's prefix. Raw strings only get their line endings normalized, and
+///   quotes inside the replacement fields of f-strings are left untouched.
+/// * `normalize_hex`: Whether `\x`, `\u`, `\U` and `\N{..}` escape sequences get normalized.
+///
+/// Returns the normalized string. The result borrows `input` if it is already normalized.
+pub(crate) fn normalize_string(
+    input: &str,
+    quotes: StringQuotes,
+    prefix: StringPrefix,
+    normalize_hex: bool,
+) -> Cow<str> {
+    // The normalized string if `input` is not yet normalized.
+    // `output` must remain empty if `input` is already normalized.
+    let mut output = String::new();
+    // Tracks the last index of `input` that has been written to `output`.
+    // If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
+    let mut last_index = 0;
+
+    let quote = quotes.quote_char;
+    let preferred_quote = quote.as_char();
+    let opposite_quote = quote.invert().as_char();
+
+    let mut chars = input.char_indices().peekable();
+
+    let is_raw = prefix.is_raw_string();
+    let is_fstring = prefix.is_fstring();
+    let mut formatted_value_nesting = 0u32;
+
+    while let Some((index, c)) = chars.next() {
+        if is_fstring && matches!(c, '{' | '}') {
+            if chars.peek().copied().is_some_and(|(_, next)| next == c) {
+                // Skip over the second character of the double braces
+                chars.next();
+            } else if c == '{' {
+                formatted_value_nesting += 1;
+            } else {
+                // Safe to assume that `c == '}'` here because of the matched pattern above
+                formatted_value_nesting = formatted_value_nesting.saturating_sub(1);
+            }
+            continue;
+        }
+        if c == '\r' {
+            output.push_str(&input[last_index..index]);
+
+            // Skip over the '\r' character, keep the `\n`
+            if chars.peek().copied().is_some_and(|(_, next)| next == '\n') {
+                chars.next();
+            }
+            // Replace the `\r` with a `\n`
+            else {
+                output.push('\n');
+            }
+
+            last_index = index + '\r'.len_utf8();
+        } else if !is_raw {
+            if c == '\\' {
+                // Look at the escaped character without consuming it.
+                if let Some((_, next)) = chars.peek().copied() {
+                    if next == '\\' {
+                        // Skip over escaped backslashes
+                        chars.next();
+                    } else if normalize_hex {
+                        if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
+                            .and_then(|escape| {
+                                escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
+                            })
+                        {
+                            // Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
+                            let escape_start_len = '\\'.len_utf8() + next.len_utf8();
+                            let escape_start_offset = index + escape_start_len;
+                            if let Cow::Owned(normalised) = &normalised {
+                                output.push_str(&input[last_index..escape_start_offset]);
+                                output.push_str(normalised);
+                                last_index = escape_start_offset + normalised.len();
+                            };
+
+                            // Move the `chars` iterator past the escape sequence.
+                            // Simply reassigning `chars` doesn't work because the indices would
+                            // then be off.
+                            for _ in 0..next.len_utf8() + normalised.len() {
+                                chars.next();
+                            }
+                        }
+                    }
+
+                    if !quotes.triple {
+                        #[allow(clippy::if_same_then_else)]
+                        if next == opposite_quote && formatted_value_nesting == 0 {
+                            // Remove the escape by ending before the backslash and starting again with the quote
+                            chars.next();
+                            output.push_str(&input[last_index..index]);
+                            last_index = index + '\\'.len_utf8();
+                        } else if next == preferred_quote {
+                            // Quote is already escaped, skip over it.
+                            chars.next();
+                        }
+                    }
+                }
+            } else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
+                // Escape the quote
+                output.push_str(&input[last_index..index]);
+                output.push('\\');
+                output.push(c);
+                last_index = index + preferred_quote.len_utf8();
+            }
+        }
+    }
+
+    if last_index == 0 {
+        // Nothing was rewritten: hand back the input without allocating.
+        Cow::Borrowed(input)
+    } else {
+        output.push_str(&input[last_index..]);
+        Cow::Owned(output)
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum UnicodeEscape {
+    /// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
+    Hex(usize),
+
+    /// An escaped unicode name (`\N{name}`)
+    CharacterName,
+}
+
+impl UnicodeEscape {
+    /// Maps the character following the backslash to the escape sequence it introduces.
+    ///
+    /// `x` is always recognized; `u`, `U` and `N` only if `allow_unicode` is `true`.
+    /// Returns `None` for every other character.
+    fn new(first: char, allow_unicode: bool) -> Option<UnicodeEscape> {
+        match (first, allow_unicode) {
+            ('x', _) => Some(Self::Hex(2)),
+            ('u', true) => Some(Self::Hex(4)),
+            ('U', true) => Some(Self::Hex(8)),
+            ('N', true) => Some(Self::CharacterName),
+            _ => None,
+        }
+    }
+
+    /// Normalises `\u..`, `\U..`, `\x..` and `\N{..}` escape sequences to:
+    ///
+    /// * `\u`, `\U` and `\x`: To use lower case for the characters `a-f`.
+    /// * `\N`: To use uppercase letters
+    ///
+    /// `input` is the text following the escape character (`x`, `u`, `U` or `N`). Returns the
+    /// normalised sequence, borrowed from `input` if it is already normalised, or `None` if
+    /// `input` doesn't start with a valid escape sequence of this kind.
+    fn normalize(self, input: &str) -> Option<Cow<str>> {
+        match self {
+            Self::Hex(len) => {
+                // `get` fails if fewer than `len` bytes are left or if the cut would split a
+                // multi-byte character. Neither can be a valid hex escape sequence.
+                let digits = input.get(..len)?;
+
+                if !digits.bytes().all(|byte| byte.is_ascii_hexdigit()) {
+                    // not a valid escape sequence
+                    return None;
+                }
+
+                Some(if digits.bytes().any(|byte| byte.is_ascii_uppercase()) {
+                    Cow::Owned(digits.to_ascii_lowercase())
+                } else {
+                    Cow::Borrowed(digits)
+                })
+            }
+            Self::CharacterName => {
+                let name = input.strip_prefix('{')?;
+
+                // Offset of the first character that can't be part of a name. Running out of
+                // input means the escape sequence is unterminated, don't normalise it.
+                let name_len = name.find(|c: char| {
+                    !matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ' | '-')
+                })?;
+
+                // The name must be terminated by `}` and be at least two characters long.
+                if !name[name_len..].starts_with('}') || name_len < 2 {
+                    return None;
+                }
+
+                let sequence = &input[..'{'.len_utf8() + name_len + '}'.len_utf8()];
+
+                Some(
+                    if name[..name_len].bytes().any(|byte| byte.is_ascii_lowercase()) {
+                        Cow::Owned(sequence.to_ascii_uppercase())
+                    } else {
+                        Cow::Borrowed(sequence)
+                    },
+                )
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use crate::string::{QuoteChar, StringPrefix, StringQuotes};
+
+    use super::{normalize_string, UnicodeEscape};
+
+    /// An 8 digit `\U` escape sequence gets its hex digits lowercased.
+    #[test]
+    fn normalize_32_escape() {
+        let escape_sequence = UnicodeEscape::new('U', true).unwrap();
+
+        assert_eq!(
+            Some(Cow::Owned("0001f60e".to_string())),
+            escape_sequence.normalize("0001F60E")
+        );
+    }
+
+    /// `\x` escape sequences get their hex digits lowercased in byte strings too.
+    #[test]
+    fn normalize_hex_in_byte_string() {
+        let input = r"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
+
+        let normalized = normalize_string(
+            input,
+            StringQuotes {
+                triple: false,
+                quote_char: QuoteChar::Double,
+            },
+            StringPrefix::BYTE,
+            true,
+        );
+
+        assert_eq!(r"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", &normalized);
+    }
+}