Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[internal] ComparableExpr (f)strings and bytes made invariant under concatenation #13301

Merged
merged 18 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 110 additions & 39 deletions crates/ruff_python_ast/src/comparable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
//! an implicit concatenation of string literals, as these expressions are considered to
//! have the same shape in that they evaluate to the same value.

use std::borrow::Cow;

use crate as ast;

#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
Expand Down Expand Up @@ -511,7 +513,7 @@ impl<'a> From<&'a ast::ExceptHandler> for ComparableExceptHandler<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub enum ComparableFStringElement<'a> {
Literal(&'a str),
Literal(Cow<'a, str>),
FStringExpressionElement(FStringExpressionElement<'a>),
}

Expand All @@ -527,23 +529,34 @@ impl<'a> From<&'a ast::FStringElement> for ComparableFStringElement<'a> {
fn from(fstring_element: &'a ast::FStringElement) -> Self {
match fstring_element {
ast::FStringElement::Literal(ast::FStringLiteralElement { value, .. }) => {
Self::Literal(value)
}
ast::FStringElement::Expression(formatted_value) => {
Self::FStringExpressionElement(FStringExpressionElement {
expression: (&formatted_value.expression).into(),
debug_text: formatted_value.debug_text.as_ref(),
conversion: formatted_value.conversion,
format_spec: formatted_value
.format_spec
.as_ref()
.map(|spec| spec.elements.iter().map(Into::into).collect()),
})
Self::Literal(value.as_ref().into())
}
ast::FStringElement::Expression(formatted_value) => formatted_value.into(),
}
}
}

impl<'a> From<&'a ast::FStringExpressionElement> for ComparableFStringElement<'a> {
fn from(fstring_expression_element: &'a ast::FStringExpressionElement) -> Self {
let ast::FStringExpressionElement {
expression,
debug_text,
conversion,
format_spec,
range: _,
} = fstring_expression_element;

Self::FStringExpressionElement(FStringExpressionElement {
expression: (expression).into(),
debug_text: debug_text.as_ref(),
conversion: *conversion,
format_spec: format_spec
.as_ref()
.map(|spec| spec.elements.iter().map(Into::into).collect()),
})
}
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableElifElseClause<'a> {
test: Option<ComparableExpr<'a>>,
Expand Down Expand Up @@ -597,28 +610,82 @@ impl<'a> From<ast::LiteralExpressionRef<'a>> for ComparableLiteral<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableFString<'a> {
elements: Vec<ComparableFStringElement<'a>>,
}
elements: Box<[ComparableFStringElement<'a>]>,
}

impl<'a> From<&'a ast::FStringValue> for ComparableFString<'a> {
// The approach below is somewhat complicated, so it may
// require some justification.
//
// Suppose given an f-string of the form
// `f"{foo!r} one" " and two " f" and three {bar!s}"`
// This decomposes as:
// - An `FStringPart::FString`, `f"{foo!r} one"` with elements
// - `FStringElement::Expression` encoding `{foo!r}`
// - `FStringElement::Literal` encoding " one"
// - An `FStringPart::Literal` capturing `" and two "`
// - An `FStringPart::FString`, `f" and three {bar!s}"` with elements
// - `FStringElement::Literal` encoding " and three "
// - `FStringElement::Expression` encoding `{bar!s}`
//
// We would like to extract from this a vector of (comparable) f-string
// _elements_ which alternate between expression elements and literal
// elements. In order to do so, we need to concatenate adjacent string
// literals. String literals may be separated for two reasons: either
// they appear in adjacent string literal parts, or else a string literal
// part is adjacent to a string literal _element_ inside of an f-string part.
fn from(value: &'a ast::FStringValue) -> Self {
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
#[derive(Default)]
struct Collector<'a> {
elements: Vec<ComparableFStringElement<'a>>,
}

impl<'a> From<&'a ast::FString> for ComparableFString<'a> {
fn from(fstring: &'a ast::FString) -> Self {
Self {
elements: fstring.elements.iter().map(Into::into).collect(),
impl<'a> Collector<'a> {
// The logic for concatenating adjacent string literals
// occurs here, implicitly: when we encounter a sequence
// of string literals, the first gets pushed to the
// `elements` vector, while subsequent strings
// are concatenated onto this top string.
fn push_literal(&mut self, literal: &'a str) {
if let Some(ComparableFStringElement::Literal(existing_literal)) =
self.elements.last_mut()
{
existing_literal.to_mut().push_str(literal);
} else {
self.elements
.push(ComparableFStringElement::Literal(literal.into()));
}
}

fn push_expression(&mut self, expression: &'a ast::FStringExpressionElement) {
self.elements.push(expression.into());
}
}
}
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub enum ComparableFStringPart<'a> {
Literal(ComparableStringLiteral<'a>),
FString(ComparableFString<'a>),
}
let mut collector = Collector::default();

for part in value {
match part {
ast::FStringPart::Literal(string_literal) => {
collector.push_literal(&string_literal.value);
}
ast::FStringPart::FString(fstring) => {
for element in &fstring.elements {
match element {
ast::FStringElement::Literal(literal) => {
collector.push_literal(&literal.value);
}
ast::FStringElement::Expression(expression) => {
collector.push_expression(expression);
}
}
}
}
}
}

impl<'a> From<&'a ast::FStringPart> for ComparableFStringPart<'a> {
fn from(f_string_part: &'a ast::FStringPart) -> Self {
match f_string_part {
ast::FStringPart::Literal(string_literal) => Self::Literal(string_literal.into()),
ast::FStringPart::FString(f_string) => Self::FString(f_string.into()),
Self {
elements: collector.elements.into_boxed_slice(),
}
}
}
Expand All @@ -638,13 +705,13 @@ impl<'a> From<&'a ast::StringLiteral> for ComparableStringLiteral<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableBytesLiteral<'a> {
value: &'a [u8],
value: Cow<'a, [u8]>,
}

impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> {
fn from(bytes_literal: &'a ast::BytesLiteral) -> Self {
Self {
value: &bytes_literal.value,
value: Cow::Borrowed(&bytes_literal.value),
}
}
}
Expand Down Expand Up @@ -775,17 +842,17 @@ pub struct ExprFStringExpressionElement<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprFString<'a> {
parts: Vec<ComparableFStringPart<'a>>,
value: ComparableFString<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprStringLiteral<'a> {
parts: Vec<ComparableStringLiteral<'a>>,
value: ComparableStringLiteral<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprBytesLiteral<'a> {
parts: Vec<ComparableBytesLiteral<'a>>,
value: ComparableBytesLiteral<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -1019,17 +1086,21 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> {
}),
ast::Expr::FString(ast::ExprFString { value, range: _ }) => {
Self::FString(ExprFString {
parts: value.iter().map(Into::into).collect(),
value: value.into(),
})
}
ast::Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) => {
Self::StringLiteral(ExprStringLiteral {
parts: value.iter().map(Into::into).collect(),
value: ComparableStringLiteral {
value: value.to_str(),
},
})
}
ast::Expr::BytesLiteral(ast::ExprBytesLiteral { value, range: _ }) => {
Self::BytesLiteral(ExprBytesLiteral {
parts: value.iter().map(Into::into).collect(),
value: ComparableBytesLiteral {
value: Cow::from(value),
},
})
}
ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, range: _ }) => {
Expand Down
17 changes: 17 additions & 0 deletions crates/ruff_python_ast/src/nodes.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]

use std::borrow::Cow;
use std::fmt;
use std::fmt::Debug;
use std::iter::FusedIterator;
Expand Down Expand Up @@ -2186,6 +2187,22 @@ impl PartialEq<[u8]> for BytesLiteralValue {
}
}

impl<'a> From<&'a BytesLiteralValue> for Cow<'a, [u8]> {
fn from(value: &'a BytesLiteralValue) -> Self {
match &value.inner {
BytesLiteralValueInner::Single(BytesLiteral {
value: bytes_value, ..
}) => Cow::from(bytes_value.as_ref()),
BytesLiteralValueInner::Concatenated(bytes_literal_vec) => Cow::Owned(
bytes_literal_vec
.iter()
.flat_map(|bytes_literal| bytes_literal.value.to_vec())
.collect::<Vec<u8>>(),
),
}
}
}

/// An internal representation of [`BytesLiteralValue`].
#[derive(Clone, Debug, PartialEq)]
enum BytesLiteralValueInner {
Expand Down
47 changes: 47 additions & 0 deletions crates/ruff_python_ast_integration_tests/tests/comparable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use ruff_python_ast::comparable::ComparableExpr;
use ruff_python_parser::{parse_expression, ParseError};

#[test]
fn concatenated_strings_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"'a' 'b' r'\n raw'"#;
let value_contents = r#"'ab\\n raw'"#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}

#[test]
fn concatenated_bytes_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"b'a' b'b'"#;
let value_contents = r#"b'ab'"#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}

#[test]
fn concatenated_fstrings_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"f"{foo!r} this" r"\n raw" f" and {bar!s} that""#;
let value_contents = r#"f"{foo!r} this\\n raw and {bar!s} that""#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}
Loading