From 9d442f0a2d28925794c68232beb7c26405534495 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 10:34:09 -0300 Subject: [PATCH 1/2] feat: show printable byte arrays as byte strings in SSA --- .../src/ssa/ir/instruction/call.rs | 12 ++-- .../noirc_evaluator/src/ssa/ir/printer.rs | 44 +++++++++++++ .../noirc_evaluator/src/ssa/parser/lexer.rs | 24 +++++-- .../noirc_evaluator/src/ssa/parser/mod.rs | 66 +++++++++++++++++-- .../noirc_evaluator/src/ssa/parser/tests.rs | 30 ++++++++- .../noirc_evaluator/src/ssa/parser/token.rs | 2 + 6 files changed, 156 insertions(+), 22 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index 72d781c6e95..7709e5bc0e1 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -842,27 +842,27 @@ mod tests { #[test] fn simplify_derive_generators_has_correct_type() { - let src = " + let src = r#" brillig(inline) fn main f0 { b0(): - v0 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24] + v0 = make_array b"DEFAULT_DOMAIN_SEPARATOR" // This call was previously incorrectly simplified to something that returned `[Field; 3]` v2 = call derive_pedersen_generators(v0, u32 0) -> [(Field, Field, u1); 1] return v2 } - "; + "#; let ssa = Ssa::from_str(src).unwrap(); - let expected = " + let expected = r#" brillig(inline) fn main f0 { b0(): - v15 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24] + v15 = make_array b"DEFAULT_DOMAIN_SEPARATOR" v19 = make_array [Field 3728882899078719075161482178784387565366481897740339799480980287259621149274, Field -9903063709032878667290627648209915537972247634463802596148419711785767431332, u1 0] : [(Field, Field, u1); 1] return v19 } - "; + "#; assert_normalized_ssa_equals(ssa, expected); } } diff --git a/compiler/noirc_evaluator/src/ssa/ir/printer.rs b/compiler/noirc_evaluator/src/ssa/ir/printer.rs index 5a451301d24..aa2952d5abc 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/printer.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/printer.rs @@ -5,8 +5,11 @@ use std::{ }; use acvm::acir::AcirField; +use im::Vector; use iter_extended::vecmap; +use crate::ssa::ir::types::{NumericType, Type}; + use super::{ basic_block::BasicBlockId, dfg::DataFlowGraph, @@ -220,6 +223,28 @@ fn display_instruction_inner( ) } Instruction::MakeArray { elements, typ } => { + // If the array is a byte array, we check if all the bytes are printable ascii characters + // and, if so, we print the array as a string literal (easier to understand). + // It could happen that the byte array is a random byte sequence that happens to be printable + // (it didn't come from a string literal) but this still reduces the noise in the output + // and actually represents the same value. + let (element_types, is_slice) = match typ { + Type::Array(types, _) => (types, false), + Type::Slice(types) => (types, true), + _ => panic!("Expected array or slice type for MakeArray"), + }; + if element_types.len() == 1 + && element_types[0] == Type::Numeric(NumericType::Unsigned { bit_size: 8 }) + { + if let Some(string) = try_byte_array_to_string(elements, function) { + if is_slice { + return writeln!(f, "make_array &b{:?}", string); + } else { + return writeln!(f, "make_array b{:?}", string); + } + } + } + write!(f, "make_array [")?; for (i, element) in elements.iter().enumerate() { @@ -234,6 +259,25 @@ fn display_instruction_inner( } } +fn try_byte_array_to_string(elements: &Vector, function: &Function) -> Option { + let mut string = String::new(); + for element in elements { + let element = function.dfg.get_numeric_constant(*element)?; + let element = element.try_to_u32()?; + if element > 0xFF { + return None; + } + let byte = element as u8; + if byte.is_ascii_alphanumeric() || byte.is_ascii_punctuation() || byte.is_ascii_whitespace() + { + string.push(byte as char); + } else { + return None; + } + } + Some(string) +} + fn result_types(function: &Function, results: &[ValueId]) -> String { let types = vecmap(results, |result| function.dfg.type_of_value(*result).to_string()); if types.is_empty() { diff --git a/compiler/noirc_evaluator/src/ssa/parser/lexer.rs b/compiler/noirc_evaluator/src/ssa/parser/lexer.rs index d89bc1e9e28..5b66810c641 100644 --- a/compiler/noirc_evaluator/src/ssa/parser/lexer.rs +++ b/compiler/noirc_evaluator/src/ssa/parser/lexer.rs @@ -62,6 +62,7 @@ impl<'a> Lexer<'a> { Some('-') if self.peek_char() == Some('>') => self.double_char_token(Token::Arrow), Some('-') => self.single_char_token(Token::Dash), Some('"') => self.eat_string_literal(), + Some('b') if self.peek_char() == Some('"') => self.eat_byte_string_literal(), Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch), Some(char) => Err(LexerError::UnexpectedCharacter { char, @@ -180,8 +181,23 @@ impl<'a> Lexer<'a> { fn eat_string_literal(&mut self) -> SpannedTokenResult { let start = self.position; - let mut string = String::new(); + let string = self.eat_string(start)?; + let str_literal_token = Token::Str(string); + let end = self.position; + Ok(str_literal_token.into_span(start, end)) + } + + fn eat_byte_string_literal(&mut self) -> SpannedTokenResult { + let start = self.position; + self.next_char(); // skip the b + let string = self.eat_string(start)?; + let str_literal_token = Token::ByteStr(string); + let end = self.position; + Ok(str_literal_token.into_span(start, end)) + } + fn eat_string(&mut self, start: u32) -> Result { + let mut string = String::new(); while let Some(next) = self.next_char() { let char = match next { '"' => break, @@ -206,11 +222,7 @@ impl<'a> Lexer<'a> { string.push(char); } - - let str_literal_token = Token::Str(string); - - let end = self.position; - Ok(str_literal_token.into_span(start, end)) + Ok(string) } fn eat_while bool>( diff --git a/compiler/noirc_evaluator/src/ssa/parser/mod.rs b/compiler/noirc_evaluator/src/ssa/parser/mod.rs index 506d2df3dea..0b6a8927e97 100644 --- a/compiler/noirc_evaluator/src/ssa/parser/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/parser/mod.rs @@ -4,7 +4,10 @@ use std::{ }; use super::{ - ir::{instruction::BinaryOp, types::Type}, + ir::{ + instruction::BinaryOp, + types::{NumericType, Type}, + }, Ssa, }; @@ -448,12 +451,39 @@ impl<'a> Parser<'a> { } if self.eat_keyword(Keyword::MakeArray)? { - self.eat_or_error(Token::LeftBracket)?; - let elements = self.parse_comma_separated_values()?; - self.eat_or_error(Token::RightBracket)?; - self.eat_or_error(Token::Colon)?; - let typ = self.parse_type()?; - return Ok(ParsedInstruction::MakeArray { target, elements, typ }); + if self.eat(Token::Ampersand)? { + let Some(string) = self.eat_byte_str()? else { + return self.expected_byte_string(); + }; + let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 }); + let typ = Type::Slice(Arc::new(vec![u8.clone()])); + let elements = string + .bytes() + .map(|byte| ParsedValue::NumericConstant { + constant: FieldElement::from(byte as u128), + typ: u8.clone(), + }) + .collect(); + return Ok(ParsedInstruction::MakeArray { target, elements, typ }); + } else if let Some(string) = self.eat_byte_str()? { + let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 }); + let typ = Type::Array(Arc::new(vec![u8.clone()]), string.bytes().count() as u32); + let elements = string + .bytes() + .map(|byte| ParsedValue::NumericConstant { + constant: FieldElement::from(byte as u128), + typ: u8.clone(), + }) + .collect(); + return Ok(ParsedInstruction::MakeArray { target, elements, typ }); + } else { + self.eat_or_error(Token::LeftBracket)?; + let elements = self.parse_comma_separated_values()?; + self.eat_or_error(Token::RightBracket)?; + self.eat_or_error(Token::Colon)?; + let typ = self.parse_type()?; + return Ok(ParsedInstruction::MakeArray { target, elements, typ }); + } } if self.eat_keyword(Keyword::Not)? { @@ -796,6 +826,18 @@ impl<'a> Parser<'a> { } } + fn eat_byte_str(&mut self) -> ParseResult> { + if matches!(self.token.token(), Token::ByteStr(..)) { + let token = self.bump()?; + match token.into_token() { + Token::ByteStr(string) => Ok(Some(string)), + _ => unreachable!(), + } + } else { + Ok(None) + } + } + fn eat(&mut self, token: Token) -> ParseResult { if self.token.token() == &token { self.bump()?; @@ -848,6 +890,13 @@ impl<'a> Parser<'a> { }) } + fn expected_byte_string(&mut self) -> ParseResult { + Err(ParserError::ExpectedByteString { + found: self.token.token().clone(), + span: self.token.to_span(), + }) + } + fn expected_identifier(&mut self) -> ParseResult { Err(ParserError::ExpectedIdentifier { found: self.token.token().clone(), @@ -911,6 +960,8 @@ pub(crate) enum ParserError { ExpectedInstructionOrTerminator { found: Token, span: Span }, #[error("Expected a string literal or 'data', found '{found}'")] ExpectedStringOrData { found: Token, span: Span }, + #[error("Expected a byte string literal, found '{found}'")] + ExpectedByteString { found: Token, span: Span }, #[error("Expected a value, found '{found}'")] ExpectedValue { found: Token, span: Span }, #[error("Multiple return values only allowed for call")] @@ -928,6 +979,7 @@ impl ParserError { | ParserError::ExpectedType { span, .. } | ParserError::ExpectedInstructionOrTerminator { span, .. } | ParserError::ExpectedStringOrData { span, .. } + | ParserError::ExpectedByteString { span, .. } | ParserError::ExpectedValue { span, .. } => *span, ParserError::MultipleReturnValuesOnlyAllowedForCall { second_target, .. } => { second_target.span diff --git a/compiler/noirc_evaluator/src/ssa/parser/tests.rs b/compiler/noirc_evaluator/src/ssa/parser/tests.rs index 593b66d0c98..6318f9dc56e 100644 --- a/compiler/noirc_evaluator/src/ssa/parser/tests.rs +++ b/compiler/noirc_evaluator/src/ssa/parser/tests.rs @@ -89,6 +89,30 @@ fn test_make_composite_array() { assert_ssa_roundtrip(src); } +#[test] +fn test_make_byte_array_with_string_literal() { + let src = " + acir(inline) fn main f0 { + b0(): + v9 = make_array b\"Hello world!\" + return v9 + } + "; + assert_ssa_roundtrip(src); +} + +#[test] +fn test_make_byte_slice_with_string_literal() { + let src = " + acir(inline) fn main f0 { + b0(): + v9 = make_array &b\"Hello world!\" + return v9 + } + "; + assert_ssa_roundtrip(src); +} + #[test] fn test_block_parameters() { let src = " @@ -228,14 +252,14 @@ fn test_constrain_with_static_message() { #[test] fn test_constrain_with_dynamic_message() { - let src = " + let src = r#" acir(inline) fn main f0 { b0(v0: Field, v1: Field): - v7 = make_array [u8 123, u8 120, u8 125, u8 32, u8 123, u8 121, u8 125] : [u8; 7] + v7 = make_array b"{x} {y}" constrain v0 == Field 1, data v7, u32 2, v0, v1 return } - "; + "#; assert_ssa_roundtrip(src); } diff --git a/compiler/noirc_evaluator/src/ssa/parser/token.rs b/compiler/noirc_evaluator/src/ssa/parser/token.rs index d8dd4ec011e..83a2a1d1ed2 100644 --- a/compiler/noirc_evaluator/src/ssa/parser/token.rs +++ b/compiler/noirc_evaluator/src/ssa/parser/token.rs @@ -30,6 +30,7 @@ pub(crate) enum Token { Ident(String), Int(FieldElement), Str(String), + ByteStr(String), Keyword(Keyword), IntType(IntType), /// = @@ -79,6 +80,7 @@ impl Display for Token { Token::Ident(ident) => write!(f, "{}", ident), Token::Int(int) => write!(f, "{}", int), Token::Str(string) => write!(f, "{string:?}"), + Token::ByteStr(string) => write!(f, "{string:?}"), Token::Keyword(keyword) => write!(f, "{}", keyword), Token::IntType(int_type) => write!(f, "{}", int_type), Token::Assign => write!(f, "="), From b7085eab8f8fda6d54fc996d195b1b348f515663 Mon Sep 17 00:00:00 2001 From: Ary Borenszweig Date: Thu, 5 Dec 2024 10:46:36 -0300 Subject: [PATCH 2/2] clippy --- compiler/noirc_evaluator/src/ssa/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/noirc_evaluator/src/ssa/parser/mod.rs b/compiler/noirc_evaluator/src/ssa/parser/mod.rs index 0b6a8927e97..24a5ff43071 100644 --- a/compiler/noirc_evaluator/src/ssa/parser/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/parser/mod.rs @@ -467,7 +467,7 @@ impl<'a> Parser<'a> { return Ok(ParsedInstruction::MakeArray { target, elements, typ }); } else if let Some(string) = self.eat_byte_str()? { let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 }); - let typ = Type::Array(Arc::new(vec![u8.clone()]), string.bytes().count() as u32); + let typ = Type::Array(Arc::new(vec![u8.clone()]), string.len() as u32); let elements = string .bytes() .map(|byte| ParsedValue::NumericConstant {