feat: show printable byte arrays as byte strings in SSA #6709

Merged
merged 2 commits on Dec 5, 2024
12 changes: 6 additions & 6 deletions compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs
@@ -842,27 +842,27 @@ mod tests {

#[test]
fn simplify_derive_generators_has_correct_type() {
let src = "
let src = r#"
brillig(inline) fn main f0 {
b0():
v0 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24]
v0 = make_array b"DEFAULT_DOMAIN_SEPARATOR"

// This call was previously incorrectly simplified to something that returned `[Field; 3]`
v2 = call derive_pedersen_generators(v0, u32 0) -> [(Field, Field, u1); 1]

return v2
}
";
"#;
let ssa = Ssa::from_str(src).unwrap();

let expected = "
let expected = r#"
brillig(inline) fn main f0 {
b0():
v15 = make_array [u8 68, u8 69, u8 70, u8 65, u8 85, u8 76, u8 84, u8 95, u8 68, u8 79, u8 77, u8 65, u8 73, u8 78, u8 95, u8 83, u8 69, u8 80, u8 65, u8 82, u8 65, u8 84, u8 79, u8 82] : [u8; 24]
v15 = make_array b"DEFAULT_DOMAIN_SEPARATOR"
v19 = make_array [Field 3728882899078719075161482178784387565366481897740339799480980287259621149274, Field -9903063709032878667290627648209915537972247634463802596148419711785767431332, u1 0] : [(Field, Field, u1); 1]
return v19
}
";
"#;
assert_normalized_ssa_equals(ssa, expected);
}
}
44 changes: 44 additions & 0 deletions compiler/noirc_evaluator/src/ssa/ir/printer.rs
@@ -5,8 +5,11 @@ use std::{
};

use acvm::acir::AcirField;
use im::Vector;
use iter_extended::vecmap;

use crate::ssa::ir::types::{NumericType, Type};

use super::{
basic_block::BasicBlockId,
dfg::DataFlowGraph,
@@ -220,6 +223,28 @@ fn display_instruction_inner(
)
}
Instruction::MakeArray { elements, typ } => {
// If the array is a byte array, we check if all the bytes are printable ascii characters
// and, if so, we print the array as a string literal (easier to understand).
// It could happen that the byte array is a random byte sequence that happens to be printable
// (it didn't come from a string literal) but this still reduces the noise in the output
// and actually represents the same value.
let (element_types, is_slice) = match typ {
Type::Array(types, _) => (types, false),
Type::Slice(types) => (types, true),
_ => panic!("Expected array or slice type for MakeArray"),
};
if element_types.len() == 1
&& element_types[0] == Type::Numeric(NumericType::Unsigned { bit_size: 8 })
{
if let Some(string) = try_byte_array_to_string(elements, function) {
if is_slice {
return writeln!(f, "make_array &b{:?}", string);
} else {
return writeln!(f, "make_array b{:?}", string);
}
}
}

write!(f, "make_array [")?;

for (i, element) in elements.iter().enumerate() {
@@ -234,6 +259,25 @@
}
}

fn try_byte_array_to_string(elements: &Vector<ValueId>, function: &Function) -> Option<String> {
let mut string = String::new();
for element in elements {
let element = function.dfg.get_numeric_constant(*element)?;
let element = element.try_to_u32()?;
if element > 0xFF {
return None;
}
let byte = element as u8;
if byte.is_ascii_alphanumeric() || byte.is_ascii_punctuation() || byte.is_ascii_whitespace()
{
string.push(byte as char);
} else {
return None;
}
}
Some(string)
}

fn result_types(function: &Function, results: &[ValueId]) -> String {
let types = vecmap(results, |result| function.dfg.type_of_value(*result).to_string());
if types.is_empty() {
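The comment in `display_instruction_inner` above describes the heuristic: a `[u8; N]` array (or `[u8]` slice) is printed as a byte string only when every element is a printable ASCII byte. A minimal standalone sketch of that check, not the compiler's code, just the same character classes used in `try_byte_array_to_string`:

    // A byte qualifies if it is alphanumeric, punctuation, or whitespace ASCII,
    // mirroring the checks in try_byte_array_to_string.
    fn is_printable_byte_array(bytes: &[u8]) -> bool {
        bytes.iter().all(|byte| {
            byte.is_ascii_alphanumeric()
                || byte.is_ascii_punctuation()
                || byte.is_ascii_whitespace()
        })
    }

    fn main() {
        // Passes the check, so the printer emits `make_array b"DEFAULT_DOMAIN_SEPARATOR"`.
        assert!(is_printable_byte_array(b"DEFAULT_DOMAIN_SEPARATOR"));
        // A non-printable byte (0x00) keeps the old `[u8 ...]` form.
        assert!(!is_printable_byte_array(&[0x00, 0x41]));
    }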
24 changes: 18 additions & 6 deletions compiler/noirc_evaluator/src/ssa/parser/lexer.rs
@@ -22,7 +22,7 @@
chars: source.char_indices(),
position: 0,
done: false,
max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus()) - BigInt::one(),
}
}
@@ -62,6 +62,7 @@
Some('-') if self.peek_char() == Some('>') => self.double_char_token(Token::Arrow),
Some('-') => self.single_char_token(Token::Dash),
Some('"') => self.eat_string_literal(),
Some('b') if self.peek_char() == Some('"') => self.eat_byte_string_literal(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Some(char) => Err(LexerError::UnexpectedCharacter {
char,
@@ -180,8 +181,23 @@

fn eat_string_literal(&mut self) -> SpannedTokenResult {
let start = self.position;
let mut string = String::new();
let string = self.eat_string(start)?;
let str_literal_token = Token::Str(string);
let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

fn eat_byte_string_literal(&mut self) -> SpannedTokenResult {
let start = self.position;
self.next_char(); // skip the b
let string = self.eat_string(start)?;
let str_literal_token = Token::ByteStr(string);
let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

fn eat_string(&mut self, start: u32) -> Result<String, LexerError> {
let mut string = String::new();
while let Some(next) = self.next_char() {
let char = match next {
'"' => break,
@@ -206,11 +222,7 @@

string.push(char);
}

let str_literal_token = Token::Str(string);

let end = self.position;
Ok(str_literal_token.into_span(start, end))
Ok(string)
}

fn eat_while<F: Fn(char) -> bool>(
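The lexer side needs only one character of lookahead: the new `Some('b') if self.peek_char() == Some('"')` arm turns `b"..."` into a `Token::ByteStr`, while `b` followed by anything else (such as the block label `b0`) still lexes as an identifier. A standalone sketch of that dispatch decision; the helper function is illustrative, not part of the PR:

    // Decide from the current character plus one character of lookahead whether a
    // byte string literal starts here.
    fn starts_byte_string(current: char, peek: Option<char>) -> bool {
        current == 'b' && peek == Some('"')
    }

    fn main() {
        assert!(starts_byte_string('b', Some('"')));  // b"Hello" -> Token::ByteStr
        assert!(!starts_byte_string('b', Some('0'))); // b0       -> identifier
        assert!(!starts_byte_string('"', Some('H'))); // "Hello"  -> Token::Str
    }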
66 changes: 59 additions & 7 deletions compiler/noirc_evaluator/src/ssa/parser/mod.rs
@@ -4,7 +4,10 @@ use std::{
};

use super::{
ir::{instruction::BinaryOp, types::Type},
ir::{
instruction::BinaryOp,
types::{NumericType, Type},
},
Ssa,
};

@@ -448,12 +451,39 @@ impl<'a> Parser<'a> {
}

if self.eat_keyword(Keyword::MakeArray)? {
self.eat_or_error(Token::LeftBracket)?;
let elements = self.parse_comma_separated_values()?;
self.eat_or_error(Token::RightBracket)?;
self.eat_or_error(Token::Colon)?;
let typ = self.parse_type()?;
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
if self.eat(Token::Ampersand)? {
let Some(string) = self.eat_byte_str()? else {
return self.expected_byte_string();
};
let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 });
let typ = Type::Slice(Arc::new(vec![u8.clone()]));
let elements = string
.bytes()
.map(|byte| ParsedValue::NumericConstant {
constant: FieldElement::from(byte as u128),
typ: u8.clone(),
})
.collect();
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
} else if let Some(string) = self.eat_byte_str()? {
let u8 = Type::Numeric(NumericType::Unsigned { bit_size: 8 });
let typ = Type::Array(Arc::new(vec![u8.clone()]), string.len() as u32);
let elements = string
.bytes()
.map(|byte| ParsedValue::NumericConstant {
constant: FieldElement::from(byte as u128),
typ: u8.clone(),
})
.collect();
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
} else {
self.eat_or_error(Token::LeftBracket)?;
let elements = self.parse_comma_separated_values()?;
self.eat_or_error(Token::RightBracket)?;
self.eat_or_error(Token::Colon)?;
let typ = self.parse_type()?;
return Ok(ParsedInstruction::MakeArray { target, elements, typ });
}
}

if self.eat_keyword(Keyword::Not)? {
@@ -796,6 +826,18 @@ impl<'a> Parser<'a> {
}
}

fn eat_byte_str(&mut self) -> ParseResult<Option<String>> {
if matches!(self.token.token(), Token::ByteStr(..)) {
let token = self.bump()?;
match token.into_token() {
Token::ByteStr(string) => Ok(Some(string)),
_ => unreachable!(),
}
} else {
Ok(None)
}
}

fn eat(&mut self, token: Token) -> ParseResult<bool> {
if self.token.token() == &token {
self.bump()?;
@@ -848,6 +890,13 @@ impl<'a> Parser<'a> {
})
}

fn expected_byte_string<T>(&mut self) -> ParseResult<T> {
Err(ParserError::ExpectedByteString {
found: self.token.token().clone(),
span: self.token.to_span(),
})
}

fn expected_identifier<T>(&mut self) -> ParseResult<T> {
Err(ParserError::ExpectedIdentifier {
found: self.token.token().clone(),
@@ -911,6 +960,8 @@ pub(crate) enum ParserError {
ExpectedInstructionOrTerminator { found: Token, span: Span },
#[error("Expected a string literal or 'data', found '{found}'")]
ExpectedStringOrData { found: Token, span: Span },
#[error("Expected a byte string literal, found '{found}'")]
ExpectedByteString { found: Token, span: Span },
#[error("Expected a value, found '{found}'")]
ExpectedValue { found: Token, span: Span },
#[error("Multiple return values only allowed for call")]
@@ -928,6 +979,7 @@ impl ParserError {
| ParserError::ExpectedType { span, .. }
| ParserError::ExpectedInstructionOrTerminator { span, .. }
| ParserError::ExpectedStringOrData { span, .. }
| ParserError::ExpectedByteString { span, .. }
| ParserError::ExpectedValue { span, .. } => *span,
ParserError::MultipleReturnValuesOnlyAllowedForCall { second_target, .. } => {
second_target.span
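On the parser side a byte string is pure sugar: `b"AB"` produces the same `MakeArray` as `[u8 65, u8 66] : [u8; 2]`, and `&b"AB"` the corresponding `[u8]` slice, because each character is expanded through `string.bytes()` into a `u8` numeric constant. A standalone sketch of that expansion, with a plain tuple standing in for the crate's `ParsedValue` (an assumption made only to keep the example self-contained):

    // Expand a byte string literal into (value, bit_size) pairs, mirroring the
    // `string.bytes().map(...)` expansion in the parser above.
    fn expand_byte_string(s: &str) -> Vec<(u128, u32)> {
        s.bytes().map(|byte| (byte as u128, 8)).collect()
    }

    fn main() {
        // b"AB" parses to the same elements as [u8 65, u8 66] : [u8; 2].
        let expected: Vec<(u128, u32)> = vec![(65, 8), (66, 8)];
        assert_eq!(expand_byte_string("AB"), expected);
    }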
30 changes: 27 additions & 3 deletions compiler/noirc_evaluator/src/ssa/parser/tests.rs
@@ -89,6 +89,30 @@ fn test_make_composite_array() {
assert_ssa_roundtrip(src);
}

#[test]
fn test_make_byte_array_with_string_literal() {
let src = "
acir(inline) fn main f0 {
b0():
v9 = make_array b\"Hello world!\"
return v9
}
";
assert_ssa_roundtrip(src);
}

#[test]
fn test_make_byte_slice_with_string_literal() {
let src = "
acir(inline) fn main f0 {
b0():
v9 = make_array &b\"Hello world!\"
return v9
}
";
assert_ssa_roundtrip(src);
}

#[test]
fn test_block_parameters() {
let src = "
@@ -228,14 +252,14 @@ fn test_constrain_with_static_message() {

#[test]
fn test_constrain_with_dynamic_message() {
let src = "
let src = r#"
acir(inline) fn main f0 {
b0(v0: Field, v1: Field):
v7 = make_array [u8 123, u8 120, u8 125, u8 32, u8 123, u8 121, u8 125] : [u8; 7]
v7 = make_array b"{x} {y}"
constrain v0 == Field 1, data v7, u32 2, v0, v1
return
}
";
"#;
assert_ssa_roundtrip(src);
}

2 changes: 2 additions & 0 deletions compiler/noirc_evaluator/src/ssa/parser/token.rs
@@ -30,6 +30,7 @@ pub(crate) enum Token {
Ident(String),
Int(FieldElement),
Str(String),
ByteStr(String),
Keyword(Keyword),
IntType(IntType),
/// =
@@ -79,6 +80,7 @@ impl Display for Token {
Token::Ident(ident) => write!(f, "{}", ident),
Token::Int(int) => write!(f, "{}", int),
Token::Str(string) => write!(f, "{string:?}"),
Token::ByteStr(string) => write!(f, "{string:?}"),
Token::Keyword(keyword) => write!(f, "{}", keyword),
Token::IntType(int_type) => write!(f, "{}", int_type),
Token::Assign => write!(f, "="),