Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds binary 1.1 read support for e-expressions, macro expansion #789

Merged
merged 16 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 62 additions & 37 deletions src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0;
use crate::lazy::binary::raw::sequence::{
LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0,
};
use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1;
use crate::lazy::binary::raw::v1_1::r#struct::{
LazyRawBinaryFieldName_1_1, LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1,
};
Expand All @@ -33,7 +34,7 @@ use crate::lazy::encoding::{
BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1,
};
use crate::lazy::expanded::macro_evaluator::RawEExpression;
use crate::lazy::never::Never;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::raw_stream_item::LazyRawStreamItem;
use crate::lazy::raw_value_ref::RawValueRef;
use crate::lazy::span::Span;
Expand All @@ -54,7 +55,6 @@ use crate::lazy::text::value::{
LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator,
};
use crate::{Encoding, IonResult, IonType, RawSymbolRef};
use bumpalo::Bump as BumpAllocator;

/// An implementation of the `LazyDecoder` trait that can read any encoding of Ion.
#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -174,7 +174,7 @@ pub struct LazyRawAnyEExpression<'top> {
#[derive(Debug, Copy, Clone)]
pub enum LazyRawAnyEExpressionKind<'top> {
Text_1_1(RawTextEExpression_1_1<'top>),
Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1
Binary_1_1(RawBinaryEExpression_1_1<'top>),
}

impl<'top> LazyRawAnyEExpression<'top> {
Expand All @@ -194,6 +194,13 @@ impl<'top> From<RawTextEExpression_1_1<'top>> for LazyRawAnyEExpression<'top> {
}
}
}
/// Converts a binary Ion 1.1 raw e-expression into a `LazyRawAnyEExpression`.
impl<'top> From<RawBinaryEExpression_1_1<'top>> for LazyRawAnyEExpression<'top> {
fn from(binary_invocation: RawBinaryEExpression_1_1<'top>) -> Self {
// Store the invocation under the `Binary_1_1` kind so later `match`es on `encoding`
// dispatch to the binary implementation.
LazyRawAnyEExpression {
encoding: LazyRawAnyEExpressionKind::Binary_1_1(binary_invocation),
}
}
}

impl<'top> HasSpan<'top> for LazyRawAnyEExpression<'top> {
fn span(&self) -> Span<'top> {
Expand Down Expand Up @@ -222,21 +229,19 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> {
use LazyRawAnyEExpressionKind::*;
match self.encoding {
Text_1_1(ref m) => m.id(),
Binary_1_1(_) => {
todo!("macros in binary Ion 1.1 are not implemented")
}
Binary_1_1(ref m) => m.id(),
}
}

fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> {
use LazyRawAnyEExpressionKind::*;
match self.encoding {
Text_1_1(m) => LazyRawAnyMacroArgsIterator {
encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()),
Text_1_1(e) => LazyRawAnyMacroArgsIterator {
encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(e.raw_arguments()),
},
Binary_1_1(e) => LazyRawAnyMacroArgsIterator {
encoding: LazyRawAnyMacroArgsIteratorKind::Binary_1_1(e.raw_arguments()),
},
Binary_1_1(_) => {
todo!("macros in binary Ion 1.1 are not yet implemented")
}
}
}
}
Expand All @@ -248,6 +253,12 @@ pub enum LazyRawAnyMacroArgsIteratorKind<'top> {
TextEncoding_1_1,
>>::RawArgumentsIterator<'top>,
),
Binary_1_1(
<RawBinaryEExpression_1_1<'top> as RawEExpression<
'top,
BinaryEncoding_1_1,
>>::RawArgumentsIterator<'top>,
),
}
pub struct LazyRawAnyMacroArgsIterator<'top> {
encoding: LazyRawAnyMacroArgsIteratorKind<'top>,
Expand All @@ -257,19 +268,31 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> {
type Item = IonResult<LazyRawValueExpr<'top, AnyEncoding>>;

fn next(&mut self) -> Option<Self::Item> {
match self.encoding {
LazyRawAnyMacroArgsIteratorKind::Text_1_1(mut iter) => match iter.next() {
match &mut self.encoding {
LazyRawAnyMacroArgsIteratorKind::Text_1_1(ref mut iter) => match iter.next() {
Some(Ok(RawValueExpr::ValueLiteral(value))) => {
Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value))))
}
Some(Ok(RawValueExpr::MacroInvocation(invocation))) => {
Some(Ok(RawValueExpr::MacroInvocation(LazyRawAnyEExpression {
Some(Ok(RawValueExpr::EExp(invocation))) => {
Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression {
Comment on lines -265 to +277
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I renamed RawValueExpr::MacroInvocation to RawValueExpr::EExp because at the raw level we're always talking about syntactic elements.

encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation),
})))
}
Some(Err(e)) => Some(Err(e)),
None => None,
},
LazyRawAnyMacroArgsIteratorKind::Binary_1_1(ref mut iter) => match iter.next() {
Some(Ok(RawValueExpr::ValueLiteral(value))) => {
Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value))))
}
Some(Ok(RawValueExpr::EExp(invocation))) => {
Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression {
encoding: LazyRawAnyEExpressionKind::Binary_1_1(invocation),
})))
}
Some(Err(e)) => Some(Err(e)),
None => None,
},
}
}
}
Expand Down Expand Up @@ -408,17 +431,17 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> {

fn next<'top>(
&'top mut self,
allocator: &'top BumpAllocator,
context: EncodingContextRef<'top>,
) -> IonResult<LazyRawStreamItem<'top, AnyEncoding>>
where
'data: 'top,
{
use RawReaderKind::*;
match &mut self.encoding {
Text_1_0(r) => Ok(r.next(allocator)?.into()),
Text_1_0(r) => Ok(r.next(context)?.into()),
Binary_1_0(r) => Ok(r.next()?.into()),
Text_1_1(r) => Ok(r.next(allocator)?.into()),
Binary_1_1(r) => Ok(r.next()?.into()),
Text_1_1(r) => Ok(r.next(context)?.into()),
Binary_1_1(r) => Ok(r.next(context)?.into()),
Comment on lines -418 to +444
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Most of the buffer types used to hold a reference to the bump allocator in case they needed to decode text escapes or cache child expressions. Now that the reader needs to parse binary e-expressions, the parser needs access to the macro table to look up the macro signature. The encoding context has a reference to both the allocator and the macro table, so now the buffers get a reference to the encoding context.

}
}

Expand Down Expand Up @@ -517,7 +540,7 @@ impl<'top> From<LazyRawValueExpr<'top, TextEncoding_1_0>> for LazyRawValueExpr<'
fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self {
match value {
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()),
RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in text Ion 1.0"),
RawValueExpr::EExp(_) => unreachable!("macro invocation in text Ion 1.0"),
}
}
}
Expand All @@ -528,7 +551,7 @@ impl<'top> From<LazyRawValueExpr<'top, BinaryEncoding_1_0>>
fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self {
match value {
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()),
RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in binary Ion 1.0"),
RawValueExpr::EExp(_) => unreachable!("macro invocation in binary Ion 1.0"),
}
}
}
Expand All @@ -537,11 +560,11 @@ impl<'top> From<LazyRawValueExpr<'top, TextEncoding_1_1>> for LazyRawValueExpr<'
fn from(value: LazyRawValueExpr<'top, TextEncoding_1_1>) -> Self {
match value {
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()),
RawValueExpr::MacroInvocation(m) => {
RawValueExpr::EExp(m) => {
let invocation = LazyRawAnyEExpression {
encoding: LazyRawAnyEExpressionKind::Text_1_1(m),
};
RawValueExpr::MacroInvocation(invocation)
RawValueExpr::EExp(invocation)
}
}
}
Expand All @@ -553,11 +576,11 @@ impl<'top> From<LazyRawValueExpr<'top, BinaryEncoding_1_1>>
fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_1>) -> Self {
match value {
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()),
RawValueExpr::MacroInvocation(m) => {
RawValueExpr::EExp(m) => {
let invocation = LazyRawAnyEExpression {
encoding: LazyRawAnyEExpressionKind::Binary_1_1(m),
};
RawValueExpr::MacroInvocation(invocation)
RawValueExpr::EExp(invocation)
}
}
}
Expand Down Expand Up @@ -723,8 +746,8 @@ impl<'top> From<LazyRawStreamItem<'top, BinaryEncoding_1_1>>
LazyRawStreamItem::<BinaryEncoding_1_1>::Value(value) => {
LazyRawStreamItem::<AnyEncoding>::Value(value.into())
}
LazyRawStreamItem::<BinaryEncoding_1_1>::EExpression(_) => {
todo!("Macro invocations not yet implemented in binary 1.1")
LazyRawStreamItem::<BinaryEncoding_1_1>::EExpression(eexp) => {
LazyRawStreamItem::<AnyEncoding>::EExpression(eexp.into())
}
LazyRawStreamItem::<BinaryEncoding_1_1>::EndOfStream(end) => {
LazyRawStreamItem::<AnyEncoding>::EndOfStream(end)
Expand Down Expand Up @@ -1464,6 +1487,7 @@ mod tests {
use crate::lazy::any_encoding::LazyRawAnyReader;
use crate::lazy::binary::test_utilities::to_binary_ion;
use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue};
use crate::lazy::expanded::EncodingContext;
use crate::lazy::raw_stream_item::LazyRawStreamItem;
use crate::lazy::raw_value_ref::RawValueRef;
use crate::{IonResult, RawSymbolRef, Timestamp};
Expand All @@ -1473,41 +1497,42 @@ mod tests {
#[test]
fn any_encoding() -> IonResult<()> {
fn test_input(data: &[u8]) -> IonResult<()> {
let allocator = BumpAllocator::new();
let encoding_context = EncodingContext::empty();
let context = encoding_context.get_ref();

let mut reader = LazyRawAnyReader::new(data);
assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0));
assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0));
let _strukt = reader
.next(&allocator)?
.next(context)?
.expect_value()?
.read()?
.expect_struct()?;
let name = reader.next(&allocator)?.expect_value()?;
let name = reader.next(context)?.expect_value()?;
assert_eq!(
name.annotations().next().unwrap()?,
RawSymbolRef::SymbolId(4)
);
assert_eq!(name.read()?.expect_string()?.text(), "Gary");
assert_eq!(
reader.next(&allocator)?.expect_value()?.read()?,
reader.next(context)?.expect_value()?.read()?,
RawValueRef::String("foo".into())
);
assert_eq!(
reader.next(&allocator)?.expect_value()?.read()?,
reader.next(context)?.expect_value()?.read()?,
RawValueRef::Int(5.into())
);
assert_eq!(
reader.next(&allocator)?.expect_value()?.read()?,
reader.next(context)?.expect_value()?.read()?,
RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?)
);
assert_eq!(
reader.next(&allocator)?.expect_value()?.read()?,
reader.next(context)?.expect_value()?.read()?,
RawValueRef::Bool(false)
);

let mut sum = 0;
for lazy_value_result in reader
.next(&allocator)?
.next(context)?
.expect_value()?
.read()?
.expect_list()?
Expand All @@ -1521,7 +1546,7 @@ mod tests {
// local symbol table and the raw reader interprets that as a different value.

assert!(matches!(
reader.next(&allocator)?,
reader.next(context)?,
LazyRawStreamItem::<AnyEncoding>::EndOfStream(_)
));
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/encoded_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub(crate) struct EncodedValue<HeaderType: EncodedHeader> {
// value. If `annotations` is empty, `annotations_header_length` will be zero. The annotations
// wrapper contains several fields: an opcode, a wrapper length, a sequence length, and the
// sequence itself.
pub annotations_header_length: u8,
pub annotations_header_length: u16,
// The number of bytes used to encode the series of symbol IDs inside the annotations wrapper.
pub annotations_sequence_length: u16,
Comment on lines -79 to 81
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ There was a disagreement between how Ion 1.0 and Ion 1.1 were using these fields.

Ion 1.1 annotation encodings have two parts: a header and the sequence itself. The 1.1 reader treated annotations_header_length and annotations_sequence_length as describing two non-overlapping pieces of the encoding.

Ion 1.0 annotation encodings have several parts: a header, a wrapper length, a sequence length, and the sequence itself. The 1.0 reader treated annotations_header_length as the combined length of all of these pieces, and annotations_sequence_length as the number of bytes at the end of that span that make up the sequence itself.

For the moment, I've adjusted 1.1's behavior to align with 1.0's. This required widening annotations_header_length from u8 to u16, since it now stores the total length (including the sequence) rather than just the header. I actually think 1.1's interpretation was better, but switching to it will require changing lots of small accessor methods, so I've left that for a future PR.

// Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses.
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/binary/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ impl<'a> ImmutableBuffer<'a> {
);
}

lazy_value.encoded_value.annotations_header_length = wrapper.header_length;
lazy_value.encoded_value.annotations_header_length = wrapper.header_length as u16;
lazy_value.encoded_value.annotations_sequence_length = wrapper.sequence_length as u16;
lazy_value.encoded_value.total_length += wrapper.header_length as usize;
// Modify the input to include the annotations
Expand Down
4 changes: 2 additions & 2 deletions src/lazy/binary/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::result::IonFailure;
use crate::{Encoding, IonResult};

use crate::lazy::any_encoding::IonEncoding;
use bumpalo::Bump as BumpAllocator;
use crate::lazy::expanded::EncodingContextRef;

/// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found
/// in the provided input stream.
Expand Down Expand Up @@ -125,7 +125,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0

fn next<'top>(
&'top mut self,
_allocator: &'top BumpAllocator,
_context: EncodingContextRef<'top>,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ The binary 1.0 reader is the only one that doesn't use anything from the encoding context (the allocator or the macro table) during parsing.

) -> IonResult<LazyRawStreamItem<'top, BinaryEncoding_1_0>>
where
'data: 'top,
Expand Down
Loading
Loading