-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds binary 1.1 read support for e-expressions, macro expansion #789
Changes from all commits
c78ae2f
344826c
8b08c4d
b8b9e1d
bfa0f03
b71b6e3
9470b5b
2cef814
d564826
7b4f4a7
e1f2ba8
d19abeb
0f8c894
afc58cf
b196461
bcc1b62
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; | |
use crate::lazy::binary::raw::sequence::{ | ||
LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0, | ||
}; | ||
use crate::lazy::binary::raw::v1_1::e_expression::RawBinaryEExpression_1_1; | ||
use crate::lazy::binary::raw::v1_1::r#struct::{ | ||
LazyRawBinaryFieldName_1_1, LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, | ||
}; | ||
|
@@ -33,7 +34,7 @@ use crate::lazy::encoding::{ | |
BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, | ||
}; | ||
use crate::lazy::expanded::macro_evaluator::RawEExpression; | ||
use crate::lazy::never::Never; | ||
use crate::lazy::expanded::EncodingContextRef; | ||
use crate::lazy::raw_stream_item::LazyRawStreamItem; | ||
use crate::lazy::raw_value_ref::RawValueRef; | ||
use crate::lazy::span::Span; | ||
|
@@ -54,7 +55,6 @@ use crate::lazy::text::value::{ | |
LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, | ||
}; | ||
use crate::{Encoding, IonResult, IonType, RawSymbolRef}; | ||
use bumpalo::Bump as BumpAllocator; | ||
|
||
/// An implementation of the `LazyDecoder` trait that can read any encoding of Ion. | ||
#[derive(Debug, Clone, Copy)] | ||
|
@@ -174,7 +174,7 @@ pub struct LazyRawAnyEExpression<'top> { | |
#[derive(Debug, Copy, Clone)] | ||
pub enum LazyRawAnyEExpressionKind<'top> { | ||
Text_1_1(RawTextEExpression_1_1<'top>), | ||
Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1 | ||
Binary_1_1(RawBinaryEExpression_1_1<'top>), | ||
} | ||
|
||
impl<'top> LazyRawAnyEExpression<'top> { | ||
|
@@ -194,6 +194,13 @@ impl<'top> From<RawTextEExpression_1_1<'top>> for LazyRawAnyEExpression<'top> { | |
} | ||
} | ||
} | ||
impl<'top> From<RawBinaryEExpression_1_1<'top>> for LazyRawAnyEExpression<'top> { | ||
fn from(binary_invocation: RawBinaryEExpression_1_1<'top>) -> Self { | ||
LazyRawAnyEExpression { | ||
encoding: LazyRawAnyEExpressionKind::Binary_1_1(binary_invocation), | ||
} | ||
} | ||
} | ||
|
||
impl<'top> HasSpan<'top> for LazyRawAnyEExpression<'top> { | ||
fn span(&self) -> Span<'top> { | ||
|
@@ -222,21 +229,19 @@ impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { | |
use LazyRawAnyEExpressionKind::*; | ||
match self.encoding { | ||
Text_1_1(ref m) => m.id(), | ||
Binary_1_1(_) => { | ||
todo!("macros in binary Ion 1.1 are not implemented") | ||
} | ||
Binary_1_1(ref m) => m.id(), | ||
} | ||
} | ||
|
||
fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { | ||
use LazyRawAnyEExpressionKind::*; | ||
match self.encoding { | ||
Text_1_1(m) => LazyRawAnyMacroArgsIterator { | ||
encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), | ||
Text_1_1(e) => LazyRawAnyMacroArgsIterator { | ||
encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(e.raw_arguments()), | ||
}, | ||
Binary_1_1(e) => LazyRawAnyMacroArgsIterator { | ||
encoding: LazyRawAnyMacroArgsIteratorKind::Binary_1_1(e.raw_arguments()), | ||
}, | ||
Binary_1_1(_) => { | ||
todo!("macros in binary Ion 1.1 are not yet implemented") | ||
} | ||
} | ||
} | ||
} | ||
|
@@ -248,6 +253,12 @@ pub enum LazyRawAnyMacroArgsIteratorKind<'top> { | |
TextEncoding_1_1, | ||
>>::RawArgumentsIterator<'top>, | ||
), | ||
Binary_1_1( | ||
<RawBinaryEExpression_1_1<'top> as RawEExpression< | ||
'top, | ||
BinaryEncoding_1_1, | ||
>>::RawArgumentsIterator<'top>, | ||
), | ||
} | ||
pub struct LazyRawAnyMacroArgsIterator<'top> { | ||
encoding: LazyRawAnyMacroArgsIteratorKind<'top>, | ||
|
@@ -257,19 +268,31 @@ impl<'top> Iterator for LazyRawAnyMacroArgsIterator<'top> { | |
type Item = IonResult<LazyRawValueExpr<'top, AnyEncoding>>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
match self.encoding { | ||
LazyRawAnyMacroArgsIteratorKind::Text_1_1(mut iter) => match iter.next() { | ||
match &mut self.encoding { | ||
LazyRawAnyMacroArgsIteratorKind::Text_1_1(ref mut iter) => match iter.next() { | ||
Some(Ok(RawValueExpr::ValueLiteral(value))) => { | ||
Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) | ||
} | ||
Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { | ||
Some(Ok(RawValueExpr::MacroInvocation(LazyRawAnyEExpression { | ||
Some(Ok(RawValueExpr::EExp(invocation))) => { | ||
Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { | ||
encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), | ||
}))) | ||
} | ||
Some(Err(e)) => Some(Err(e)), | ||
None => None, | ||
}, | ||
LazyRawAnyMacroArgsIteratorKind::Binary_1_1(ref mut iter) => match iter.next() { | ||
Some(Ok(RawValueExpr::ValueLiteral(value))) => { | ||
Some(Ok(RawValueExpr::ValueLiteral(LazyRawAnyValue::from(value)))) | ||
} | ||
Some(Ok(RawValueExpr::EExp(invocation))) => { | ||
Some(Ok(RawValueExpr::EExp(LazyRawAnyEExpression { | ||
encoding: LazyRawAnyEExpressionKind::Binary_1_1(invocation), | ||
}))) | ||
} | ||
Some(Err(e)) => Some(Err(e)), | ||
None => None, | ||
}, | ||
} | ||
} | ||
} | ||
|
@@ -408,17 +431,17 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> { | |
|
||
fn next<'top>( | ||
&'top mut self, | ||
allocator: &'top BumpAllocator, | ||
context: EncodingContextRef<'top>, | ||
) -> IonResult<LazyRawStreamItem<'top, AnyEncoding>> | ||
where | ||
'data: 'top, | ||
{ | ||
use RawReaderKind::*; | ||
match &mut self.encoding { | ||
Text_1_0(r) => Ok(r.next(allocator)?.into()), | ||
Text_1_0(r) => Ok(r.next(context)?.into()), | ||
Binary_1_0(r) => Ok(r.next()?.into()), | ||
Text_1_1(r) => Ok(r.next(allocator)?.into()), | ||
Binary_1_1(r) => Ok(r.next()?.into()), | ||
Text_1_1(r) => Ok(r.next(context)?.into()), | ||
Binary_1_1(r) => Ok(r.next(context)?.into()), | ||
Comment on lines
-418
to
+444
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🗺️ Most of the buffer types used to hold a reference to the bump allocator in case they needed to decode text escapes or cache child expressions. Now that the reader needs to parse binary e-expressions, the parser needs access to the macro table to look up the macro signature. The encoding context has a reference to both the allocator and the macro table, so now the buffers get a reference to the encoding context. |
||
} | ||
} | ||
|
||
|
@@ -517,7 +540,7 @@ impl<'top> From<LazyRawValueExpr<'top, TextEncoding_1_0>> for LazyRawValueExpr<' | |
fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { | ||
match value { | ||
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), | ||
RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in text Ion 1.0"), | ||
RawValueExpr::EExp(_) => unreachable!("macro invocation in text Ion 1.0"), | ||
} | ||
} | ||
} | ||
|
@@ -528,7 +551,7 @@ impl<'top> From<LazyRawValueExpr<'top, BinaryEncoding_1_0>> | |
fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self { | ||
match value { | ||
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), | ||
RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in binary Ion 1.0"), | ||
RawValueExpr::EExp(_) => unreachable!("macro invocation in binary Ion 1.0"), | ||
} | ||
} | ||
} | ||
|
@@ -537,11 +560,11 @@ impl<'top> From<LazyRawValueExpr<'top, TextEncoding_1_1>> for LazyRawValueExpr<' | |
fn from(value: LazyRawValueExpr<'top, TextEncoding_1_1>) -> Self { | ||
match value { | ||
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), | ||
RawValueExpr::MacroInvocation(m) => { | ||
RawValueExpr::EExp(m) => { | ||
let invocation = LazyRawAnyEExpression { | ||
encoding: LazyRawAnyEExpressionKind::Text_1_1(m), | ||
}; | ||
RawValueExpr::MacroInvocation(invocation) | ||
RawValueExpr::EExp(invocation) | ||
} | ||
} | ||
} | ||
|
@@ -553,11 +576,11 @@ impl<'top> From<LazyRawValueExpr<'top, BinaryEncoding_1_1>> | |
fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_1>) -> Self { | ||
match value { | ||
RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), | ||
RawValueExpr::MacroInvocation(m) => { | ||
RawValueExpr::EExp(m) => { | ||
let invocation = LazyRawAnyEExpression { | ||
encoding: LazyRawAnyEExpressionKind::Binary_1_1(m), | ||
}; | ||
RawValueExpr::MacroInvocation(invocation) | ||
RawValueExpr::EExp(invocation) | ||
} | ||
} | ||
} | ||
|
@@ -723,8 +746,8 @@ impl<'top> From<LazyRawStreamItem<'top, BinaryEncoding_1_1>> | |
LazyRawStreamItem::<BinaryEncoding_1_1>::Value(value) => { | ||
LazyRawStreamItem::<AnyEncoding>::Value(value.into()) | ||
} | ||
LazyRawStreamItem::<BinaryEncoding_1_1>::EExpression(_) => { | ||
todo!("Macro invocations not yet implemented in binary 1.1") | ||
LazyRawStreamItem::<BinaryEncoding_1_1>::EExpression(eexp) => { | ||
LazyRawStreamItem::<AnyEncoding>::EExpression(eexp.into()) | ||
} | ||
LazyRawStreamItem::<BinaryEncoding_1_1>::EndOfStream(end) => { | ||
LazyRawStreamItem::<AnyEncoding>::EndOfStream(end) | ||
|
@@ -1464,6 +1487,7 @@ mod tests { | |
use crate::lazy::any_encoding::LazyRawAnyReader; | ||
use crate::lazy::binary::test_utilities::to_binary_ion; | ||
use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue}; | ||
use crate::lazy::expanded::EncodingContext; | ||
use crate::lazy::raw_stream_item::LazyRawStreamItem; | ||
use crate::lazy::raw_value_ref::RawValueRef; | ||
use crate::{IonResult, RawSymbolRef, Timestamp}; | ||
|
@@ -1473,41 +1497,42 @@ mod tests { | |
#[test] | ||
fn any_encoding() -> IonResult<()> { | ||
fn test_input(data: &[u8]) -> IonResult<()> { | ||
let allocator = BumpAllocator::new(); | ||
let encoding_context = EncodingContext::empty(); | ||
let context = encoding_context.get_ref(); | ||
|
||
let mut reader = LazyRawAnyReader::new(data); | ||
assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); | ||
assert_eq!(reader.next(context)?.expect_ivm()?.version(), (1, 0)); | ||
let _strukt = reader | ||
.next(&allocator)? | ||
.next(context)? | ||
.expect_value()? | ||
.read()? | ||
.expect_struct()?; | ||
let name = reader.next(&allocator)?.expect_value()?; | ||
let name = reader.next(context)?.expect_value()?; | ||
assert_eq!( | ||
name.annotations().next().unwrap()?, | ||
RawSymbolRef::SymbolId(4) | ||
); | ||
assert_eq!(name.read()?.expect_string()?.text(), "Gary"); | ||
assert_eq!( | ||
reader.next(&allocator)?.expect_value()?.read()?, | ||
reader.next(context)?.expect_value()?.read()?, | ||
RawValueRef::String("foo".into()) | ||
); | ||
assert_eq!( | ||
reader.next(&allocator)?.expect_value()?.read()?, | ||
reader.next(context)?.expect_value()?.read()?, | ||
RawValueRef::Int(5.into()) | ||
); | ||
assert_eq!( | ||
reader.next(&allocator)?.expect_value()?.read()?, | ||
reader.next(context)?.expect_value()?.read()?, | ||
RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?) | ||
); | ||
assert_eq!( | ||
reader.next(&allocator)?.expect_value()?.read()?, | ||
reader.next(context)?.expect_value()?.read()?, | ||
RawValueRef::Bool(false) | ||
); | ||
|
||
let mut sum = 0; | ||
for lazy_value_result in reader | ||
.next(&allocator)? | ||
.next(context)? | ||
.expect_value()? | ||
.read()? | ||
.expect_list()? | ||
|
@@ -1521,7 +1546,7 @@ mod tests { | |
// local symbol table and the raw reader interprets that as a different value. | ||
|
||
assert!(matches!( | ||
reader.next(&allocator)?, | ||
reader.next(context)?, | ||
LazyRawStreamItem::<AnyEncoding>::EndOfStream(_) | ||
)); | ||
Ok(()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,7 +76,7 @@ pub(crate) struct EncodedValue<HeaderType: EncodedHeader> { | |
// value. If `annotations` is empty, `annotations_header_length` will be zero. The annotations | ||
// wrapper contains several fields: an opcode, a wrapper length, a sequence length, and the | ||
// sequence itself. | ||
pub annotations_header_length: u8, | ||
pub annotations_header_length: u16, | ||
// The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. | ||
pub annotations_sequence_length: u16, | ||
Comment on lines
-79
to
81
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🗺️ There was a disagreement between how Ion 1.0 and Ion 1.1 were using these fields. Ion 1.1 annotations encodings have two parts: a header, and the sequence itself. It treated the [inline code lost in extraction]. Ion 1.0 annotations encodings have several parts: a header, a wrapper length, a sequence length, and the sequence itself. It treated [inline code lost in extraction]. For the moment, I've adjusted 1.1's behavior to align with 1.0's. This required me to increase the size of the `annotations_header_length` field [name reconstructed from the adjacent diff, where it changes from `u8` to `u16`]. |
||
// Whether the annotations sequence is encoded as `FlexSym`s or as symbol addresses. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ use crate::result::IonFailure; | |
use crate::{Encoding, IonResult}; | ||
|
||
use crate::lazy::any_encoding::IonEncoding; | ||
use bumpalo::Bump as BumpAllocator; | ||
use crate::lazy::expanded::EncodingContextRef; | ||
|
||
/// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found | ||
/// in the provided input stream. | ||
|
@@ -125,7 +125,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0 | |
|
||
fn next<'top>( | ||
&'top mut self, | ||
_allocator: &'top BumpAllocator, | ||
_context: EncodingContextRef<'top>, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🗺️ The binary 1.0 reader is the only one that doesn't use anything from the encoding context (the allocator or the macro table) during parsing. |
||
) -> IonResult<LazyRawStreamItem<'top, BinaryEncoding_1_0>> | ||
where | ||
'data: 'top, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ I renamed `RawValueExpr::MacroInvocation` to `RawValueExpr::EExp` because at the raw level we're always talking about syntactic elements.