Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds read/write support for v1.1 system symbols #847

Merged
merged 14 commits into from
Nov 6, 2024
Merged
3 changes: 2 additions & 1 deletion src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,12 @@ pub(crate) mod v1_1 {
"make_field", // $65
];

pub mod system_symbol_ids {
pub mod system_symbols {
use crate::raw_symbol_ref::SystemSymbol_1_1;

pub const ION_ENCODING: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(10);
pub const SYMBOL_TABLE: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(15);
pub const EMPTY_TEXT: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(21);
pub const ADD_SYMBOLS: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(45);
pub const ADD_MACROS: SystemSymbol_1_1 = SystemSymbol_1_1::new_unchecked(47);
}
Expand Down
16 changes: 5 additions & 11 deletions src/lazy/encoder/binary/v1_0/container_writers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,12 @@ impl<'value, 'top> BinaryContainerWriter_1_0<'value, 'top> {
for annotation in iterator {
let symbol_address = match annotation.as_raw_symbol_ref() {
RawSymbolRef::SymbolId(symbol_id) => symbol_id,
RawSymbolRef::SystemSymbol_1_1(_system_symbol) => {
unreachable!("Ion 1.1 symbol in Ion 1.0 writer")
}
RawSymbolRef::Text(text) => {
other_token => {
return cold_path! {
IonResult::encoding_error(
format!("binary Ion 1.0 does not support text annotation literals (received '{}')", text)
format!("binary Ion 1.0 only supports symbol ID annotations (received '{other_token:?}')")
)
};
}
}
};
symbol_ids.push(symbol_address);
Expand Down Expand Up @@ -314,14 +311,11 @@ impl<'value, 'top> FieldEncoder for BinaryStructWriter_1_0<'value, 'top> {
// Write the field name
let sid = match name.as_raw_symbol_ref() {
RawSymbolRef::SymbolId(sid) => sid,
RawSymbolRef::Text(text) => {
other => {
return Err(IonError::Encoding(EncodingError::new(format!(
"tried to write a text literal using the v1.0 raw binary writer: '{text}'"
"the v1.0 raw binary writer only supports symbol ID struct field names, received {other:?}"
))));
}
RawSymbolRef::SystemSymbol_1_1(_) => {
unreachable!("Ion 1.1 token in 1.0 struct field name")
}
};
VarUInt::write_u64(&mut self.container_writer.child_values_buffer, sid as u64)?;
Ok(())
Expand Down
7 changes: 2 additions & 5 deletions src/lazy/encoder/binary/v1_0/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,9 @@ impl<'value, 'top> BinaryValueWriter_1_0<'value, 'top> {
pub fn write_symbol<A: AsRawSymbolRef>(self, value: A) -> IonResult<()> {
match value.as_raw_symbol_ref() {
RawSymbolRef::SymbolId(sid) => self.write_symbol_id(sid),
RawSymbolRef::Text(text) => IonResult::illegal_operation(format!(
"the Ion 1.0 raw binary writer cannot write text symbols (here: '{text}')"
other => IonResult::illegal_operation(format!(
"the Ion 1.0 raw binary writer only supports symbol ID values; received: {other:?})"
)),
RawSymbolRef::SystemSymbol_1_1(_) => {
unreachable!("Ion 1.1 token in 1.0 value writer")
}
}
}

Expand Down
10 changes: 6 additions & 4 deletions src/lazy/encoder/binary/v1_1/flex_sym.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,18 @@ impl<'top> FlexSym<'top> {
};
}

/// Encodes the empty string or symbol ID zero as a FlexSym. The caller is responsible for
/// confirming that `symbol` is one of those two cases before calling.
/// Encodes the empty string, symbol ID zero, or a system symbol as a FlexSym. The caller is
/// responsible for confirming that `symbol` is one of these three cases before calling.
fn encode_special_case(output: &mut BumpVec<u8>, symbol: RawSymbolRef) {
use RawSymbolRef::*;
let encoding: &[u8] = match symbol {
SymbolId(_) => &[FlexSym::ZERO, 0x60],
// Per this method's preconditions, this branch must be SymbolId zero.
SymbolId(_zero) => &[FlexSym::ZERO, 0x60],
SystemSymbol_1_1(system_symbol) => {
&[FlexSym::ZERO, 0x60 + system_symbol.address() as u8]
}
Text(_) => &[FlexSym::ZERO, 0x75],
// Per this method's preconditions, this branch's text must be the empty string.
Text(_empty_string) => &[FlexSym::ZERO, 0x75],
};
output.extend_from_slice_copy(encoding);
}
Expand Down
6 changes: 4 additions & 2 deletions src/lazy/encoder/text/v1_0/value_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,10 @@ impl<'value, W: Write> TextAnnotatedValueWriter_1_0<'value, W> {
write!(output, "::")
}
RawSymbolRef::SymbolId(sid) => write!(output, "${sid}::"),
RawSymbolRef::SystemSymbol_1_1(_) => {
unreachable!("Ion 1.1 token in 1.0 annotations sequence")
RawSymbolRef::SystemSymbol_1_1(_symbol) => {
return IonResult::encoding_error(
"the Ion 1.0 text writer does not support encoding Ion 1.1 system symbols",
)
}
}?;
}
Expand Down
6 changes: 3 additions & 3 deletions src/lazy/encoder/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl<E: Encoding, Output: Write> Writer<E, Output> {

let mut directive = directive_writer
.value_writer()
.with_annotations(v1_1::system_symbol_ids::ION_ENCODING)?
.with_annotations(v1_1::system_symbols::ION_ENCODING)?
.sexp_writer()?;

let pending_symbols = context
Expand All @@ -176,8 +176,8 @@ impl<E: Encoding, Output: Write> Writer<E, Output> {

let mut symbol_table = directive.sexp_writer()?;
symbol_table
.write_symbol(v1_1::system_symbol_ids::SYMBOL_TABLE)?
.write_symbol(v1_1::system_symbol_ids::ION_ENCODING)?
.write_symbol(v1_1::system_symbols::SYMBOL_TABLE)?
.write_symbol(v1_1::system_symbols::ION_ENCODING)?
.write_list(pending_symbols)?;
symbol_table.close()?;
directive.close()
Expand Down
6 changes: 3 additions & 3 deletions src/lazy/expanded/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1381,9 +1381,7 @@ mod tests {
ExprRange, ParameterEncoding, TemplateBodyExpr, TemplateMacro, TemplateValue,
};
use crate::lazy::expanded::{EncodingContext, EncodingContextRef};
use crate::{
AnyEncoding, Element, ElementReader, Int, IntoAnnotations, IonResult, Macro, Reader, Symbol,
};
use crate::{Int, IntoAnnotations, IonResult, Macro, Symbol};

// XXX: The tests in this module compile inputs and expect a specific output. There is no "correct"
// output, only correct behavior. As such, these tests are fragile; it is possible that optimizing
Expand Down Expand Up @@ -1648,6 +1646,8 @@ mod tests {
#[cfg(feature = "experimental-ion-1-1")]
#[test]
fn dependent_macros() -> IonResult<()> {
use crate::{AnyEncoding, Element, ElementReader, Reader};

let ion = r#"
$ion_1_1
$ion_encoding::(
Expand Down
2 changes: 1 addition & 1 deletion src/lazy/system_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,6 @@ mod tests {

// === Shared Symbol Tables ===

use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1;
use crate::lazy::encoder::value_writer::AnnotatableWriter;
use crate::{MapCatalog, SharedSymbolTable};

Expand Down Expand Up @@ -1021,6 +1020,7 @@ mod tests {
#[cfg(feature = "experimental-ion-1-1")]
#[test]
fn detect_encoding_directive_binary() -> IonResult<()> {
use crate::lazy::encoder::binary::v1_1::writer::LazyRawBinaryWriter_1_1;
let mut writer = LazyRawBinaryWriter_1_1::new(Vec::new())?;
let mut directive = writer
.value_writer()
Expand Down
1 change: 0 additions & 1 deletion src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ use crate::{
use crate::lazy::expanded::macro_table::Macro;
use crate::lazy::expanded::template::{Parameter, RestSyntaxPolicy};
use crate::lazy::text::as_utf8::AsUtf8;
use crate::raw_symbol_ref::SystemSymbol;
use bumpalo::collections::Vec as BumpVec;

impl<'a> Debug for TextBuffer<'a> {
Expand Down
95 changes: 20 additions & 75 deletions src/raw_symbol_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

use crate::lazy::expanded::EncodingContextRef;
use crate::result::IonFailure;
use crate::symbol_table::{SystemSymbolTable, SYSTEM_SYMBOLS_1_0, SYSTEM_SYMBOLS_1_1};
use crate::symbol_table::{SystemSymbolTable, SYSTEM_SYMBOLS_1_1};
use crate::types::SymbolAddress;
use crate::{IonError, IonResult, IonVersion, Symbol, SymbolId, SymbolRef};
use crate::{IonError, IonResult, Symbol, SymbolId, SymbolRef};
use ice_code::ice;

/// A raw symbol token found in the input stream.
Expand Down Expand Up @@ -78,9 +78,9 @@ impl<'a> RawSymbolRef<'a> {
#[inline(never)]
|| {
IonError::decoding_error(format!(
"found {label} symbol ID (${}) that was not in the symbol table\n{:?}",
"found {label} symbol ID (${}) that was not in the symbol table (len={})",
sid,
context.symbol_table()
context.symbol_table().len()
))
},
)?
Expand Down Expand Up @@ -178,10 +178,15 @@ impl<'a> From<&'a Symbol> for RawSymbolRef<'a> {
}
}

pub(crate) trait SystemSymbol: Sized {
/// Creates a new instance of this type **without range checking the address.**
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SystemSymbol_1_1(SymbolAddress);

impl SystemSymbol_1_1 {
/// Creates a new instance of this type **without range-checking the address.**
/// It should only be used when the address passed in has already been confirmed to be in range.
fn new_unchecked(symbol_address: SymbolAddress) -> Self;
pub const fn new_unchecked(symbol_address: SymbolAddress) -> Self {
Self(symbol_address)
}

/// Creates a new system symbol if the provided address is non-zero and in range for the
/// corresponding system symbol table.
Expand All @@ -204,14 +209,16 @@ pub(crate) trait SystemSymbol: Sized {
})
}

/// Returns a reference to the system symbol table used by this symbol's corresponding
/// Ion version.
fn system_symbol_table() -> &'static SystemSymbolTable;
#[inline]
pub fn system_symbol_table() -> &'static SystemSymbolTable {
SYSTEM_SYMBOLS_1_1
}

/// The address in the system table where this symbol's text can be found.
fn address(&self) -> SymbolAddress;
pub const fn address(&self) -> SymbolAddress {
self.0
}

fn text(&self) -> &'static str {
pub fn text(&self) -> &'static str {
// TODO: `NonZeroUsize` might offer minor optimization opportunities around system
// symbol representation/resolution.
//
Expand All @@ -220,68 +227,6 @@ pub(crate) trait SystemSymbol: Sized {
// The address has been confirmed to be non-zero and in bounds
Self::system_symbol_table().symbols_by_address()[self.address() - 1]
}

fn ion_version(&self) -> IonVersion {
Self::system_symbol_table().ion_version()
}
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SystemSymbol_1_1(SymbolAddress);

impl SystemSymbol_1_1 {
pub const fn new_unchecked(symbol_address: SymbolAddress) -> Self {
Self(symbol_address)
}

pub const fn address(&self) -> SymbolAddress {
self.0
}
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) struct SystemSymbol_1_0(SymbolAddress);

impl SystemSymbol for SystemSymbol_1_0 {
#[inline]
fn new_unchecked(symbol_address: SymbolAddress) -> Self {
Self(symbol_address)
}

#[inline]
fn system_symbol_table() -> &'static SystemSymbolTable {
SYSTEM_SYMBOLS_1_0
}

#[inline]
fn address(&self) -> SymbolAddress {
self.0
}
}

impl AsRawSymbolRef for SystemSymbol_1_0 {
fn as_raw_symbol_ref(&self) -> RawSymbolRef {
// In Ion 1.0, system symbols are encoded just like application symbols,
// as an address in the active symbol table.
RawSymbolRef::SymbolId(self.0)
}
}

impl SystemSymbol for SystemSymbol_1_1 {
#[inline]
fn new_unchecked(symbol_address: SymbolAddress) -> Self {
Self(symbol_address)
}

#[inline]
fn system_symbol_table() -> &'static SystemSymbolTable {
SYSTEM_SYMBOLS_1_1
}

#[inline]
fn address(&self) -> SymbolAddress {
self.0
}
}

impl AsRawSymbolRef for SystemSymbol_1_1 {
Expand Down
6 changes: 3 additions & 3 deletions src/symbol_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl SystemSymbolTable {
}

pub fn symbol_for_address(&self, address: SymbolAddress) -> Option<Symbol> {
self.text_for_address(address).map(Symbol::system)
self.text_for_address(address).map(Symbol::static_text)
}

pub(crate) fn symbols_by_text(&self) -> &'static phf::Map<&'static str, usize> {
Expand Down Expand Up @@ -251,8 +251,8 @@ impl SymbolTable {
/// system symbols may be removed from the table.
pub fn permanent_system_prefix_count(&self) -> usize {
match self.ion_version {
IonVersion::v1_0 => 10,
IonVersion::v1_1 => 1,
IonVersion::v1_0 => Self::NUM_PREFIX_SYSTEM_SYMBOLS_1_0,
IonVersion::v1_1 => Self::NUM_PREFIX_SYSTEM_SYMBOLS_1_1,
}
}

Expand Down
42 changes: 24 additions & 18 deletions src/text/text_formatter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::raw_symbol_ref::{AsRawSymbolRef, RawSymbolRef, SystemSymbol};
use crate::constants::v1_1;
use crate::raw_symbol_ref::{AsRawSymbolRef, RawSymbolRef};
use crate::result::IonFailure;
use crate::{Annotations, Sequence};
use crate::{Decimal, Int, Struct, Timestamp};
Expand Down Expand Up @@ -278,24 +279,29 @@ impl<'a, W: std::fmt::Write> FmtValueFormatter<'a, W> {
}

pub(crate) fn format_symbol_token<A: AsRawSymbolRef>(&mut self, token: A) -> IonResult<()> {
let text = match token.as_raw_symbol_ref() {
RawSymbolRef::SymbolId(sid) => return Ok(write!(self.output, "${sid}")?),
RawSymbolRef::SystemSymbol_1_1(system_symbol) => system_symbol.text(),
RawSymbolRef::Text(text) => text,
use RawSymbolRef::*;
let write_result = match token.as_raw_symbol_ref() {
SymbolId(sid) => write!(self.output, "${sid}"),
// '' is the only system symbol that requires quoting; the rest are identifiers.
SystemSymbol_1_1(v1_1::system_symbols::EMPTY_TEXT) => write!(self.output, "\'\'"),
// Any other system symbol is an identifier and doesn't require quoting.
SystemSymbol_1_1(symbol) => return Ok(write!(self.output, "{}", symbol.text())?),
// If the text could be mistaken for a keyword or symbol ID, wrap it in single quotes.
zslayton marked this conversation as resolved.
Show resolved Hide resolved
Text(text) if Self::token_is_keyword(text) || Self::token_resembles_symbol_id(text) => {
write!(self.output, "'{text}'")
}
// If the text is an identifier, it doesn't require quotes.
Text(text) if Self::token_is_identifier(text) => write!(self.output, "{text}"),
// Any other text has its escape sequences substituted and is wrapped in quotes.
Text(text) => {
// Write the symbol text using quotes and escaping any characters that require it.
write!(self.output, "\'")?;
self.format_escaped_text_body(text)?;
write!(self.output, "\'")
}
};
if Self::token_is_keyword(text) || Self::token_resembles_symbol_id(text) {
// Write the symbol text in single quotes
write!(self.output, "'{text}'")?;
} else if Self::token_is_identifier(text) {
// Write the symbol text without quotes
write!(self.output, "{text}")?
} else {
// Write the symbol text using quotes and escaping any characters that require it.
write!(self.output, "\'")?;
self.format_escaped_text_body(text)?;
write!(self.output, "\'")?;
}
Ok(())
// Convert the fmt::Result into an IonResult
Ok(write_result?)
}

/// Writes the body (i.e. no start or end delimiters) of a string or symbol with any illegal
Expand Down
2 changes: 1 addition & 1 deletion src/types/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl Symbol {
}
}

pub fn system(text: &'static str) -> Symbol {
pub fn static_text(text: &'static str) -> Symbol {
Symbol {
text: SymbolText::Static(text),
}
Expand Down
Loading
Loading