Skip to content

Commit

Permalink
Miri uses tree borrows, fixes several Miri errors
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Aug 23, 2024
1 parent 81b5e0e commit 52569f3
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 47 deletions.
22 changes: 15 additions & 7 deletions .github/workflows/miri.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Miri test

on: [push, pull_request]
on: [ push, pull_request ]

jobs:
build:
Expand All @@ -11,9 +11,9 @@ jobs:
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'amazon-ion/ion-rust'
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ ubuntu-latest, windows-latest, macos-latest ]
# build and test for experimental features with miri
features: ['experimental']
features: [ 'experimental' ]
permissions:
checks: write

Expand All @@ -37,9 +37,17 @@ jobs:
- name: Test with Miri (ubuntu and macos)
# miri test for Linux and macOS os, since Windows os requires a different syntax to enable miri flags
if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest'
# We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
# more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test serde --features "${{ matrix.features }}"
# Miri flags
#
# -Zmiri-disable-isolation
# We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
# more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
#
# -Zmiri-tree-borrows
# Miri's default aliasing model (stacked borrows) is known to be unnecessarily restrictive. Their alternative
# aliasing model (tree borrows) accepts valid programs that stacked borrows would reject.
# For more information, see: https://www.ralfj.de/blog/2023/06/02/tree-borrows.html
run: MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" cargo miri test serde --features "${{ matrix.features }}"
- name: Test with Miri (windows)
if: matrix.os == 'windows-latest'
run: $env:MIRIFLAGS="-Zmiri-disable-isolation" ; cargo miri test serde --features "${{ matrix.features }}"
run: $env:MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" ; cargo miri test serde --features "${{ matrix.features }}"
59 changes: 36 additions & 23 deletions src/lazy/encoder/binary/v1_0/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::lazy::encoder::value_writer::SequenceWriter;
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::lazy::encoder::LazyRawWriter;
use crate::lazy::encoding::Encoding;
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
use crate::write_config::{WriteConfig, WriteConfigKind};
use crate::{IonEncoding, IonResult};

Expand Down Expand Up @@ -70,40 +70,53 @@ impl<W: Write> LazyRawBinaryWriter_1_0<W> {
allocator,
encoding_buffer_ptr,
} = self;

let encoding_buffer = match encoding_buffer_ptr {
// If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
// Otherwise, there's nothing in the buffer. Use an empty slice.
None => &[],
};
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
if let Some(ptr) = encoding_buffer_ptr {
let encoding_buffer = unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() };
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
}
// Now that we've written the encoding buffer's contents to output, clear it.
self.encoding_buffer_ptr = None;
// Clear the allocator. A new encoding buffer will be allocated on the next write.
allocator.reset();
Ok(())
}

pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
let top_level = match self.encoding_buffer_ptr {
fn get_or_allocate_encoding_buffer<'value, 'top>(
encoding_buffer_ptr: &'value mut Option<*mut ()>,
allocator: &'top BumpAllocator,
) -> &'value mut BumpVec<'top, u8> {
match encoding_buffer_ptr {
// If the `encoding_buffer_ptr` is set, we already allocated an encoding buffer on
// a previous call to `value_writer()`. Dereference the pointer and continue encoding
// to that buffer.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
// Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
Some(ptr) => {
let new_ptr = *ptr;
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(new_ptr) }
}
None => {
let buffer = self
.allocator
.alloc_with(|| BumpVec::new_in(&self.allocator));
self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
buffer
let encoding_buffer = allocator.alloc_with(|| BumpVec::<u8>::new_in(allocator));
let ptr = mut_ref_to_ptr(encoding_buffer);
// SAFETY: We cannot both store `ptr` in `encoding_buffer_ptr` AND turn it into
// a mutable BumVec reference to return because this (briefly) constructs
// two mutable references. Instead, we store it in `encoding_buffer_ptr`
// and then read it from its new location.
*encoding_buffer_ptr = Some(ptr);
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(encoding_buffer_ptr.unwrap()) }
}
};
let annotated_value_writer = BinaryValueWriter_1_0::new(&self.allocator, top_level);
}
}

pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
let Self {
ref allocator,
ref mut encoding_buffer_ptr,
..
} = *self;
let top_level = Self::get_or_allocate_encoding_buffer(encoding_buffer_ptr, allocator);
let annotated_value_writer = BinaryValueWriter_1_0::new(allocator, top_level);
annotated_value_writer
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/lazy/encoder/binary/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::lazy::encoder::value_writer_config::ValueWriterConfig;
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::lazy::encoder::LazyRawWriter;
use crate::lazy::encoding::Encoding;
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
use crate::write_config::{WriteConfig, WriteConfigKind};
use crate::{IonEncoding, IonResult};

Expand Down Expand Up @@ -74,7 +74,7 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {

let encoding_buffer = match encoding_buffer_ptr {
// If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
Some(ptr) => unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
// Otherwise, there's nothing in the buffer. Use an empty slice.
None => &[],
};
Expand All @@ -97,14 +97,17 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
// Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
None => {
let buffer = self.allocator.alloc_with(|| {
let buffer: &mut BumpVec<u8> = self.allocator.alloc_with(|| {
// Use half of the bump allocator's backing array as an encoding space for this
// top level value. The other half of the bump can be used for incidental
// bookkeeping.
BumpVec::with_capacity_in(DEFAULT_BUMP_SIZE / 2, &self.allocator)
});
// SAFETY: We cannot both store `buffer` in `encoding_buffer_ptr` AND return it
// because this would (briefly) construct two mutable references. Instead,
// we store it in `encoding_buffer_ptr` and then read it from its new location.
self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
buffer
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(self.encoding_buffer_ptr.unwrap()) }
}
};
BinaryValueWriter_1_1::new(
Expand Down
25 changes: 12 additions & 13 deletions src/lazy/expanded/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ pub struct ExpandingReader<Encoding: Decoder, Input: IonInput> {
// the pointer and coerce the result into a `&'top mut MacroEvaluator`, allowing the value it
// yields that can be used until `next()` is called again.
//
// Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
// Because there is not a valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
// in the field below, we cast away the pointer's type for the purposes of storage and then cast
// it back at dereference time when a 'top lifetime is available.
evaluator_ptr: Cell<Option<*mut ()>>,
Expand Down Expand Up @@ -468,10 +468,12 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
if pending_lst.has_changes {
// SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the
// `UnsafeCell` to get a mutable reference to its symbol table.
let symbol_table: &mut SymbolTable =
&mut unsafe { &mut *self.encoding_context.get() }.symbol_table;
let macro_table: &mut MacroTable =
&mut unsafe { &mut *self.encoding_context.get() }.macro_table;
let encoding_context_ref = unsafe { &mut *self.encoding_context.get() };
let EncodingContext {
macro_table,
symbol_table,
..
} = encoding_context_ref;
Self::apply_pending_context_changes(pending_lst, symbol_table, macro_table);
}
}
Expand Down Expand Up @@ -530,8 +532,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// See if the raw reader can get another expression from the input stream. It's possible
// to find an expression that yields no values (for example: `(:void)`), so we perform this
// step in a loop until we get a value or end-of-stream.
let allocator: &BumpAllocator = self.context().allocator();
let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
let context_ref = self.context();

// Pull another top-level expression from the input stream if one is available.
use crate::lazy::raw_stream_item::RawStreamItem::*;
let raw_reader = unsafe { &mut *self.raw_reader.get() };
Expand All @@ -548,7 +550,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// It's another macro invocation, we'll add it to the evaluator so it will be evaluated
// on the next call and then we'll return the e-expression itself.
EExp(e_exp) => {
let context = self.context();
let resolved_e_exp = match e_exp.resolve(context_ref) {
Ok(resolved) => resolved,
Err(e) => return Err(e),
Expand All @@ -565,7 +566,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// If there's not an evaluator in the bump, make a new one.
None => {
let new_evaluator = MacroEvaluator::for_eexp(resolved_e_exp)?;
context.allocator.alloc_with(|| new_evaluator)
context_ref.allocator.alloc_with(|| new_evaluator)
}
};

Expand Down Expand Up @@ -613,9 +614,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// See if the raw reader can get another expression from the input stream. It's possible
// to find an expression that yields no values (for example: `(:void)`), so we perform this
// step in a loop until we get a value or end-of-stream.
let context_ref = self.context();

let allocator: &BumpAllocator = self.context().allocator();
let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
loop {
// Pull another top-level expression from the input stream if one is available.
use crate::lazy::raw_stream_item::RawStreamItem::*;
Expand All @@ -631,7 +631,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
}
// It's another macro invocation, we'll start evaluating it.
EExp(e_exp) => {
let context = self.context();
let resolved_e_exp = match e_exp.resolve(context_ref) {
Ok(resolved) => resolved,
Err(e) => return Err(e),
Expand All @@ -653,7 +652,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
bump_evaluator_ref
}
// If there's not an evaluator in the bump, make a new one.
None => context.allocator.alloc_with(|| new_evaluator),
None => context_ref.allocator.alloc_with(|| new_evaluator),
};

// Try to get a value by starting to evaluate the e-expression.
Expand Down
9 changes: 9 additions & 0 deletions src/unsafe_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ pub(crate) unsafe fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T {
&mut *typed_ptr
}

/// Helper function that turns a raw pointer into an immutable reference of the specified type.
///
/// The caller is responsible for confirming that `ptr` is a valid reference to some value
/// of type `T`.
pub(crate) unsafe fn ptr_to_ref<'a, T>(ptr: *mut ()) -> &'a T {
let typed_ptr: *const T = ptr.cast();
&*typed_ptr
}

/// Helper function that turns a mutable reference into a raw pointer.
///
/// Because this method does not read the data to which the reference points,
Expand Down

0 comments on commit 52569f3

Please sign in to comment.