Skip to content

Commit

Permalink
Auto merge of rust-lang#110634 - saethlin:pointy-decoder, r=cjgillot
Browse files Browse the repository at this point in the history
Rewrite MemDecoder around pointers not a slice

This is basically rust-lang#109910 but I'm being a lot more aggressive. The pointer-based structure means that it makes a lot more sense to absorb more complexity into `MemDecoder`, most of the diff is just complexity moving from one place to another.

The primary argument for this structure is that we only incur a single bounds check when doing multi-byte reads from a `MemDecoder`. With the slice-based implementation we need to do those with `data[position..position + len]` , which needs to account for `position + len` wrapping. It would be possible to dodge the first bounds check if we stored a slice that starts at `position`, but that would require updating the pointer and length on every read.

This PR also embeds the failure path in a separate function, which means that this PR should subsume all the perf wins observed in rust-lang#109867.
  • Loading branch information
bors committed Apr 26, 2023
2 parents 84d4f16 + 1f67ba6 commit adaac6b
Show file tree
Hide file tree
Showing 11 changed files with 174 additions and 130 deletions.
25 changes: 5 additions & 20 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,16 +373,6 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
self.tcx()
}

#[inline]
fn peek_byte(&self) -> u8 {
self.opaque.data[self.opaque.position()]
}

#[inline]
fn position(&self) -> usize {
self.opaque.position()
}

fn cached_ty_for_shorthand<F>(&mut self, shorthand: usize, or_insert_with: F) -> Ty<'tcx>
where
F: FnOnce(&mut Self) -> Ty<'tcx>,
Expand All @@ -404,7 +394,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
where
F: FnOnce(&mut Self) -> R,
{
let new_opaque = MemDecoder::new(self.opaque.data, pos);
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode);
let r = f(self);
Expand Down Expand Up @@ -625,17 +615,12 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
SYMBOL_OFFSET => {
// read str offset
let pos = d.read_usize();
let old_pos = d.opaque.position();

// move to str offset and read
d.opaque.set_position(pos);
let s = d.read_str();
let sym = Symbol::intern(s);

// restore position
d.opaque.set_position(old_pos);

sym
d.opaque.with_position(pos, |d| {
let s = d.read_str();
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Expand Down
3 changes: 0 additions & 3 deletions compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefPathHashMapRef<'tcx> {

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMapRef<'static> {
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> {
// Import TyDecoder so we can access the DecodeContext::position() method
use crate::rustc_middle::ty::codec::TyDecoder;

let len = d.read_usize();
let pos = d.position();
let o = slice_owned(d.blob().clone(), |blob| &blob[pos..pos + len]);
Expand Down
10 changes: 10 additions & 0 deletions compiler/rustc_middle/src/ty/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,16 @@ macro_rules! implement_ty_decoder {
fn read_raw_bytes(&mut self, len: usize) -> &[u8] {
self.opaque.read_raw_bytes(len)
}

#[inline]
fn position(&self) -> usize {
self.opaque.position()
}

#[inline]
fn peek_byte(&self) -> u8 {
self.opaque.peek_byte()
}
}
}
}
Expand Down
56 changes: 12 additions & 44 deletions compiler/rustc_query_impl/src/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,12 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {

// Decode the *position* of the footer, which can be found in the
// last 8 bytes of the file.
decoder.set_position(data.len() - IntEncodedWithFixedSize::ENCODED_SIZE);
let footer_pos = IntEncodedWithFixedSize::decode(&mut decoder).0 as usize;

let footer_pos = decoder
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
IntEncodedWithFixedSize::decode(decoder).0 as usize
});
// Decode the file footer, which contains all the lookup tables, etc.
decoder.set_position(footer_pos);

decode_tagged(&mut decoder, TAG_FILE_FOOTER)
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER))
};

Self {
Expand Down Expand Up @@ -522,29 +521,13 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
}
}

trait DecoderWithPosition: Decoder {
fn position(&self) -> usize;
}

impl<'a> DecoderWithPosition for MemDecoder<'a> {
fn position(&self) -> usize {
self.position()
}
}

impl<'a, 'tcx> DecoderWithPosition for CacheDecoder<'a, 'tcx> {
fn position(&self) -> usize {
self.opaque.position()
}
}

// Decodes something that was encoded with `encode_tagged()` and verify that the
// tag matches and the correct amount of bytes was read.
fn decode_tagged<D, T, V>(decoder: &mut D, expected_tag: T) -> V
where
T: Decodable<D> + Eq + std::fmt::Debug,
V: Decodable<D>,
D: DecoderWithPosition,
D: Decoder,
{
let start_pos = decoder.position();

Expand All @@ -568,16 +551,6 @@ impl<'a, 'tcx> TyDecoder for CacheDecoder<'a, 'tcx> {
self.tcx
}

#[inline]
fn position(&self) -> usize {
self.opaque.position()
}

#[inline]
fn peek_byte(&self) -> u8 {
self.opaque.data[self.opaque.position()]
}

fn cached_ty_for_shorthand<F>(&mut self, shorthand: usize, or_insert_with: F) -> Ty<'tcx>
where
F: FnOnce(&mut Self) -> Ty<'tcx>,
Expand All @@ -600,9 +573,9 @@ impl<'a, 'tcx> TyDecoder for CacheDecoder<'a, 'tcx> {
where
F: FnOnce(&mut Self) -> R,
{
debug_assert!(pos < self.opaque.data.len());
debug_assert!(pos < self.opaque.len());

let new_opaque = MemDecoder::new(self.opaque.data, pos);
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let r = f(self);
self.opaque = old_opaque;
Expand Down Expand Up @@ -743,17 +716,12 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
SYMBOL_OFFSET => {
// read str offset
let pos = d.read_usize();
let old_pos = d.opaque.position();

// move to str offset and read
d.opaque.set_position(pos);
let s = d.read_str();
let sym = Symbol::intern(s);

// restore position
d.opaque.set_position(old_pos);

sym
d.opaque.with_position(pos, |d| {
let s = d.read_str();
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Expand Down
16 changes: 7 additions & 9 deletions compiler/rustc_query_system/src/dep_graph/serialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,21 +94,19 @@ impl<'a, K: DepKind + Decodable<MemDecoder<'a>>> Decodable<MemDecoder<'a>>
{
#[instrument(level = "debug", skip(d))]
fn decode(d: &mut MemDecoder<'a>) -> SerializedDepGraph<K> {
let start_position = d.position();

// The last 16 bytes are the node count and edge count.
debug!("position: {:?}", d.position());
d.set_position(d.data.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE);
let (node_count, edge_count) =
d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
debug!("position: {:?}", d.position());
let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
(node_count, edge_count)
});
debug!("position: {:?}", d.position());

let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
debug!(?node_count, ?edge_count);

debug!("position: {:?}", d.position());
d.set_position(start_position);
debug!("position: {:?}", d.position());

let mut nodes = IndexVec::with_capacity(node_count);
let mut fingerprints = IndexVec::with_capacity(node_count);
let mut edge_list_indices = IndexVec::with_capacity(node_count);
Expand Down
16 changes: 8 additions & 8 deletions compiler/rustc_serialize/src/leb128.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use crate::opaque::MemDecoder;
use crate::serialize::Decoder;

/// Returns the length of the longest LEB128 encoding for `T`, assuming `T` is an integer type
pub const fn max_leb128_len<T>() -> usize {
// The longest LEB128 encoding for an integer uses 7 bits per byte.
Expand Down Expand Up @@ -50,21 +53,19 @@ impl_write_unsigned_leb128!(write_usize_leb128, usize);
macro_rules! impl_read_unsigned_leb128 {
($fn_name:ident, $int_ty:ty) => {
#[inline]
pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
// The first iteration of this loop is unpeeled. This is a
// performance win because this code is hot and integer values less
// than 128 are very common, typically occurring 50-80% or more of
// the time, even for u64 and u128.
let byte = slice[*position];
*position += 1;
let byte = decoder.read_u8();
if (byte & 0x80) == 0 {
return byte as $int_ty;
}
let mut result = (byte & 0x7F) as $int_ty;
let mut shift = 7;
loop {
let byte = slice[*position];
*position += 1;
let byte = decoder.read_u8();
if (byte & 0x80) == 0 {
result |= (byte as $int_ty) << shift;
return result;
Expand Down Expand Up @@ -127,14 +128,13 @@ impl_write_signed_leb128!(write_isize_leb128, isize);
macro_rules! impl_read_signed_leb128 {
($fn_name:ident, $int_ty:ty) => {
#[inline]
pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
pub fn $fn_name(decoder: &mut MemDecoder<'_>) -> $int_ty {
let mut result = 0;
let mut shift = 0;
let mut byte;

loop {
byte = slice[*position];
*position += 1;
byte = decoder.read_u8();
result |= <$int_ty>::from(byte & 0x7F) << shift;
shift += 7;

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_serialize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Core encoding and decoding interfaces.
#![feature(maybe_uninit_slice)]
#![feature(new_uninit)]
#![feature(allocator_api)]
#![feature(ptr_sub_ptr)]
#![cfg_attr(test, feature(test))]
#![allow(rustc::internal)]
#![deny(rustc::untranslatable_diagnostic)]
Expand Down
Loading

0 comments on commit adaac6b

Please sign in to comment.