From bac1c1546101e332d56df75e19a2ac7b6f9c6541 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 16 Sep 2017 21:43:05 +0300 Subject: [PATCH] Compress "small" spans to 32 bits and intern "large" spans --- src/librustc/util/common.rs | 5 +- src/librustc_driver/profile/trace.rs | 4 +- src/libsyntax_pos/hygiene.rs | 2 +- src/libsyntax_pos/lib.rs | 65 ++++++---- src/libsyntax_pos/span_encoding.rs | 181 +++++++++++++++++++++++++++ 5 files changed, 226 insertions(+), 31 deletions(-) create mode 100644 src/libsyntax_pos/span_encoding.rs diff --git a/src/librustc/util/common.rs b/src/librustc/util/common.rs index 618a4ed331e76..9e566d2b9071f 100644 --- a/src/librustc/util/common.rs +++ b/src/librustc/util/common.rs @@ -20,7 +20,7 @@ use std::path::Path; use std::time::{Duration, Instant}; use std::sync::mpsc::{Sender}; -use syntax_pos::{Span}; +use syntax_pos::{SpanData}; use ty::maps::{QueryMsg}; use dep_graph::{DepNode}; @@ -61,7 +61,8 @@ pub enum ProfileQueriesMsg { /// end a task TaskEnd, /// begin a new query - QueryBegin(Span, QueryMsg), + /// can't use `Span` because queries are sent to other thread + QueryBegin(SpanData, QueryMsg), /// query is satisfied by using an already-known value for the given key CacheHit, /// query requires running a provider; providers may nest, permitting queries to nest. diff --git a/src/librustc_driver/profile/trace.rs b/src/librustc_driver/profile/trace.rs index f5079836c3ca4..280f3c8c79677 100644 --- a/src/librustc_driver/profile/trace.rs +++ b/src/librustc_driver/profile/trace.rs @@ -9,7 +9,7 @@ // except according to those terms. 
use super::*; -use syntax_pos::Span; +use syntax_pos::SpanData; use rustc::ty::maps::QueryMsg; use std::fs::File; use std::time::{Duration, Instant}; @@ -18,7 +18,7 @@ use rustc::dep_graph::{DepNode}; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Query { - pub span: Span, + pub span: SpanData, pub msg: QueryMsg, } pub enum Effect { diff --git a/src/libsyntax_pos/hygiene.rs b/src/libsyntax_pos/hygiene.rs index 919804d7efd6f..4790fa0a7edc2 100644 --- a/src/libsyntax_pos/hygiene.rs +++ b/src/libsyntax_pos/hygiene.rs @@ -25,7 +25,7 @@ use std::fmt; /// A SyntaxContext represents a chain of macro expansions (represented by marks). #[derive(Clone, Copy, PartialEq, Eq, Default, PartialOrd, Ord, Hash)] -pub struct SyntaxContext(u32); +pub struct SyntaxContext(pub(super) u32); #[derive(Copy, Clone, Default)] pub struct SyntaxContextData { diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 27fbca19dcc4c..34f4470c88b6b 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -25,11 +25,11 @@ #![feature(optin_builtin_traits)] #![allow(unused_attributes)] #![feature(specialization)] -#![feature(staged_api)] use std::borrow::Cow; use std::cell::{Cell, RefCell}; -use std::cmp; +use std::cmp::{self, Ordering}; +use std::collections::HashMap; use std::fmt; use std::hash::Hasher; use std::ops::{Add, Sub}; @@ -47,6 +47,9 @@ extern crate serialize as rustc_serialize; // used by deriving pub mod hygiene; pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind}; +mod span_encoding; +pub use span_encoding::{Span, DUMMY_SP}; + pub mod symbol; pub type FileName = String; @@ -59,23 +62,33 @@ pub type FileName = String; /// able to use many of the functions on spans in codemap and you cannot assume /// that the length of the span = hi - lo; there may be space in the BytePos /// range between files. 
+/// +/// `SpanData` is public because `Span` uses a thread-local interner and can't be +/// sent to other threads, but some pieces of performance infra run in a separate thread. +/// Using `Span` is generally preferred. #[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] -pub struct Span { - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] +pub struct SpanData { pub lo: BytePos, - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] pub hi: BytePos, /// Information about where the macro came from, if this piece of /// code was created by a macro expansion. - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] pub ctxt: SyntaxContext, } -#[allow(deprecated)] -pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), ctxt: NO_EXPANSION }; +// The interner is thread-local, so `Span` shouldn't move between threads. +impl !Send for Span {} +impl !Sync for Span {} + +impl PartialOrd for Span { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&self.data(), &rhs.data()) + } +} +impl Ord for Span { + fn cmp(&self, rhs: &Self) -> Ordering { + Ord::cmp(&self.data(), &rhs.data()) + } +} /// A collection of spans.
Spans have two orthogonal attributes: /// @@ -90,38 +103,32 @@ pub struct MultiSpan { } impl Span { - #[allow(deprecated)] - #[inline] - pub fn new(lo: BytePos, hi: BytePos, ctxt: SyntaxContext) -> Self { - if lo <= hi { Span { lo, hi, ctxt } } else { Span { lo: hi, hi: lo, ctxt } } - } - - #[allow(deprecated)] #[inline] pub fn lo(self) -> BytePos { - self.lo + self.data().lo } #[inline] pub fn with_lo(self, lo: BytePos) -> Span { - Span::new(lo, self.hi(), self.ctxt()) + let base = self.data(); + Span::new(lo, base.hi, base.ctxt) } - #[allow(deprecated)] #[inline] pub fn hi(self) -> BytePos { - self.hi + self.data().hi } #[inline] pub fn with_hi(self, hi: BytePos) -> Span { - Span::new(self.lo(), hi, self.ctxt()) + let base = self.data(); + Span::new(base.lo, hi, base.ctxt) } - #[allow(deprecated)] #[inline] pub fn ctxt(self) -> SyntaxContext { - self.ctxt + self.data().ctxt } #[inline] pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span { - Span::new(self.lo(), self.hi(), ctxt) + let base = self.data(); + Span::new(base.lo, base.hi, ctxt) } /// Returns a new span representing just the end-point of this span @@ -342,6 +349,12 @@ impl fmt::Debug for Span { } } +impl fmt::Debug for SpanData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + SPAN_DEBUG.with(|span_debug| span_debug.get()(Span::new(self.lo, self.hi, self.ctxt), f)) + } +} + impl MultiSpan { pub fn new() -> MultiSpan { MultiSpan { diff --git a/src/libsyntax_pos/span_encoding.rs b/src/libsyntax_pos/span_encoding.rs new file mode 100644 index 0000000000000..1e10d2a0ea993 --- /dev/null +++ b/src/libsyntax_pos/span_encoding.rs @@ -0,0 +1,181 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +// Spans are encoded using a 2-bit tag and 4 different encoding formats, one for each tag value. +// Three formats are used for keeping span data inline, +// the fourth one contains an index into the out-of-line span interner. +// The encoding formats for inline spans were obtained by optimizing over crates in rustc/libstd. +// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28 + +use super::*; + +/// A compressed span. +/// Contains either fields of `SpanData` inline if they are small, or index into span interner. +/// The primary goal of `Span` is to be as small as possible and fit into other structures +/// (that's why it uses `packed` as well). Decoding speed is the second priority. +/// See `SpanData` for the info on span fields in decoded representation. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[repr(packed)] +pub struct Span(u32); + +/// Dummy span, both position and length are zero, syntax context is zero as well. +/// This span is kept inline and encoded with format 0. +pub const DUMMY_SP: Span = Span(0); + +impl Span { + #[inline] + pub fn new(lo: BytePos, hi: BytePos, ctxt: SyntaxContext) -> Self { + encode(&match lo <= hi { + true => SpanData { lo, hi, ctxt }, + false => SpanData { lo: hi, hi: lo, ctxt }, + }) + } + + #[inline] + pub fn data(self) -> SpanData { + decode(self) + } +} + +// Tags +const TAG_INLINE0: u32 = 0b00; +const TAG_INLINE1: u32 = 0b01; +const TAG_INLINE2: u32 = 0b10; +const TAG_INTERNED: u32 = 0b11; +const TAG_MASK: u32 = 0b11; + +// Field indexes +const BASE_INDEX: usize = 0; +const LEN_INDEX: usize = 1; +const CTXT_INDEX: usize = 2; + +// Tag = 0b00, inline format 0. +// ----------------------------------- +// | base 31:8 | len 7:2 | tag 1:0 | +// ----------------------------------- +const INLINE0_SIZES: [u32; 3] = [24, 6, 0]; +const INLINE0_OFFSETS: [u32; 3] = [8, 2, 2]; + +// Tag = 0b01, inline format 1.
+// ----------------------------------- +// | base 31:10 | len 9:2 | tag 1:0 | +// ----------------------------------- +const INLINE1_SIZES: [u32; 3] = [22, 8, 0]; +const INLINE1_OFFSETS: [u32; 3] = [10, 2, 2]; + +// Tag = 0b10, inline format 2. +// ------------------------------------------------ +// | base 31:14 | len 13:13 | ctxt 12:2 | tag 1:0 | +// ------------------------------------------------ +const INLINE2_SIZES: [u32; 3] = [18, 1, 11]; +const INLINE2_OFFSETS: [u32; 3] = [14, 13, 2]; + +// Tag = 0b11, interned format. +// ------------------------ +// | index 31:2 | tag 1:0 | +// ------------------------ +const INTERNED_INDEX_SIZE: u32 = 30; +const INTERNED_INDEX_OFFSET: u32 = 2; + +fn encode(sd: &SpanData) -> Span { + let (base, len, ctxt) = (sd.lo.0, sd.hi.0 - sd.lo.0, sd.ctxt.0); + + // Can we fit the span data into this encoding? + let fits = |sizes: [u32; 3]| { + (base >> sizes[BASE_INDEX]) == 0 && (len >> sizes[LEN_INDEX]) == 0 && + (ctxt >> sizes[CTXT_INDEX]) == 0 + }; + // Turn fields into a single `u32` value. + let compose = |offsets: [u32; 3], tag| { + (base << offsets[BASE_INDEX]) | (len << offsets[LEN_INDEX]) | + (ctxt << offsets[CTXT_INDEX]) | tag + }; + + let val = if fits(INLINE0_SIZES) { + compose(INLINE0_OFFSETS, TAG_INLINE0) + } else if fits(INLINE1_SIZES) { + compose(INLINE1_OFFSETS, TAG_INLINE1) + } else if fits(INLINE2_SIZES) { + compose(INLINE2_OFFSETS, TAG_INLINE2) + } else { + let index = with_span_interner(|interner| interner.intern(sd)); + if (index >> INTERNED_INDEX_SIZE) == 0 { + (index << INTERNED_INDEX_OFFSET) | TAG_INTERNED + } else { + panic!("too many spans in a crate"); + } + }; + Span(val) +} + +fn decode(span: Span) -> SpanData { + let val = span.0; + + // Extract a field at position `pos` having size `size`.
+ let extract = |pos, size| { + let mask = ((!0u32) as u64 >> (32 - size)) as u32; // Can't shift u32 by 32 + (val >> pos) & mask + }; + + let (base, len, ctxt) = match val & TAG_MASK { + TAG_INLINE0 => ( + extract(INLINE0_OFFSETS[BASE_INDEX], INLINE0_SIZES[BASE_INDEX]), + extract(INLINE0_OFFSETS[LEN_INDEX], INLINE0_SIZES[LEN_INDEX]), + extract(INLINE0_OFFSETS[CTXT_INDEX], INLINE0_SIZES[CTXT_INDEX]), + ), + TAG_INLINE1 => ( + extract(INLINE1_OFFSETS[BASE_INDEX], INLINE1_SIZES[BASE_INDEX]), + extract(INLINE1_OFFSETS[LEN_INDEX], INLINE1_SIZES[LEN_INDEX]), + extract(INLINE1_OFFSETS[CTXT_INDEX], INLINE1_SIZES[CTXT_INDEX]), + ), + TAG_INLINE2 => ( + extract(INLINE2_OFFSETS[BASE_INDEX], INLINE2_SIZES[BASE_INDEX]), + extract(INLINE2_OFFSETS[LEN_INDEX], INLINE2_SIZES[LEN_INDEX]), + extract(INLINE2_OFFSETS[CTXT_INDEX], INLINE2_SIZES[CTXT_INDEX]), + ), + TAG_INTERNED => { + let index = extract(INTERNED_INDEX_OFFSET, INTERNED_INDEX_SIZE); + return with_span_interner(|interner| *interner.get(index)); + } + _ => unreachable!() + }; + SpanData { lo: BytePos(base), hi: BytePos(base + len), ctxt: SyntaxContext(ctxt) } +} + +#[derive(Default)] +struct SpanInterner { + spans: HashMap<SpanData, u32>, + span_data: Vec<SpanData>, +} + +impl SpanInterner { + fn intern(&mut self, span_data: &SpanData) -> u32 { + if let Some(index) = self.spans.get(span_data) { + return *index; + } + + let index = self.spans.len() as u32; + self.span_data.push(*span_data); + self.spans.insert(*span_data, index); + index + } + + fn get(&self, index: u32) -> &SpanData { + &self.span_data[index as usize] + } +} + +// If an interner exists in TLS, return it. Otherwise, prepare a fresh one. +fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T { + thread_local!(static INTERNER: RefCell<SpanInterner> = { + RefCell::new(SpanInterner::default()) + }); + INTERNER.with(|interner| f(&mut *interner.borrow_mut())) +}