Skip to content

Commit

Permalink
Merge branch 'main' into 07-21-perf_semantic_use_atom_instead_of_comp…
Browse files Browse the repository at this point in the history
…actstr_for_unresolved_references
  • Loading branch information
Dunqing committed Jul 22, 2024
2 parents 053297b + 40f9356 commit ca52f77
Show file tree
Hide file tree
Showing 9 changed files with 166 additions and 13 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ language-tags = "0.3.2"
tsify = "0.4.5"
wasm-bindgen = "0.2.92"
serde-wasm-bindgen = "0.6.5"
handlebars = "5.1.2"
handlebars = "6.0.0"
base64 = "0.22.1"
compact_str = "0.8.0"
console = "0.15.8"
Expand Down
38 changes: 37 additions & 1 deletion crates/oxc_semantic/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::{
binder::Binder,
checker,
class::ClassTableBuilder,
counter::Counter,
diagnostics::redeclaration,
jsdoc::JSDocBuilder,
label::LabelBuilder,
Expand Down Expand Up @@ -178,16 +179,51 @@ impl<'a> SemanticBuilder<'a> {
let scope_id = self.scope.add_root_scope(AstNodeId::DUMMY, ScopeFlags::Top);
program.scope_id.set(Some(scope_id));
} else {
// Count the number of nodes, scopes, symbols, and references.
// Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree`
// and `SymbolTable` to store them.
// This means that as we traverse the AST and fill up these structures with data,
// they never need to grow and reallocate - which is an expensive operation as it
// involves copying all the memory from the old allocation to the new one.
// For large source files, these structures are very large, so growth is very costly
// as it involves copying massive chunks of memory.
// Avoiding this growth produces up to 30% perf boost on our benchmarks.
// TODO: It would be even more efficient to calculate counts in parser to avoid
// this extra AST traversal.
let mut counter = Counter::default();
counter.visit_program(program);
self.nodes.reserve(counter.nodes_count);
self.scope.reserve(counter.scopes_count);
self.symbols.reserve(counter.symbols_count, counter.references_count);

// Visit AST to generate scopes tree etc
self.visit_program(program);

// Check that `Counter` got accurate counts
debug_assert_eq!(self.nodes.len(), counter.nodes_count);
debug_assert_eq!(self.scope.len(), counter.scopes_count);
debug_assert_eq!(self.symbols.references.len(), counter.references_count);
// `Counter` may overestimate number of symbols, because multiple `BindingIdentifier`s
// can result in only a single symbol.
// e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
// This is not a big problem - allocating a `Vec` with excess capacity is cheap.
// It's allocating with *not enough* capacity which is costly, as then the `Vec`
// will grow and reallocate.
debug_assert!(self.symbols.len() <= counter.symbols_count);

// Checking syntax error on module record requires scope information from the previous AST pass
if self.check_syntax_error {
checker::check_module_record(&self);
}
}

debug_assert_eq!(self.unresolved_references.scope_depth(), 1);
self.scope.root_unresolved_references = self.unresolved_references.into_root();
self.scope.root_unresolved_references = self
.unresolved_references
.into_root()
.into_iter()
.map(|(k, v)| (k.into(), v))
.collect();

let jsdoc = if self.build_jsdoc { self.jsdoc.build() } else { JSDocFinder::default() };

Expand Down
90 changes: 90 additions & 0 deletions crates/oxc_semantic/src/counter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//! Visitor to count nodes, scopes, symbols and references in AST.
//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
//! `ScopeTree`, and `SymbolTable` to store info for all these items.

use std::cell::Cell;

use oxc_ast::{
ast::{
BindingIdentifier, IdentifierReference, JSXElementName, JSXMemberExpressionObject,
TSEnumMemberName, TSModuleDeclarationName,
},
visit::walk::{walk_ts_enum_member_name, walk_ts_module_declaration_name},
AstKind, Visit,
};
use oxc_syntax::scope::{ScopeFlags, ScopeId};

#[allow(clippy::struct_field_names)]
#[derive(Default, Debug)]
pub struct Counter {
pub nodes_count: usize,
pub scopes_count: usize,
pub symbols_count: usize,
pub references_count: usize,
}

impl<'a> Visit<'a> for Counter {
#[inline]
fn enter_node(&mut self, _: AstKind<'a>) {
self.nodes_count += 1;
}
#[inline]
fn enter_scope(&mut self, _: ScopeFlags, _: &Cell<Option<ScopeId>>) {
self.scopes_count += 1;
}

#[inline]
fn visit_binding_identifier(&mut self, _: &BindingIdentifier<'a>) {
self.nodes_count += 1;
self.symbols_count += 1;
}

#[inline]
fn visit_identifier_reference(&mut self, _: &IdentifierReference<'a>) {
self.nodes_count += 1;
self.references_count += 1;
}

#[inline]
fn visit_jsx_member_expression_object(&mut self, it: &JSXMemberExpressionObject<'a>) {
self.nodes_count += 1;
match it {
JSXMemberExpressionObject::MemberExpression(expr) => {
self.visit_jsx_member_expression(expr);
}
JSXMemberExpressionObject::Identifier(_) => {
self.nodes_count += 1;
self.references_count += 1;
}
}
}

#[inline]
fn visit_jsx_element_name(&mut self, it: &JSXElementName<'a>) {
self.nodes_count += 1;
match it {
JSXElementName::Identifier(ident) => {
self.nodes_count += 1;
if ident.name.chars().next().is_some_and(char::is_uppercase) {
self.references_count += 1;
}
}
JSXElementName::NamespacedName(name) => self.visit_jsx_namespaced_name(name),
JSXElementName::MemberExpression(expr) => self.visit_jsx_member_expression(expr),
}
}

#[inline]
fn visit_ts_enum_member_name(&mut self, it: &TSEnumMemberName<'a>) {
if !it.is_expression() {
self.symbols_count += 1;
}
walk_ts_enum_member_name(self, it);
}

#[inline]
fn visit_ts_module_declaration_name(&mut self, it: &TSModuleDeclarationName<'a>) {
self.symbols_count += 1;
walk_ts_module_declaration_name(self, it);
}
}
1 change: 1 addition & 0 deletions crates/oxc_semantic/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod binder;
mod builder;
mod checker;
mod class;
mod counter;
mod diagnostics;
mod jsdoc;
mod label;
Expand Down
5 changes: 5 additions & 0 deletions crates/oxc_semantic/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ impl<'a> AstNodes<'a> {
self.nodes.push(node);
ast_node_id
}

pub fn reserve(&mut self, additional: usize) {
self.nodes.reserve(additional);
self.parent_ids.reserve(additional);
}
}

#[derive(Debug)]
Expand Down
8 changes: 8 additions & 0 deletions crates/oxc_semantic/src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,4 +236,12 @@ impl ScopeTree {
pub fn remove_binding(&mut self, scope_id: ScopeId, name: &CompactStr) {
self.bindings[scope_id].shift_remove(name);
}

pub fn reserve(&mut self, additional: usize) {
self.parent_ids.reserve(additional);
self.child_ids.reserve(additional);
self.flags.reserve(additional);
self.bindings.reserve(additional);
self.node_ids.reserve(additional);
}
}
12 changes: 12 additions & 0 deletions crates/oxc_semantic/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,16 @@ impl SymbolTable {
_ => false,
}
}

pub fn reserve(&mut self, additional_symbols: usize, additional_references: usize) {
self.spans.reserve(additional_symbols);
self.names.reserve(additional_symbols);
self.flags.reserve(additional_symbols);
self.scope_ids.reserve(additional_symbols);
self.declarations.reserve(additional_symbols);
self.resolved_references.reserve(additional_symbols);
self.redeclare_variables.reserve(additional_symbols);

self.references.reserve(additional_references);
}
}
19 changes: 10 additions & 9 deletions crates/oxc_semantic/src/unresolved_stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ use assert_unchecked::assert_unchecked;
use oxc_span::Atom;
use rustc_hash::FxHashMap;

use crate::scope::{UnresolvedReference, UnresolvedReferences as ScopeUnresolvedReferences};
use crate::scope::UnresolvedReference;

type UnresolvedReferences<'a> = FxHashMap<Atom<'a>, Vec<UnresolvedReference>>;
/// The difference with Scope's `UnresolvedReferences` is that this type uses Atom as the key. its clone is very cheap!
type TempUnresolvedReferences<'a> = FxHashMap<Atom<'a>, Vec<UnresolvedReference>>;

// Stack used to accumulate unresolved refs while traversing scopes.
// Indexed by scope depth. We recycle `UnresolvedReferences` instances during traversal
// to reduce allocations, so the stack grows to maximum scope depth, but never shrinks.
// See: <https://github.com/oxc-project/oxc/issues/4169>
// This stack abstraction uses the invariant that stack only grows to avoid bounds checks.
pub(crate) struct UnresolvedReferencesStack<'a> {
stack: Vec<UnresolvedReferences<'a>>,
stack: Vec<TempUnresolvedReferences<'a>>,
/// Current scope depth.
/// 0 is global scope. 1 is `Program`.
/// Incremented on entering a scope, and decremented on exit.
Expand All @@ -35,7 +36,7 @@ impl<'a> UnresolvedReferencesStack<'a> {

pub(crate) fn new() -> Self {
let mut stack = vec![];
stack.resize_with(Self::INITIAL_SIZE, UnresolvedReferences::default);
stack.resize_with(Self::INITIAL_SIZE, TempUnresolvedReferences::default);
Self { stack, current_scope_depth: Self::INITIAL_DEPTH }
}

Expand All @@ -44,7 +45,7 @@ impl<'a> UnresolvedReferencesStack<'a> {

// Grow stack if required to ensure `self.stack[self.depth]` is in bounds
if self.stack.len() <= self.current_scope_depth {
self.stack.push(UnresolvedReferences::default());
self.stack.push(TempUnresolvedReferences::default());
}
}

Expand All @@ -61,7 +62,7 @@ impl<'a> UnresolvedReferencesStack<'a> {
}

/// Get unresolved references hash map for current scope
pub(crate) fn current_mut(&mut self) -> &mut UnresolvedReferences<'a> {
pub(crate) fn current_mut(&mut self) -> &mut TempUnresolvedReferences<'a> {
// SAFETY: `stack.len() > current_scope_depth` initially.
// Thereafter, `stack` never shrinks, only grows.
// `current_scope_depth` is only increased in `increment_scope_depth`,
Expand All @@ -73,7 +74,7 @@ impl<'a> UnresolvedReferencesStack<'a> {
/// Get unresolved references hash maps for current scope, and parent scope
pub(crate) fn current_and_parent_mut(
&mut self,
) -> (&mut UnresolvedReferences<'a>, &mut UnresolvedReferences<'a>) {
) -> (&mut TempUnresolvedReferences<'a>, &mut TempUnresolvedReferences<'a>) {
// Assert invariants to remove bounds checks in code below.
// https://godbolt.org/z/vv5Wo5csv
// SAFETY: `current_scope_depth` starts at 1, and is only decremented
Expand All @@ -91,10 +92,10 @@ impl<'a> UnresolvedReferencesStack<'a> {
(current, parent)
}

pub(crate) fn into_root(self) -> ScopeUnresolvedReferences {
pub(crate) fn into_root(self) -> TempUnresolvedReferences<'a> {
// SAFETY: Stack starts with a non-zero size and never shrinks.
// This assertion removes bounds check in `.next()`.
unsafe { assert_unchecked!(!self.stack.is_empty()) };
self.stack.into_iter().next().unwrap().into_iter().map(|(k, v)| (k.into(), v)).collect()
self.stack.into_iter().next().unwrap()
}
}

0 comments on commit ca52f77

Please sign in to comment.