Skip to content

Commit

Permalink
Auto merge of rust-lang#96135 - petrochenkov:doclink6, r=GuillaumeGomez
Browse files Browse the repository at this point in the history
rustdoc: Optimize and refactor doc link resolution

One more subset of rust-lang#94857 that should bring perf improvements rather than regressions + a couple more optimizations on top of it.
It's better to read individual commits and their descriptions to understand the changes.
The `may_have_doc_links` optimization is not *very* useful here, but it's much more important for rust-lang#94857.

Closes rust-lang#96079
  • Loading branch information
bors committed Apr 20, 2022
2 parents 0034bbc + ca5c752 commit d39864d
Show file tree
Hide file tree
Showing 17 changed files with 275 additions and 134 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2894,6 +2894,7 @@ dependencies = [
name = "proc_macro"
version = "0.0.0"
dependencies = [
"core",
"std",
]

Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_ast/src/attr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::token::{self, CommentKind, Token};
use crate::tokenstream::{AttrAnnotatedTokenStream, AttrAnnotatedTokenTree};
use crate::tokenstream::{DelimSpan, Spacing, TokenTree, TreeAndSpacing};
use crate::tokenstream::{LazyTokenStream, TokenStream};
use crate::util::comments;

use rustc_index::bit_set::GrowableBitSet;
use rustc_span::source_map::BytePos;
Expand Down Expand Up @@ -262,6 +263,10 @@ impl Attribute {
}
}

pub fn may_have_doc_links(&self) -> bool {
self.doc_str().map_or(false, |s| comments::may_have_doc_links(s.as_str()))
}

pub fn get_normal_item(&self) -> &AttrItem {
match self.kind {
AttrKind::Normal(ref item, _) => item,
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_ast/src/util/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ pub struct Comment {
pub pos: BytePos,
}

/// A fast conservative estimate on whether the string can contain documentation links.
/// A pair of square brackets `[]` must exist in the string, but we only search for the
/// opening bracket because brackets always go in pairs in practice.
#[inline]
pub fn may_have_doc_links(s: &str) -> bool {
s.contains('[')
}

/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,10 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
adjustments: generator_data.adjustments,
})
}

fn get_may_have_doc_links(self, index: DefIndex) -> bool {
self.root.tables.may_have_doc_links.get(self, index).is_some()
}
}

impl CrateMetadata {
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,10 @@ impl CStore {
) -> impl Iterator<Item = DefId> + '_ {
self.get_crate_data(cnum).get_all_incoherent_impls()
}

pub fn may_have_doc_links_untracked(&self, def_id: DefId) -> bool {
self.get_crate_data(def_id.krate).get_may_have_doc_links(def_id.index)
}
}

impl CrateStore for CStore {
Expand Down
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,14 @@ fn should_encode_generics(def_kind: DefKind) -> bool {
}

impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
fn encode_attrs(&mut self, def_id: DefId) {
let attrs = self.tcx.get_attrs(def_id);
record!(self.tables.attributes[def_id] <- attrs);
if attrs.iter().any(|attr| attr.may_have_doc_links()) {
self.tables.may_have_doc_links.set(def_id.index, ());
}
}

fn encode_def_ids(&mut self) {
if self.is_proc_macro {
return;
Expand All @@ -989,7 +997,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let Some(def_kind) = def_kind else { continue };
self.tables.opt_def_kind.set(def_id.index, def_kind);
record!(self.tables.def_span[def_id] <- tcx.def_span(def_id));
record!(self.tables.attributes[def_id] <- tcx.get_attrs(def_id));
self.encode_attrs(def_id);
record!(self.tables.expn_that_defined[def_id] <- self.tcx.expn_that_defined(def_id));
if should_encode_visibility(def_kind) {
record!(self.tables.visibility[def_id] <- self.tcx.visibility(def_id));
Expand Down Expand Up @@ -1651,7 +1659,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {

self.tables.opt_def_kind.set(LOCAL_CRATE.as_def_id().index, DefKind::Mod);
record!(self.tables.def_span[LOCAL_CRATE.as_def_id()] <- tcx.def_span(LOCAL_CRATE.as_def_id()));
record!(self.tables.attributes[LOCAL_CRATE.as_def_id()] <- tcx.get_attrs(LOCAL_CRATE.as_def_id()));
self.encode_attrs(LOCAL_CRATE.as_def_id());
record!(self.tables.visibility[LOCAL_CRATE.as_def_id()] <- tcx.visibility(LOCAL_CRATE.as_def_id()));
if let Some(stability) = stability {
record!(self.tables.lookup_stability[LOCAL_CRATE.as_def_id()] <- stability);
Expand Down Expand Up @@ -1692,7 +1700,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let def_id = id.to_def_id();
self.tables.opt_def_kind.set(def_id.index, DefKind::Macro(macro_kind));
record!(self.tables.kind[def_id] <- EntryKind::ProcMacro(macro_kind));
record!(self.tables.attributes[def_id] <- attrs);
self.encode_attrs(def_id);
record!(self.tables.def_keys[def_id] <- def_key);
record!(self.tables.def_ident_span[def_id] <- span);
record!(self.tables.def_span[def_id] <- span);
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ define_tables! {
def_path_hashes: Table<DefIndex, DefPathHash>,
proc_macro_quoted_spans: Table<usize, Lazy<Span>>,
generator_diagnostic_data: Table<DefIndex, Lazy<GeneratorDiagnosticData<'tcx>>>,
may_have_doc_links: Table<DefIndex, ()>,
}

#[derive(Copy, Clone, MetadataEncodable, MetadataDecodable)]
Expand Down
14 changes: 14 additions & 0 deletions compiler/rustc_metadata/src/rmeta/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,20 @@ impl FixedSizeEncoding for Option<RawDefId> {
}
}

impl FixedSizeEncoding for Option<()> {
type ByteArray = [u8; 1];

#[inline]
fn from_bytes(b: &[u8; 1]) -> Self {
(b[0] != 0).then(|| ())
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 1]) {
b[0] = self.is_some() as u8
}
}

// NOTE(eddyb) there could be an impl for `usize`, which would enable a more
// generic `Lazy<T>` impl, but in the general case we might not need / want to
// fit every `usize` in `u32`.
Expand Down
4 changes: 4 additions & 0 deletions library/proc_macro/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ edition = "2021"

[dependencies]
std = { path = "../std" }
# Workaround: when documenting this crate rustdoc will try to load crate named
# `core` when resolving doc links. Without this line a different `core` will be
# loaded from sysroot causing duplicate lang items and other similar errors.
core = { path = "../core" }
70 changes: 32 additions & 38 deletions src/librustdoc/clean/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1089,35 +1089,35 @@ impl Attributes {
attrs: &[ast::Attribute],
additional_attrs: Option<(&[ast::Attribute], DefId)>,
) -> Attributes {
let mut doc_strings: Vec<DocFragment> = vec![];
let clean_attr = |(attr, parent_module): (&ast::Attribute, Option<DefId>)| {
if let Some((value, kind)) = attr.doc_str_and_comment_kind() {
trace!("got doc_str={:?}", value);
let value = beautify_doc_string(value, kind);
// Additional documentation should be shown before the original documentation.
let attrs1 = additional_attrs
.into_iter()
.flat_map(|(attrs, def_id)| attrs.iter().map(move |attr| (attr, Some(def_id))));
let attrs2 = attrs.iter().map(|attr| (attr, None));
Attributes::from_ast_iter(attrs1.chain(attrs2), false)
}

crate fn from_ast_iter<'a>(
attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
doc_only: bool,
) -> Attributes {
let mut doc_strings = Vec::new();
let mut other_attrs = Vec::new();
for (attr, parent_module) in attrs {
if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
trace!("got doc_str={doc_str:?}");
let doc = beautify_doc_string(doc_str, comment_kind);
let kind = if attr.is_doc_comment() {
DocFragmentKind::SugaredDoc
} else {
DocFragmentKind::RawDoc
};

let frag =
DocFragment { span: attr.span, doc: value, kind, parent_module, indent: 0 };

doc_strings.push(frag);

None
} else {
Some(attr.clone())
let fragment = DocFragment { span: attr.span, doc, kind, parent_module, indent: 0 };
doc_strings.push(fragment);
} else if !doc_only {
other_attrs.push(attr.clone());
}
};

// Additional documentation should be shown before the original documentation
let other_attrs = additional_attrs
.into_iter()
.flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id))))
.chain(attrs.iter().map(|attr| (attr, None)))
.filter_map(clean_attr)
.collect();
}

Attributes { doc_strings, other_attrs }
}
Expand All @@ -1138,23 +1138,17 @@ impl Attributes {
}

/// Return the doc-comments on this item, grouped by the module they came from.
///
/// The module can be different if this is a re-export with added documentation.
crate fn collapsed_doc_value_by_module_level(&self) -> FxHashMap<Option<DefId>, String> {
let mut ret = FxHashMap::default();
if self.doc_strings.len() == 0 {
return ret;
}
let last_index = self.doc_strings.len() - 1;

for (i, new_frag) in self.doc_strings.iter().enumerate() {
let out = ret.entry(new_frag.parent_module).or_default();
add_doc_fragment(out, new_frag);
if i == last_index {
out.pop();
}
///
/// The last newline is not trimmed so the produced strings are reusable between
/// early and late doc link resolution regardless of their position.
crate fn prepare_to_doc_link_resolution(&self) -> FxHashMap<Option<DefId>, String> {
let mut res = FxHashMap::default();
for fragment in &self.doc_strings {
let out_str = res.entry(fragment.parent_module).or_default();
add_doc_fragment(out_str, fragment);
}
ret
res
}

/// Finds all `doc` attributes as NameValues and returns their corresponding values, joined
Expand Down
5 changes: 4 additions & 1 deletion src/librustdoc/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rustc_data_structures::sync::{self, Lrc};
use rustc_errors::emitter::{Emitter, EmitterWriter};
use rustc_errors::json::JsonEmitter;
use rustc_feature::UnstableFeatures;
use rustc_hir::def::Res;
use rustc_hir::def::{Namespace, Res};
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
use rustc_hir::intravisit::{self, Visitor};
use rustc_hir::{HirId, Path, TraitCandidate};
Expand All @@ -29,11 +29,14 @@ use crate::clean::inline::build_external_trait;
use crate::clean::{self, ItemId, TraitWithExtraInfo};
use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions};
use crate::formats::cache::Cache;
use crate::passes::collect_intra_doc_links::PreprocessedMarkdownLink;
use crate::passes::{self, Condition::*};

crate use rustc_session::config::{DebuggingOptions, Input, Options};

crate struct ResolverCaches {
crate markdown_links: Option<FxHashMap<String, Vec<PreprocessedMarkdownLink>>>,
crate doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option<Res<NodeId>>>,
/// Traits in scope for a given module.
/// See `collect_intra_doc_links::traits_implemented_by` for more details.
crate traits_in_scope: DefIdMap<Vec<TraitCandidate>>,
Expand Down
10 changes: 6 additions & 4 deletions src/librustdoc/html/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,7 @@ crate struct MarkdownLink {
pub range: Range<usize>,
}

crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {
crate fn markdown_links<R>(md: &str, filter_map: impl Fn(MarkdownLink) -> Option<R>) -> Vec<R> {
if md.is_empty() {
return vec![];
}
Expand Down Expand Up @@ -1295,11 +1295,12 @@ crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {

let mut push = |link: BrokenLink<'_>| {
let span = span_for_link(&link.reference, link.span);
links.borrow_mut().push(MarkdownLink {
filter_map(MarkdownLink {
kind: LinkType::ShortcutUnknown,
link: link.reference.to_string(),
range: span,
});
})
.map(|link| links.borrow_mut().push(link));
None
};
let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut push))
Expand All @@ -1314,7 +1315,8 @@ crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {
if let Event::Start(Tag::Link(kind, dest, _)) = ev.0 {
debug!("found link: {dest}");
let span = span_for_link(&dest, ev.1);
links.borrow_mut().push(MarkdownLink { kind, link: dest.into_string(), range: span });
filter_map(MarkdownLink { kind, link: dest.into_string(), range: span })
.map(|link| links.borrow_mut().push(link));
}
}

Expand Down
Loading

0 comments on commit d39864d

Please sign in to comment.