From a543516ea4e3f14e56fb6b04cce79e203ebab517 Mon Sep 17 00:00:00 2001 From: roife Date: Fri, 19 Apr 2024 14:47:48 +0800 Subject: [PATCH 1/2] fix: handle escaped chars in doc comments --- crates/hir-def/src/attr.rs | 6 ++- crates/hir-def/src/nameres/collector.rs | 13 +++++-- crates/hir-expand/src/attrs.rs | 49 ++++++++++++++++++++++++- crates/ide-db/src/documentation.rs | 13 ++++--- 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/crates/hir-def/src/attr.rs b/crates/hir-def/src/attr.rs index f4f78ce6a4d68..d9eeffd7983bf 100644 --- a/crates/hir-def/src/attr.rs +++ b/crates/hir-def/src/attr.rs @@ -5,7 +5,7 @@ pub mod builtin; #[cfg(test)] mod tests; -use std::{hash::Hash, ops, slice::Iter as SliceIter}; +use std::{borrow::Cow, hash::Hash, ops, slice::Iter as SliceIter}; use base_db::CrateId; use cfg::{CfgExpr, CfgOptions}; @@ -573,6 +573,10 @@ impl<'attr> AttrQuery<'attr> { self.attrs().find_map(|attr| attr.string_value()) } + pub fn string_value_unescape(self) -> Option> { + self.attrs().find_map(|attr| attr.string_value_unescape()) + } + pub fn exists(self) -> bool { self.attrs().next().is_some() } diff --git a/crates/hir-def/src/nameres/collector.rs b/crates/hir-def/src/nameres/collector.rs index 0a74123abbc51..0a6cd0fe9ed5f 100644 --- a/crates/hir-def/src/nameres/collector.rs +++ b/crates/hir-def/src/nameres/collector.rs @@ -1917,7 +1917,7 @@ impl ModCollector<'_, '_> { } fn collect_module(&mut self, module_id: FileItemTreeId, attrs: &Attrs) { - let path_attr = attrs.by_key("path").string_value(); + let path_attr = attrs.by_key("path").string_value_unescape(); let is_macro_use = attrs.by_key("macro_use").exists(); let module = &self.item_tree[module_id]; match &module.kind { @@ -1931,7 +1931,8 @@ impl ModCollector<'_, '_> { module_id, ); - let Some(mod_dir) = self.mod_dir.descend_into_definition(&module.name, path_attr) + let Some(mod_dir) = + self.mod_dir.descend_into_definition(&module.name, path_attr.as_deref()) else { return; }; @@ -1952,8 +1953,12 @@ impl ModCollector<'_, '_> { ModKind::Outline => { let ast_id = AstId::new(self.file_id(), module.ast_id); let db = self.def_collector.db; - match self.mod_dir.resolve_declaration(db, self.file_id(), &module.name, path_attr) - { + match self.mod_dir.resolve_declaration( + db, + self.file_id(), + &module.name, + path_attr.as_deref(), + ) { Ok((file_id, is_mod_rs, mod_dir)) => { let item_tree = db.file_item_tree(file_id.into()); let krate = self.def_collector.def_map.krate; diff --git a/crates/hir-expand/src/attrs.rs b/crates/hir-expand/src/attrs.rs index f1540498f2664..7782b0fc81fcd 100644 --- a/crates/hir-expand/src/attrs.rs +++ b/crates/hir-expand/src/attrs.rs @@ -1,5 +1,5 @@ //! A higher level attributes based on TokenTree, with also some shortcuts. -use std::{fmt, ops}; +use std::{borrow::Cow, fmt, ops}; use base_db::CrateId; use cfg::CfgExpr; @@ -297,6 +297,20 @@ impl Attr { } } + pub fn string_value_unescape(&self) -> Option> { + match self.input.as_deref()? { + AttrInput::Literal(it) => match it.text.strip_prefix('r') { + Some(it) => { + it.trim_matches('#').strip_prefix('"')?.strip_suffix('"').map(Cow::Borrowed) + } + None => { + it.text.strip_prefix('"')?.strip_suffix('"').and_then(unescape).map(Cow::Owned) + } + }, + _ => None, + } + } + /// #[path(ident)] pub fn single_ident_value(&self) -> Option<&tt::Ident> { match self.input.as_deref()? { @@ -346,6 +360,39 @@ impl Attr { } } +fn unescape(s: &str) -> Option { + let mut res = String::with_capacity(s.len()); + let mut chars = s.chars(); + + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next()? { + 'n' => res.push('\n'), + 'r' => res.push('\r'), + 't' => res.push('\t'), + '\\' => res.push('\\'), + '\'' => res.push('\''), + '"' => res.push('"'), + '0' => res.push('\0'), + 'x' => { + let hex = chars.by_ref().take(2).collect::(); + let c = u8::from_str_radix(&hex, 16).ok()?; + res.push(c as char); + } + 'u' => { + let hex = chars.by_ref().take(4).collect::(); + let c = u32::from_str_radix(&hex, 16).ok()?; + res.push(char::from_u32(c)?); + } + _ => return None, + } + } else { + res.push(c); + } + } + Some(res) +} + pub fn collect_attrs( owner: &dyn ast::HasAttrs, ) -> impl Iterator)> { diff --git a/crates/ide-db/src/documentation.rs b/crates/ide-db/src/documentation.rs index 72ca354365e7b..58e77b95c3292 100644 --- a/crates/ide-db/src/documentation.rs +++ b/crates/ide-db/src/documentation.rs @@ -91,8 +91,10 @@ pub fn docs_with_rangemap( db: &dyn DefDatabase, attrs: &AttrsWithOwner, ) -> Option<(Documentation, DocsRangeMap)> { - let docs = - attrs.by_key("doc").attrs().filter_map(|attr| attr.string_value().map(|s| (s, attr.id))); + let docs = attrs + .by_key("doc") + .attrs() + .filter_map(|attr| attr.string_value_unescape().map(|s| (s, attr.id))); let indent = doc_indent(attrs); let mut buf = String::new(); let mut mapping = Vec::new(); @@ -132,7 +134,7 @@ pub fn docs_with_rangemap( } pub fn docs_from_attrs(attrs: &hir::Attrs) -> Option { - let docs = attrs.by_key("doc").attrs().filter_map(|attr| attr.string_value()); + let docs = attrs.by_key("doc").attrs().filter_map(|attr| attr.string_value_unescape()); let indent = doc_indent(attrs); let mut buf = String::new(); for doc in docs { @@ -270,10 +272,9 @@ fn doc_indent(attrs: &hir::Attrs) -> usize { attrs .by_key("doc") .attrs() - .filter_map(|attr| attr.string_value()) + .filter_map(|attr| attr.string_value()) // no need to use unescape version here .flat_map(|s| s.lines()) - .filter(|line| !line.chars().all(|c| c.is_whitespace())) - .map(|line| line.chars().take_while(|c| c.is_whitespace()).count()) + .filter_map(|line| line.chars().position(|c| !c.is_whitespace())) .min() .unwrap_or(0) } From 3e232bb78a362cb33420c329e7d17438987a9fb9 Mon Sep 17 00:00:00 2001 From: roife Date: Fri, 19 Apr 2024 16:42:07 +0800 Subject: [PATCH 2/2] fix: replace unescape fn with the one in ra-ap-rustc_lexer --- crates/hir-expand/src/attrs.rs | 64 ++++++++++++++++------------------ crates/syntax/src/lib.rs | 1 + 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/crates/hir-expand/src/attrs.rs b/crates/hir-expand/src/attrs.rs index 7782b0fc81fcd..f8bf88d83cd9c 100644 --- a/crates/hir-expand/src/attrs.rs +++ b/crates/hir-expand/src/attrs.rs @@ -8,6 +8,7 @@ use intern::Interned; use mbe::{syntax_node_to_token_tree, DelimiterKind, Punct}; use smallvec::{smallvec, SmallVec}; use span::{Span, SyntaxContextId}; +use syntax::unescape; use syntax::{ast, format_smolstr, match_ast, AstNode, AstToken, SmolStr, SyntaxNode}; use triomphe::ThinArc; @@ -54,8 +55,7 @@ impl RawAttrs { Attr { id, input: Some(Interned::new(AttrInput::Literal(tt::Literal { - // FIXME: Escape quotes from comment content - text: SmolStr::new(format_smolstr!("\"{doc}\"",)), + text: SmolStr::new(format_smolstr!("\"{}\"", Self::escape_chars(doc))), span, }))), path: Interned::new(ModPath::from(crate::name!(doc))), @@ -74,6 +74,10 @@ impl RawAttrs { RawAttrs { entries } } + fn escape_chars(s: &str) -> String { + s.replace('\\', r#"\\"#).replace('"', r#"\""#) + } + pub fn from_attrs_owner( db: &dyn ExpandDatabase, owner: InFile<&dyn ast::HasAttrs>, @@ -303,9 +307,7 @@ impl Attr { Some(it) => { it.trim_matches('#').strip_prefix('"')?.strip_suffix('"').map(Cow::Borrowed) } - None => { - it.text.strip_prefix('"')?.strip_suffix('"').and_then(unescape).map(Cow::Owned) - } + None => it.text.strip_prefix('"')?.strip_suffix('"').and_then(unescape), }, _ => None, } @@ -360,37 +362,31 @@ impl Attr { } } -fn unescape(s: &str) -> Option { - let mut res = String::with_capacity(s.len()); - let mut chars = s.chars(); - - while let Some(c) = chars.next() { - if c == '\\' { - match chars.next()? { - 'n' => res.push('\n'), - 'r' => res.push('\r'), - 't' => res.push('\t'), - '\\' => res.push('\\'), - '\'' => res.push('\''), - '"' => res.push('"'), - '0' => res.push('\0'), - 'x' => { - let hex = chars.by_ref().take(2).collect::(); - let c = u8::from_str_radix(&hex, 16).ok()?; - res.push(c as char); - } - 'u' => { - let hex = chars.by_ref().take(4).collect::(); - let c = u32::from_str_radix(&hex, 16).ok()?; - res.push(char::from_u32(c)?); - } - _ => return None, - } - } else { - res.push(c); +fn unescape(s: &str) -> Option> { + let mut buf = String::new(); + let mut prev_end = 0; + let mut has_error = false; + unescape::unescape_unicode(s, unescape::Mode::Str, &mut |char_range, unescaped_char| match ( + unescaped_char, + buf.capacity() == 0, + ) { + (Ok(c), false) => buf.push(c), + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => { + prev_end = char_range.end + } + (Ok(c), true) => { + buf.reserve_exact(s.len()); + buf.push_str(&s[..prev_end]); + buf.push(c); } + (Err(_), _) => has_error = true, + }); + + match (has_error, buf.capacity() == 0) { + (true, _) => None, + (false, false) => Some(Cow::Owned(buf)), + (false, true) => Some(Cow::Borrowed(s)), } - Some(res) } pub fn collect_attrs( diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index e7bbf936dc149..3a9ebafe87dc1 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -65,6 +65,7 @@ pub use rowan::{ api::Preorder, Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent, }; +pub use rustc_lexer::unescape; pub use smol_str::{format_smolstr, SmolStr}; /// `Parse` is the result of the parsing: a syntax tree and a collection of