Skip to content

Commit

Permalink
Add nfc_normalize_idents flag
Browse files Browse the repository at this point in the history
Also fixes #6069.
  • Loading branch information
Jules-Bertholet committed Feb 14, 2024
1 parent 5805040 commit 48352e3
Show file tree
Hide file tree
Showing 16 changed files with 196 additions and 49 deletions.
42 changes: 32 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ thiserror = "1.0.40"
toml = "0.7.4"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
unicode-normalization = "0.1.22"
unicode-properties = { version = "0.1", default-features = false, features = [
"general-category",
] }
unicode-segmentation = "1.9"
unicode-width = "0.1"
unicode-properties = { version = "0.1", default-features = false, features = ["general-category"] }

rustfmt-config_proc_macro = { version = "0.3", path = "config_proc_macro" }

Expand Down
8 changes: 8 additions & 0 deletions Configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,14 @@ Number of lines to check for a `@generated` pragma header, starting from the top

See also [format_generated_files](#format_generated_files).

## `nfc_normalize_idents`

Whether to normalize identifiers to Unicode Normalization Form C (NFC). The compiler treats identifiers whose NFC normalizations are identical as the same identifier.

- **Default value**: `false`
- **Possible values**: `true`, `false`
- **Stable**: No

## `format_macro_matchers`

Format the metavariable matching patterns in macros.
Expand Down
3 changes: 3 additions & 0 deletions src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ create_config! {
format_generated_files: bool, true, false, "Format generated files";
generated_marker_line_search_limit: usize, 5, false, "Number of lines to check for a \
`@generated` marker when `format_generated_files` is enabled";
nfc_normalize_idents: bool, false, false, "Whether to normalize identifiers \
to Unicode Normalization Form C";

// Options that can change the source code beyond whitespace/blocks (somewhat linty things)
merge_derives: bool, true, true, "Merge multiple `#[derive(...)]` into a single one";
Expand Down Expand Up @@ -683,6 +685,7 @@ version = "One"
inline_attribute_width = 0
format_generated_files = true
generated_marker_line_search_limit = 5
nfc_normalize_idents = false
merge_derives = true
use_try_shorthand = false
use_field_init_shorthand = false
Expand Down
8 changes: 4 additions & 4 deletions src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::types::{rewrite_path, PathContext};
use crate::utils::{
colon_spaces, contains_skip, count_newlines, filtered_str_fits, first_line_ends_with,
inner_attributes, last_line_extendable, last_line_width, mk_sp, outer_attributes,
semicolon_for_expr, unicode_str_width, wrap_str,
rewrite_ident, semicolon_for_expr, unicode_str_width, wrap_str,
};
use crate::vertical::rewrite_with_alignment;
use crate::visitor::FmtVisitor;
Expand Down Expand Up @@ -1754,9 +1754,9 @@ pub(crate) fn rewrite_field(
if !attrs_str.is_empty() {
attrs_str.push_str(&shape.indent.to_string_with_newline(context.config));
};
let name = context.snippet(field.ident.span);
let name = rewrite_ident(context, field.ident);
if field.is_shorthand {
Some(attrs_str + name)
Some(attrs_str + &name)
} else {
let mut separator = String::from(struct_lit_field_separator(context.config));
for _ in 0..prefix_max_width.saturating_sub(name.len()) {
Expand All @@ -1770,7 +1770,7 @@ pub(crate) fn rewrite_field(
Some(ref e)
if !is_lit && e.as_str() == name && context.config.use_field_init_shorthand() =>
{
Some(attrs_str + name)
Some(attrs_str + &name)
}
Some(e) => Some(format!("{attrs_str}{name}{separator}{e}")),
None => {
Expand Down
6 changes: 3 additions & 3 deletions src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl UseSegment {
if name.is_empty() || name == "{{root}}" {
return None;
}
let kind = match name {
let kind = match &*name {
"self" => UseSegmentKind::Slf(None),
"super" => UseSegmentKind::Super(None),
"crate" => UseSegmentKind::Crate(None),
Expand Down Expand Up @@ -498,7 +498,7 @@ impl UseTree {
let name = if a.prefix.segments.len() == 2 && leading_modsep {
context.snippet(a.prefix.span).to_owned()
} else {
rewrite_ident(context, path_to_imported_ident(&a.prefix)).to_owned()
rewrite_ident(context, path_to_imported_ident(&a.prefix)).into_owned()
};
let alias = rename.and_then(|ident| {
if ident.name == sym::underscore_imports {
Expand All @@ -507,7 +507,7 @@ impl UseTree {
} else if ident == path_to_imported_ident(&a.prefix) {
None
} else {
Some(rewrite_ident(context, ident).to_owned())
Some(rewrite_ident(context, ident).into_owned())
}
});
let kind = match name.as_ref() {
Expand Down
22 changes: 13 additions & 9 deletions src/items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ impl<'a> FmtVisitor<'a> {
self.block_indent,
Some(one_line_width),
)?,
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).to_owned(),
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).into_owned(),
};

let variant_body = if let Some(ref expr) = field.disr_expr {
Expand Down Expand Up @@ -1160,8 +1160,12 @@ pub(crate) fn format_trait(
let body_lo = context.snippet_provider.span_after(item.span, "{");

let shape = Shape::indented(offset, context.config).offset_left(result.len())?;
let generics_str =
rewrite_generics(context, rewrite_ident(context, item.ident), generics, shape)?;
let generics_str = rewrite_generics(
context,
&rewrite_ident(context, item.ident),
generics,
shape,
)?;
result.push_str(&generics_str);

// FIXME(#2055): rustfmt fails to format when there are comments between trait bounds.
Expand Down Expand Up @@ -1356,7 +1360,7 @@ pub(crate) fn format_trait_alias(
let alias = rewrite_ident(context, ident);
// 6 = "trait ", 2 = " ="
let g_shape = shape.offset_left(6)?.sub_width(2)?;
let generics_str = rewrite_generics(context, alias, generics, g_shape)?;
let generics_str = rewrite_generics(context, &alias, generics, g_shape)?;
let vis_str = format_visibility(context, vis);
let lhs = format!("{vis_str}trait {generics_str} =");
// 1 = ";"
Expand Down Expand Up @@ -1749,13 +1753,13 @@ fn rewrite_ty<R: Rewrite>(
let ident_str = rewrite_ident(context, ident);

if generics.params.is_empty() {
result.push_str(ident_str)
result.push_str(&ident_str)
} else {
// 2 = `= `
let g_shape = Shape::indented(indent, context.config)
.offset_left(result.len())?
.sub_width(2)?;
let generics_str = rewrite_generics(context, ident_str, generics, g_shape)?;
let generics_str = rewrite_generics(context, &ident_str, generics, g_shape)?;
result.push_str(&generics_str);
}

Expand Down Expand Up @@ -2362,7 +2366,7 @@ fn rewrite_fn_base(
let fd = fn_sig.decl;
let generics_str = rewrite_generics(
context,
rewrite_ident(context, ident),
&rewrite_ident(context, ident),
&fn_sig.generics,
shape,
)?;
Expand Down Expand Up @@ -3188,7 +3192,7 @@ fn format_header(
}
}

result.push_str(rewrite_ident(context, ident));
result.push_str(&rewrite_ident(context, ident));

result
}
Expand Down Expand Up @@ -3438,7 +3442,7 @@ pub(crate) fn rewrite_mod(
let mut result = String::with_capacity(32);
result.push_str(&*format_visibility(context, &item.vis));
result.push_str("mod ");
result.push_str(rewrite_ident(context, item.ident));
result.push_str(&rewrite_ident(context, item.ident));
result.push(';');
rewrite_attrs(context, item, &result, attrs_shape)
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ extern crate rustc_builtin_macros;
extern crate rustc_data_structures;
extern crate rustc_errors;
extern crate rustc_expand;
extern crate rustc_lexer;
extern crate rustc_parse;
extern crate rustc_session;
extern crate rustc_span;
Expand Down
30 changes: 20 additions & 10 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::shape::{Indent, Shape};
use crate::source_map::SpanUtils;
use crate::spanned::Spanned;
use crate::utils::{
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp,
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp, nfc_normalize,
remove_trailing_white_spaces, rewrite_ident, trim_left_preserve_layout, NodeIdExt,
};
use crate::visitor::FmtVisitor;
Expand Down Expand Up @@ -284,7 +284,7 @@ fn rewrite_macro_inner(
},
)
.map(|rw| match position {
MacroPosition::Item => format!("{};", rw),
MacroPosition::Item => format!("{rw};"),
_ => rw,
})
}
Expand Down Expand Up @@ -425,7 +425,7 @@ pub(crate) fn rewrite_macro_def(
};

result += " ";
result += rewrite_ident(context, ident);
result += &rewrite_ident(context, ident);

let multi_branch_style = def.macro_rules || parsed_def.branches.len() != 1;

Expand Down Expand Up @@ -490,6 +490,7 @@ pub(crate) fn rewrite_macro_def(
}

fn register_metavariable(
context: &RewriteContext<'_>,
map: &mut HashMap<String, String>,
result: &mut String,
name: &str,
Expand All @@ -502,14 +503,21 @@ fn register_metavariable(
new_name.push_str(name);
old_name.push_str(name);

// `$` is `NFC_Inert`, so normalization leaves it unchanged
let new_name = nfc_normalize(context, &new_name).into_owned();
let old_name = nfc_normalize(context, &old_name).into_owned();

result.push_str(&new_name);
map.insert(old_name, new_name);
}

// Replaces `$foo` with `zfoo`. We must check for name overlap to ensure we
// aren't causing problems.
// This should also work for escaped `$` variables, where we leave earlier `$`s.
fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
fn replace_names(
context: &RewriteContext<'_>,
input: &str,
) -> Option<(String, HashMap<String, String>)> {
// Each substitution will require five or six extra bytes.
let mut result = String::with_capacity(input.len() + 64);
let mut substs = HashMap::new();
Expand All @@ -523,23 +531,23 @@ fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
dollar_count += 1;
} else if dollar_count == 0 {
result.push(c);
} else if !c.is_alphanumeric() && !cur_name.is_empty() {
} else if !rustc_lexer::is_id_continue(c) && !cur_name.is_empty() {
// Terminates a name following one or more dollars.
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);

result.push(c);
dollar_count = 0;
cur_name.clear();
} else if c == '(' && cur_name.is_empty() {
// FIXME: Support macro def with repeat.
return None;
} else if c.is_alphanumeric() || c == '_' {
} else if rustc_lexer::is_id_continue(c) {
cur_name.push(c);
}
}

if !cur_name.is_empty() {
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);
}

debug!("replace_names `{}` {:?}", result, substs);
Expand Down Expand Up @@ -655,7 +663,9 @@ impl MacroArgKind {
};

match *self {
MacroArgKind::MetaVariable(ty, ref name) => Some(format!("${name}:{ty}")),
MacroArgKind::MetaVariable(ty, ref name) => {
Some(format!("${}:{ty}", nfc_normalize(context, name)))
}
MacroArgKind::Repeat(delim_tok, ref args, ref another, ref tok) => {
let (lhs, inner, rhs) = rewrite_delimited_inner(delim_tok, args)?;
let another = another
Expand Down Expand Up @@ -1273,7 +1283,7 @@ impl MacroBranch {
// `$$`). We'll try and format like an AST node, but we'll substitute
// variables for new names with the same length first.

let (body_str, substs) = replace_names(old_body)?;
let (body_str, substs) = replace_names(context, old_body)?;

let mut config = context.config.clone();
config.set().show_parse_errors(false);
Expand Down
4 changes: 2 additions & 2 deletions src/patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ impl Rewrite for Pat {
let hi = context.snippet_provider.span_before(self.span, "@");
combine_strs_with_missing_comments(
context,
id_str,
&id_str,
&sub_pat,
mk_sp(ident.span.hi(), hi),
shape,
true,
)?
} else {
id_str.to_owned()
id_str.into_owned()
};

combine_strs_with_missing_comments(
Expand Down
Loading

0 comments on commit 48352e3

Please sign in to comment.