forked from rust-lang/rust
-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of rust-lang#7400 - popzxc:restrict-locales, r=Manishearth
New lint: `disallowed_script_idents` This PR implements a new lint to restrict locales that can be used in the code, as proposed in rust-lang#7376. Current concerns / unresolved questions: - ~~Mixed usage of `script` (as a Unicode term) and `locale` (as something that is easier to understand for the broad audience). I'm not sure whether these terms are fully interchangeable and whether in the current form it is more confusing than helpful.~~ `script` is now used everywhere. - ~~Having to mostly copy-paste `AllowedScript`. Probably it's not a big problem, as the list of scripts is standardized and is unlikely to change, and even if we'd stick to the `unicode_script::Script`, we'll still have to implement custom deserialization, and I don't think that it will be shorter in terms of the amount of LoC.~~ `unicode::Script` is used together with a filtering deserialize function. - Should we stick to the list of "recommended scripts" from [UAX rust-lang#31](http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts) in the configuration? *Please write a short comment explaining your change (or "none" for internal only changes)* changelog: ``[`disallowed_script_idents`]`` r? `@Manishearth`
- Loading branch information
Showing
8 changed files
with
152 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
use clippy_utils::diagnostics::span_lint; | ||
use rustc_ast::ast; | ||
use rustc_data_structures::fx::FxHashSet; | ||
use rustc_lint::{EarlyContext, EarlyLintPass, Level}; | ||
use rustc_session::{declare_tool_lint, impl_lint_pass}; | ||
use unicode_script::{Script, UnicodeScript}; | ||
|
||
declare_clippy_lint! { | ||
/// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed | ||
/// by the lint config. | ||
/// | ||
/// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`. | ||
/// It also ignores the `Common` script type. | ||
/// While configuring, be sure to use official script name [aliases] from | ||
/// [the list of supported scripts][supported_scripts]. | ||
/// | ||
/// See also: [`non_ascii_idents`]. | ||
/// | ||
/// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases | ||
/// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html | ||
/// | ||
/// **Why is this bad?** It may be not desired to have many different scripts for | ||
/// identifiers in the codebase. | ||
/// | ||
/// Note that if you only want to allow plain English, you might want to use | ||
/// built-in [`non_ascii_idents`] lint instead. | ||
/// | ||
/// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents | ||
/// | ||
/// **Known problems:** None. | ||
/// | ||
/// **Example:** | ||
/// ```rust | ||
/// // Assuming that `clippy.toml` contains the following line: | ||
/// // allowed-locales = ["Latin", "Cyrillic"] | ||
/// let counter = 10; // OK, latin is allowed. | ||
/// let счётчик = 10; // OK, cyrillic is allowed. | ||
/// let zähler = 10; // OK, it's still latin. | ||
/// let カウンタ = 10; // Will spawn the lint. | ||
/// ``` | ||
pub DISALLOWED_SCRIPT_IDENTS, | ||
restriction, | ||
"usage of non-allowed Unicode scripts" | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
pub struct DisallowedScriptIdents { | ||
whitelist: FxHashSet<Script>, | ||
} | ||
|
||
impl DisallowedScriptIdents { | ||
pub fn new(whitelist: &[String]) -> Self { | ||
let whitelist = whitelist | ||
.iter() | ||
.map(String::as_str) | ||
.filter_map(Script::from_full_name) | ||
.collect(); | ||
Self { whitelist } | ||
} | ||
} | ||
|
||
impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]); | ||
|
||
impl EarlyLintPass for DisallowedScriptIdents { | ||
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { | ||
// Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint: | ||
// https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs | ||
|
||
let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow; | ||
if !check_disallowed_script_idents { | ||
return; | ||
} | ||
|
||
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); | ||
// Sort by `Span` so that error messages make sense with respect to the | ||
// order of identifier locations in the code. | ||
let mut symbols: Vec<_> = symbols.iter().collect(); | ||
symbols.sort_unstable_by_key(|k| k.1); | ||
|
||
for (symbol, &span) in &symbols { | ||
// Note: `symbol.as_str()` is an expensive operation, thus should not be called | ||
// more than once for a single symbol. | ||
let symbol_str = symbol.as_str(); | ||
if symbol_str.is_ascii() { | ||
continue; | ||
} | ||
|
||
for c in symbol_str.chars() { | ||
// We want to iterate through all the scripts associated with this character | ||
// and check whether at least of one scripts is in the whitelist. | ||
let forbidden_script = c | ||
.script_extension() | ||
.iter() | ||
.find(|script| !self.whitelist.contains(script)); | ||
if let Some(script) = forbidden_script { | ||
span_lint( | ||
cx, | ||
DISALLOWED_SCRIPT_IDENTS, | ||
span, | ||
&format!( | ||
"identifier `{}` has a Unicode script that is not allowed by configuration: {}", | ||
symbol_str, | ||
script.full_name() | ||
), | ||
); | ||
// We don't want to spawn warning multiple times over a single identifier. | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1 | ||
error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1 | ||
|
||
error: aborting due to previous error | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#![deny(clippy::disallowed_script_idents)] | ||
#![allow(dead_code)] | ||
|
||
fn main() { | ||
let counter = 10; // OK, latin is allowed. | ||
let zähler = 10; // OK, it's still latin. | ||
|
||
let счётчик = 10; // Cyrillic is not allowed by default. | ||
let カウンタ = 10; // Same for japanese. | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic | ||
--> $DIR/disallowed_script_idents.rs:8:9 | ||
| | ||
LL | let счётчик = 10; // Cyrillic is not allowed by default. | ||
| ^^^^^^^ | ||
| | ||
note: the lint level is defined here | ||
--> $DIR/disallowed_script_idents.rs:1:9 | ||
| | ||
LL | #![deny(clippy::disallowed_script_idents)] | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana | ||
--> $DIR/disallowed_script_idents.rs:9:9 | ||
| | ||
LL | let カウンタ = 10; // Same for japanese. | ||
| ^^^^^^^^ | ||
|
||
error: aborting due to 2 previous errors | ||
|