Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sort by key #7963

Merged
merged 13 commits into from
Oct 30, 2023
34 changes: 23 additions & 11 deletions crates/ruff_linter/src/rules/isort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ pub(crate) use categorize::categorize;
use categorize::categorize_imports;
pub use categorize::{ImportSection, ImportType};
use comments::Comment;
use itertools::Itertools;
use normalize::normalize_imports;
use order::order_imports;
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_source_file::Locator;
use settings::Settings;
use sorting::cmp_either_import;
use sorting::module_key;
use types::EitherImport::{Import, ImportFrom};
use types::{AliasData, EitherImport, ImportBlock, TrailingComma};

Expand Down Expand Up @@ -173,17 +174,28 @@ fn format_import_block(

let imports = order_imports(import_block, settings);

let imports = {
let mut imports = imports
.import
.into_iter()
.map(Import)
.chain(imports.import_from.into_iter().map(ImportFrom))
.collect::<Vec<EitherImport>>();
if settings.force_sort_within_sections {
imports.sort_by(|import1, import2| cmp_either_import(import1, import2, settings));
};
let imports = imports
.import
.into_iter()
.map(Import)
.chain(imports.import_from.into_iter().map(ImportFrom));
let imports = if settings.force_sort_within_sections {
imports
.sorted_by_cached_key(|import| match import {
Import((alias, _)) => {
module_key(Some(alias.name), alias.asname, None, None, settings)
}
ImportFrom((import_from, _, _, aliases)) => module_key(
import_from.module,
None,
import_from.level,
aliases.first().map(|(alias, _)| (alias.name, alias.asname)),
settings,
),
})
.collect::<Vec<EitherImport>>()
} else {
imports.collect::<Vec<EitherImport>>()
};

// Add a blank line between every section.
Expand Down
42 changes: 17 additions & 25 deletions crates/ruff_linter/src/rules/isort/order.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
use std::cmp::Ordering;

use itertools::Itertools;

use super::settings::Settings;
use super::sorting::{cmp_import_from, cmp_members, cmp_modules};
use super::sorting::{member_key, module_key};
use super::types::{AliasData, CommentSet, ImportBlock, ImportFromStatement, OrderedImportBlock};

pub(crate) fn order_imports<'a>(
Expand All @@ -13,12 +11,11 @@ pub(crate) fn order_imports<'a>(
let mut ordered = OrderedImportBlock::default();

// Sort `Stmt::Import`.
ordered.import.extend(
block
.import
.into_iter()
.sorted_by(|(alias1, _), (alias2, _)| cmp_modules(alias1, alias2, settings)),
);
ordered
.import
.extend(block.import.into_iter().sorted_by_cached_key(|(alias, _)| {
module_key(Some(alias.name), alias.asname, None, None, settings)
}));

// Sort `Stmt::ImportFrom`.
ordered.import_from.extend(
Expand Down Expand Up @@ -53,27 +50,22 @@ pub(crate) fn order_imports<'a>(
trailing_comma,
aliases
.into_iter()
.sorted_by(|(alias1, _), (alias2, _)| {
cmp_members(alias1, alias2, settings)
.sorted_by_cached_key(|(alias, _)| {
member_key(alias.name, alias.asname, settings)
})
.collect::<Vec<(AliasData, CommentSet)>>(),
)
},
)
.sorted_by(
|(import_from1, _, _, aliases1), (import_from2, _, _, aliases2)| {
cmp_import_from(import_from1, import_from2, settings).then_with(|| {
match (aliases1.first(), aliases2.first()) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some((alias1, _)), Some((alias2, _))) => {
cmp_members(alias1, alias2, settings)
}
}
})
},
),
.sorted_by_cached_key(|(import_from, _, _, aliases)| {
module_key(
import_from.module,
None,
import_from.level,
aliases.first().map(|(alias, _)| (alias.name, alias.asname)),
settings,
)
}),
);
ordered
}
228 changes: 97 additions & 131 deletions crates/ruff_linter/src/rules/isort/sorting.rs
Original file line number Diff line number Diff line change
@@ -1,171 +1,137 @@
/// See: <https://github.com/PyCQA/isort/blob/12cc5fbd67eebf92eb2213b03c07b138ae1fb448/isort/sorting.py#L13>
use natord;
use std::cmp::Ordering;
use std::collections::BTreeSet;

use ruff_python_stdlib::str;

use super::settings::{RelativeImportsOrder, Settings};
use super::types::EitherImport::{Import, ImportFrom};
use super::types::{AliasData, EitherImport, ImportFromData, Importable};

#[derive(PartialOrd, Ord, PartialEq, Eq, Copy, Clone)]
pub(crate) enum Prefix {
Constants,
Classes,
Variables,

#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Copy, Clone)]
pub(crate) enum MemberType {
Constant,
Class,
Variable,
}

fn prefix(name: &str, settings: &Settings) -> Prefix {
fn member_type(name: &str, settings: &Settings) -> MemberType {
if settings.constants.contains(name) {
// Ex) `CONSTANT`
Prefix::Constants
MemberType::Constant
} else if settings.classes.contains(name) {
// Ex) `CLASS`
Prefix::Classes
MemberType::Class
} else if settings.variables.contains(name) {
// Ex) `variable`
Prefix::Variables
MemberType::Variable
} else if name.len() > 1 && str::is_cased_uppercase(name) {
// Ex) `CONSTANT`
Prefix::Constants
MemberType::Constant
} else if name.chars().next().is_some_and(char::is_uppercase) {
// Ex) `Class`
Prefix::Classes
MemberType::Class
} else {
// Ex) `variable`
Prefix::Variables
MemberType::Variable
}
}

/// Compare two module names' by their `force-to-top`ness.
fn cmp_force_to_top(name1: &str, name2: &str, force_to_top: &BTreeSet<String>) -> Ordering {
let force_to_top1 = force_to_top.contains(name1);
let force_to_top2 = force_to_top.contains(name2);
force_to_top1.cmp(&force_to_top2).reverse()
}
#[derive(Eq, PartialEq, Debug)]
pub(crate) struct NatOrdString(String);

/// Compare two top-level modules.
pub(crate) fn cmp_modules(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering {
cmp_force_to_top(alias1.name, alias2.name, &settings.force_to_top)
.then_with(|| {
if settings.case_sensitive {
natord::compare(alias1.name, alias2.name)
} else {
natord::compare_ignore_case(alias1.name, alias2.name)
.then_with(|| natord::compare(alias1.name, alias2.name))
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this logic still being captured in the refactor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be because both module_key and member_key return a tuple with first maybe_lower_case_name (which is Some(name.to_lowercase()) if settings.case_sensitive is false, otherwise None) and then the module/member name.

So if settings.case_sensitive is true, then we just compare the names using natord::compare, and if it's false we first compare the lowercase names with natord::compare, and then the actual names.

I guess the question is, is comparing two strings that have been turned into lowercase with natord::compare the same as comparing them directly with natord::compare_ignore_case? I looked at the source code for natord::compare_ignore_case and they're just calling to_lowercase on the chars individually, but apart from that it seems like what I did should be equivalent.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is slightly different, because they also map integers to avoid sorting them lexicographically, right? For example, to avoid putting module10 before module2.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I thought you were talking about the fact that there are two function calls (one case sensitive and one case insensitive).

The correct natrural ordering (typically module2 comes before module10) is indeed correctly captured in my refactor because I'm relying on natord::compare to implement the Ord trait for my types. Was that the question?

})
.then_with(|| match (alias1.asname, alias2.asname) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(asname1), Some(asname2)) => natord::compare(asname1, asname2),
})
impl Ord for NatOrdString {
fn cmp(&self, other: &Self) -> Ordering {
natord::compare(&self.0, &other.0)
}
}

/// Compare two member imports within `Stmt::ImportFrom` blocks.
pub(crate) fn cmp_members(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering {
match (alias1.name == "*", alias2.name == "*") {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => {
if settings.order_by_type {
prefix(alias1.name, settings)
.cmp(&prefix(alias2.name, settings))
.then_with(|| cmp_modules(alias1, alias2, settings))
} else {
cmp_modules(alias1, alias2, settings)
}
}
impl PartialOrd for NatOrdString {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

/// Compare two relative import levels.
pub(crate) fn cmp_levels(
level1: Option<u32>,
level2: Option<u32>,
relative_imports_order: RelativeImportsOrder,
) -> Ordering {
match (level1, level2) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(level1), Some(level2)) => match relative_imports_order {
RelativeImportsOrder::ClosestToFurthest => level1.cmp(&level2),
RelativeImportsOrder::FurthestToClosest => level2.cmp(&level1),
},
#[derive(Eq, PartialEq, Debug)]
pub(crate) struct NatOrdStr<'a>(&'a str);

impl Ord for NatOrdStr<'_> {
fn cmp(&self, other: &Self) -> Ordering {
natord::compare(self.0, other.0)
}
}

/// Compare two `Stmt::ImportFrom` blocks.
pub(crate) fn cmp_import_from(
import_from1: &ImportFromData,
import_from2: &ImportFromData,
settings: &Settings,
) -> Ordering {
cmp_levels(
import_from1.level,
import_from2.level,
settings.relative_imports_order,
)
.then_with(|| {
cmp_force_to_top(
&import_from1.module_name(),
&import_from2.module_name(),
&settings.force_to_top,
)
})
.then_with(|| match (&import_from1.module, import_from2.module) {
(None, None) => Ordering::Equal,
(None, Some(_)) => Ordering::Less,
(Some(_), None) => Ordering::Greater,
(Some(module1), Some(module2)) => {
if settings.case_sensitive {
natord::compare(module1, module2)
} else {
natord::compare_ignore_case(module1, module2)
}
}
})
impl PartialOrd for NatOrdStr<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

/// Compare an import to an import-from.
fn cmp_import_import_from(
import: &AliasData,
import_from: &ImportFromData,
type ModuleKey<'a> = (
i64,
Option<bool>,
Option<NatOrdString>,
Option<NatOrdStr<'a>>,
Option<NatOrdStr<'a>>,
Option<MemberKey<'a>>,
);

pub(crate) fn module_key<'a>(
name: Option<&'a str>,
asname: Option<&'a str>,
level: Option<u32>,
first_alias: Option<(&'a str, Option<&'a str>)>,
settings: &Settings,
) -> Ordering {
cmp_force_to_top(
import.name,
&import_from.module_name(),
&settings.force_to_top,
) -> ModuleKey<'a> {
let level = level
.map(i64::from)
.map(|l| match settings.relative_imports_order {
RelativeImportsOrder::ClosestToFurthest => l,
RelativeImportsOrder::FurthestToClosest => -l,
})
.unwrap_or_default();
let force_to_top = name.map(|name| !settings.force_to_top.contains(name)); // `false` < `true` so we get forced to top first
let maybe_lower_case_name = name
.and_then(|name| (!settings.case_sensitive).then_some(NatOrdString(name.to_lowercase())));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid allocating here (name.to_lowercase()) in the event that the string is already lowercase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess to do that we would need to use a Cow. What we could do is have a single NatOrdStr struct like this:

use std::borrow::Cow;

struct NatOrdStr<'a>(Cow<'a, str>);

whose contents are either owned or borrowed.

I haven't played around with Cows much, so I don't know if the overhead is low enough to make it worthwhile to avoid that allocation in case the string is already lowercase, what do you think? If you think it is I'll gladly tweak my implementation :) but in that case I think it's better to use that Cow-based struct everywhere rather than just for maybe_lower_case_name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worth it given that these are gonna lower-cased in the majority of cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just pushed the modification, I had it ready just in case :)

let module_name = name.map(NatOrdStr);
let asname = asname.map(NatOrdStr);
let first_alias = first_alias.map(|(name, asname)| member_key(name, asname, settings));

(
level,
force_to_top,
maybe_lower_case_name,
module_name,
asname,
first_alias,
)
.then_with(|| {
if settings.case_sensitive {
natord::compare(import.name, import_from.module.unwrap_or_default())
} else {
natord::compare_ignore_case(import.name, import_from.module.unwrap_or_default())
}
})
}

/// Compare two [`EitherImport`] enums which may be [`Import`] or [`ImportFrom`]
/// structs.
pub(crate) fn cmp_either_import(
a: &EitherImport,
b: &EitherImport,
type MemberKey<'a> = (
bool,
Option<MemberType>,
Option<NatOrdString>,
NatOrdStr<'a>,
Option<NatOrdStr<'a>>,
);

pub(crate) fn member_key<'a>(
name: &'a str,
asname: Option<&'a str>,
settings: &Settings,
) -> Ordering {
match (a, b) {
(Import((alias1, _)), Import((alias2, _))) => cmp_modules(alias1, alias2, settings),
(ImportFrom((import_from, ..)), Import((alias, _))) => {
cmp_import_import_from(alias, import_from, settings).reverse()
}
(Import((alias, _)), ImportFrom((import_from, ..))) => {
cmp_import_import_from(alias, import_from, settings)
}
(ImportFrom((import_from1, ..)), ImportFrom((import_from2, ..))) => {
cmp_import_from(import_from1, import_from2, settings)
}
}
) -> MemberKey<'a> {
let not_star_import = name != "*"; // `false` < `true` so we get star imports first
let member_type = settings
.order_by_type
.then_some(member_type(name, settings));
let maybe_lower_case_name =
(!settings.case_sensitive).then_some(NatOrdString(name.to_lowercase()));
let module_name = NatOrdStr(name);
let asname = asname.map(NatOrdStr);

(
not_star_import,
member_type,
maybe_lower_case_name,
module_name,
asname,
)
}
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/isort/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ type ImportFrom<'a> = (
Vec<AliasDataWithComments<'a>>,
);

#[derive(Debug)]
pub(crate) enum EitherImport<'a> {
Import(Import<'a>),
ImportFrom(ImportFrom<'a>),
Expand Down
Loading