Skip to content

Commit

Permalink
perf(rust, python): Faster is_sorted when no flag set (pola-rs#9777)
Browse files Browse the repository at this point in the history
  • Loading branch information
magarick authored and c-peters committed Jul 14, 2023
1 parent b243fd9 commit c69cf4a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
30 changes: 24 additions & 6 deletions polars/polars-ops/src/series/ops/various.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub trait SeriesMethods: SeriesSealed {
}
}

fn is_sorted(&self, options: SortOptions) -> bool {
fn is_sorted(&self, options: SortOptions) -> PolarsResult<bool> {
let s = self.as_series();

// fast paths
Expand All @@ -50,12 +50,30 @@ pub trait SeriesMethods: SeriesSealed {
&& !options.nulls_last
&& matches!(s.is_sorted_flag(), IsSorted::Ascending))
{
return true;
return Ok(true);
}
// Slow path: no sorted flag could answer the question, so inspect the data.
// Nulls must form one contiguous run at the requested end; the remaining
// non-null values are then compared pairwise.
let nc = s.null_count();
if nc == s.len() {
    // An empty or all-null series has no pair of values that can be out of order.
    // This guard must run before `slen` is computed below: `s.len() - nc - 1`
    // underflows `usize` whenever `nc == s.len()`.
    return Ok(true);
}
if nc > 0 {
    // Per-row validity across all chunks (`true` = non-null). `is_valid` is used
    // instead of unwrapping each chunk's validity bitmap, because `validity()` is
    // optional and a chunk without nulls may not carry a bitmap even though
    // another chunk of the same series does.
    let valid = s
        .chunks()
        .iter()
        .flat_map(|c| (0..c.len()).map(move |i| c.is_valid(i)));
    let mut npairs = valid.clone().zip(valid.skip(1));
    // A null never precedes (follows) a non-null iff all nulls are at the end (beginning)
    let nulls_misplaced = if options.nulls_last {
        npairs.any(|(a, b)| !a && b)
    } else {
        npairs.any(|(a, b)| a && !b)
    };
    if nulls_misplaced {
        return Ok(false);
    }
}
// Number of adjacent-pair comparisons among the non-null values. Cannot underflow:
// the guard above guarantees `nc < s.len()`.
let slen = s.len() - nc - 1;
// Compare adjacent elements with no-copy slices that don't include any nulls.
// With nulls first, skip the leading `nc` nulls; with nulls last, start at row 0.
let offset = !options.nulls_last as i64 * nc as i64;
let (s1, s2) = (s.slice(offset, slen), s.slice(offset + 1, slen));
match options.descending {
    true => Ok(Series::gt_eq(&s1, &s2)?.all()),
    false => Ok(Series::lt_eq(&s1, &s2)?.all()),
}

// TODO! optimize
let out = s.sort_with(options);
out.eq(s)
}
}

Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,13 +651,13 @@ impl PySeries {
})
}

fn is_sorted(&self, descending: bool) -> bool {
fn is_sorted(&self, descending: bool) -> PyResult<bool> {
let options = SortOptions {
descending,
nulls_last: descending,
multithreaded: true,
};
self.series.is_sorted(options)
Ok(self.series.is_sorted(options).map_err(PyPolarsErr::from)?)
}

fn clear(&self) -> Self {
Expand Down

0 comments on commit c69cf4a

Please sign in to comment.