From f1d435bf644288064609c326e8385a0f2bbef984 Mon Sep 17 00:00:00 2001 From: chielP Date: Thu, 18 Apr 2024 20:12:33 +0200 Subject: [PATCH] preserve order --- .../chunked_array/logical/categorical/ops/full.rs | 9 +++++++-- crates/polars-core/src/series/ops/null.rs | 10 +++++++--- py-polars/tests/unit/datatypes/test_categorical.py | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/categorical/ops/full.rs b/crates/polars-core/src/chunked_array/logical/categorical/ops/full.rs index eaebea346bb7..959717155ce3 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/ops/full.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/ops/full.rs @@ -1,7 +1,12 @@ use super::*; impl CategoricalChunked { - pub fn full_null(name: &str, is_enum: bool, length: usize) -> CategoricalChunked { + pub fn full_null( + name: &str, + is_enum: bool, + length: usize, + ordering: CategoricalOrdering, + ) -> CategoricalChunked { let cats = UInt32Chunked::full_null(name, length); unsafe { @@ -9,7 +14,7 @@ impl CategoricalChunked { cats, Arc::new(RevMapping::default()), is_enum, - Default::default(), + ordering, ) } } diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index ad2b8e2a221f..42d200ddcb56 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -12,9 +12,13 @@ impl Series { ArrayChunked::full_null_with_dtype(name, size, inner_dtype, *width).into_series() }, #[cfg(feature = "dtype-categorical")] - dt @ (DataType::Categorical(rev_map, _) | DataType::Enum(rev_map, _)) => { - let mut ca = - CategoricalChunked::full_null(name, matches!(dt, DataType::Enum(_, _)), size); + dt @ (DataType::Categorical(rev_map, ord) | DataType::Enum(rev_map, ord)) => { + let mut ca = CategoricalChunked::full_null( + name, + matches!(dt, DataType::Enum(_, _)), + size, + *ord, + ); // ensure we keep the rev-map of a cleared series if let Some(rev_map) = rev_map { unsafe { ca.set_rev_map(rev_map.clone(), false) } diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 3ffabdc02d17..020a2bc2e10c 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -812,3 +812,17 @@ def test_cast_from_cat_to_numeric() -> None: s = pl.Series(["1", "2", "3"], dtype=pl.Categorical) assert s.cast(pl.UInt8).sum() == 6 + + +def test_cat_preserve_lexical_ordering_on_clear() -> None: + s = pl.Series("a", ["a", "b"], dtype=pl.Categorical(ordering="lexical")) + s2 = s.clear() + assert s.dtype == s2.dtype + + +def test_cat_preserve_lexical_ordering_on_concat() -> None: + dtype = pl.Categorical(ordering="lexical") + + df = pl.DataFrame({"x": ["b", "a", "c"]}).with_columns(pl.col("x").cast(dtype)) + df2 = pl.concat([df, df]) + assert df2["x"].dtype == dtype