Skip to content

Commit

Permalink
feat(rust, python): Expr.cat.get_categories expression (#9869)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jul 14, 2023
1 parent 7b0527c commit bae44a0
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,19 @@ impl RevMapping {
!self.is_global()
}

/// Get the length of the [`RevMapping`]
pub fn len(&self) -> usize {
/// Get the categories in this RevMapping
pub fn get_categories(&self) -> &Utf8Array<i64> {
match self {
Self::Global(_, a, _) => a.len(),
Self::Local(a) => a.len(),
Self::Global(_, a, _) => a,
Self::Local(a) => a,
}
}

/// Number of categories stored in this [`RevMapping`].
///
/// This is the length of the array returned by `get_categories`.
pub fn len(&self) -> usize {
    let categories = self.get_categories();
    categories.len()
}

/// Categorical to str
pub fn get(&self, idx: u32) -> &str {
match self {
Expand Down
5 changes: 5 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ impl CategoricalNameSpace {
self.0
.map_private(CategoricalFunction::SetOrdering { lexical }.into())
}

/// Build an expression that applies [`CategoricalFunction::GetCategories`]
/// to the wrapped expression.
pub fn get_categories(self) -> Expr {
    let function = CategoricalFunction::GetCategories.into();
    self.0.map_private(function)
}
}
17 changes: 16 additions & 1 deletion polars/polars-lazy/polars-plan/src/dsl/function_expr/cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ use crate::map;
/// Functions available on categorical expressions.
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub enum CategoricalFunction {
    /// Set the ordering of the categorical; `lexical` is forwarded to
    /// `set_lexical_sorted` on the underlying categorical array.
    SetOrdering { lexical: bool },
    /// Return the categories of the categorical as a `Utf8` series.
    GetCategories,
}

impl CategoricalFunction {
pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult<Field> {
mapper.with_dtype(DataType::Boolean)
use CategoricalFunction::*;
match self {
SetOrdering { .. } => mapper.with_same_dtype(),
GetCategories => mapper.with_dtype(DataType::Utf8),
}
}
}

Expand All @@ -18,6 +23,7 @@ impl Display for CategoricalFunction {
use CategoricalFunction::*;
let s = match self {
SetOrdering { .. } => "set_ordering",
GetCategories => "get_categories",
};
write!(f, "{s}")
}
Expand All @@ -28,6 +34,7 @@ impl From<CategoricalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
use CategoricalFunction::*;
match func {
SetOrdering { lexical } => map!(set_ordering, lexical),
GetCategories => map!(get_categories),
}
}
}
Expand All @@ -43,3 +50,11 @@ fn set_ordering(s: &Series, lexical: bool) -> PolarsResult<Series> {
ca.set_lexical_sorted(lexical);
Ok(ca.into_series())
}

/// Extract the categories of a categorical [`Series`] as a new [`Series`].
///
/// The result carries the same name as the input. Any error from
/// `s.categorical()` is propagated to the caller.
fn get_categories(s: &Series) -> PolarsResult<Series> {
    // Doubles as the dtype check: errors out unless `s` is categorical.
    let cat = s.categorical()?;
    let categories = match cat.dtype() {
        DataType::Categorical(Some(rev_map)) => rev_map.get_categories().clone().boxed(),
        // The dtype is expected to always be a categorical with a rev-map here.
        _ => unreachable!(),
    };
    Series::try_from((cat.name(), categories))
}
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/categories.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ The following methods are available under the `expr.cat` attribute.
:toctree: api/
:template: autosummary/accessor_method.rst

Expr.cat.get_categories
Expr.cat.set_ordering
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/categories.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ The following methods are available under the `Series.cat` attribute.
:toctree: api/
:template: autosummary/accessor_method.rst

Series.cat.get_categories
Series.cat.set_ordering
24 changes: 24 additions & 0 deletions py-polars/polars/expr/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,27 @@ def set_ordering(self, ordering: CategoricalOrdering) -> Expr:
"""
return wrap_expr(self._pyexpr.cat_set_ordering(ordering))

def get_categories(self) -> Expr:
    """
    Get the categories stored in this data type.

    Examples
    --------
    >>> df = pl.Series(
    ...     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
    ... ).to_frame()
    >>> df.select(pl.col("cats").cat.get_categories())
    shape: (3, 1)
    ┌──────┐
    │ cats │
    │ ---  │
    │ str  │
    ╞══════╡
    │ foo  │
    │ bar  │
    │ ham  │
    └──────┘

    """
    return wrap_expr(self._pyexpr.cat_get_categories())
18 changes: 18 additions & 0 deletions py-polars/polars/series/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,21 @@ def set_ordering(self, ordering: CategoricalOrdering) -> Series:
└──────┴──────┘
"""

def get_categories(self) -> Series:
    """
    Get the categories stored in this data type.

    Examples
    --------
    >>> s = pl.Series(["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical)
    >>> s.cat.get_categories()
    shape: (3,)
    Series: '' [str]
    [
        "foo"
        "bar"
        "ham"
    ]

    """
4 changes: 4 additions & 0 deletions py-polars/src/expr/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@ impl PyExpr {
// Clone the wrapped expression, set the requested categorical ordering on it,
// and wrap the resulting expression again.
fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
    let expr = self.inner.clone();
    expr.cat().set_ordering(ordering.0).into()
}

// Clone the wrapped expression, request its categories through the `cat`
// namespace, and wrap the resulting expression again.
fn cat_get_categories(&self) -> Self {
    let expr = self.inner.clone();
    expr.cat().get_categories().into()
}
}
6 changes: 6 additions & 0 deletions py-polars/tests/unit/namespaces/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,9 @@ def test_sort_categoricals_6014() -> None:
assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]}
out = df2.sort("key")
assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]}


def test_categorical_get_categories() -> None:
    # Categories are expected in order of first appearance, without duplicates.
    s = pl.Series("cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical)
    categories = s.cat.get_categories().to_list()
    assert categories == ["foo", "bar", "ham"]

0 comments on commit bae44a0

Please sign in to comment.