-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
initialize nestedframe #5
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .core import * # noqa | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we really need star-import here? It would add … |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,61 @@ | ||||||
# typing.Self and "|" union syntax don't exist in Python 3.9 | ||||||
from __future__ import annotations | ||||||
|
||||||
import pandas as pd | ||||||
|
||||||
from nested_pandas.series import packer | ||||||
|
||||||
|
||||||
class NestedFrame(pd.DataFrame):
    """A pandas DataFrame subclass with support for nested columns.

    A column is treated as "nested" when the Series stored under it exposes a
    ``nest`` attribute. ``add_nested`` creates such columns by packing a flat
    dataframe with ``packer.pack_flat``.

    The return annotations below name ``NestedFrame`` inside its own class
    body; this relies on ``from __future__ import annotations`` at the top of
    the module so that annotations are never evaluated at definition time.

    See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures
    """

    # normal properties
    _metadata = ["added_property"]

    @property
    def _constructor(self) -> type[NestedFrame]:
        """Return the class pandas should use for same-dimension results."""
        return NestedFrame

    @property
    def _constructor_expanddim(self) -> type[NestedFrame]:
        """Return the class pandas should use for higher-dimension results."""
        return NestedFrame

    @property
    def all_columns(self) -> dict:
        """Return the column labels of the base frame and of every nested frame.

        Returns
        -------
        dict
            ``"base"`` maps to ``self.columns``; each nested column name maps
            to the ``columns`` of the object stored in that column's first row.
        """
        all_columns = {"base": self.columns}
        for column in self.nested_columns:
            # TODO: Improve access to columns. Reading the first row means a
            # frame with zero rows raises IndexError here.
            all_columns[column] = self[column].iloc[0].columns
        return all_columns

    @property
    def nested_columns(self) -> list:
        """Return the names of the base columns that hold nested dataframes."""
        # NOTE(review): a reviewer suggested testing
        # ``isinstance(self[column].dtype, NestedDtype)`` (with NestedDtype
        # imported from nested_pandas) instead of probing for the ``nest``
        # attribute - confirm the import location before switching.
        return [column for column in self.columns if hasattr(self[column], "nest")]

    def _is_known_hierarchical_column(self, colname) -> bool:
        """Determine whether ``colname`` is a known ``"<nested>.<sub>"`` label.

        Parameters
        ----------
        colname
            Candidate label. Anything that is not a string is reported as
            unknown rather than raising.

        Returns
        -------
        bool
            True only when the text before the first ``"."`` names a nested
            column and the text after it is one of that nested column's
            sub-columns.
        """
        if not isinstance(colname, str):
            return False
        # Split on the first dot only, so labels containing several dots are
        # answered with a bool instead of failing to unpack.
        left, dot, right = colname.partition(".")
        if not dot or left not in self.nested_columns:
            return False
        return right in self.all_columns[left]

    def add_nested(self, nested, name) -> NestedFrame:
        """Pack a dataframe into a nested column and return the resulting frame.

        Parameters
        ----------
        nested
            Flat dataframe handed to ``packer.pack_flat``; presumably its index
            identifies the base row each record belongs to - confirm against
            ``pack_flat``.
        name
            Name passed to ``packer.pack_flat`` for the packed series.

        Returns
        -------
        NestedFrame
            The result of ``self.assign`` with the packed series added under
            the packed series' own name.
        """
        # Add sources to objects
        packed = packer.pack_flat(nested, name=name)
        # The label is formatted to str because ``assign`` takes it as a keyword.
        return self.assign(**{f"{packed.name}": packed})
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import pandas as pd | ||
from nested_pandas import NestedFrame | ||
|
||
|
||
def test_nestedframe_construction():
    """Building a NestedFrame from column data yields a NestedFrame instance."""
    column_data = {"a": [1, 2, 3], "b": [2, 4, 6]}
    frame = NestedFrame(data=column_data, index=[0, 1, 2])

    assert isinstance(frame, NestedFrame)
|
||
|
||
def test_all_columns():
    """all_columns lists the base columns, plus sub-columns once a nest is added."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # Before nesting, only the "base" entry exists and mirrors frame.columns.
    assert list(frame.all_columns.keys()) == ["base"]
    assert list(frame.all_columns["base"]) == list(frame.columns)

    flat_values = {"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    # After nesting, a second entry reports the packed dataframe's columns.
    assert list(frame.all_columns.keys()) == ["base", "nested"]
    assert list(frame.all_columns["nested"]) == list(flat.columns)
|
||
|
||
def test_nested_columns():
    """nested_columns reports exactly the column created by add_nested."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    assert frame.nested_columns == ["nested"]
|
||
|
||
def test_is_known_hierarchical_column():
    """Dotted labels are recognised only for existing nested sub-columns."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    # "c" is a sub-column of "nested"; "b" is a base column; "base" is not a
    # nested column at all.
    assert frame._is_known_hierarchical_column("nested.c")
    assert not frame._is_known_hierarchical_column("nested.b")
    assert not frame._is_known_hierarchical_column("base.a")
|
||
|
||
def test_add_nested():
    """add_nested adds a column whose flattened contents equal the input frame."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    flat_values = {"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}
    flat = pd.DataFrame(data=flat_values, index=[0, 0, 0, 1, 1, 1, 2, 2, 2])

    frame = frame.add_nested(flat, "nested")

    assert "nested" in frame.columns
    # Round trip: unpacking the nested column gives back the original rows.
    assert frame.nested.nest.to_flat().equals(flat)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's remove `noqa` and add to `__all__`?