Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initialize nestedframe #5

Merged
merged 2 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/nested_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .example_module import greetings, meaning
from .nestedframe import NestedFrame

# Import for registering
from .series.accessor import NestSeriesAccessor # noqa: F401
from .series.dtype import NestedDtype

__all__ = ["greetings", "meaning", "NestedDtype"]
__all__ = ["greetings", "meaning", "NestedDtype", "NestedFrame"]
1 change: 1 addition & 0 deletions src/nested_pandas/nestedframe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .core import NestedFrame # noqa
60 changes: 60 additions & 0 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# typing.Self and "|" union syntax don't exist in Python 3.9
from __future__ import annotations

import pandas as pd

from nested_pandas.series import packer
from nested_pandas.series.dtype import NestedDtype


class NestedFrame(pd.DataFrame):
    """A pandas DataFrame extension with support for nested structure.

    A column counts as "nested" when its dtype is a ``NestedDtype``. The
    sub-columns of a nested column are read through the ``.nest`` series
    accessor and can be referred to with dotted labels of the form
    ``"<nested column>.<field>"``.

    See https://pandas.pydata.org/docs/development/extending.html#subclassing-pandas-data-structures
    """

    # Normal properties: per the pandas subclassing guide linked above, names
    # listed in ``_metadata`` are carried over to the results of operations.
    _metadata = ["added_property"]

    @property
    def _constructor(self) -> type[NestedFrame]:
        """Return the class pandas should use to build same-dimension results."""
        return NestedFrame

    @property
    def _constructor_expanddim(self) -> type[NestedFrame]:
        """Return the class pandas should use for expanded-dimension results."""
        return NestedFrame

    @property
    def all_columns(self) -> dict:
        """Return the columns of the base frame and of every nested column.

        Returns
        -------
        dict
            ``"base"`` maps to this frame's own ``columns``; every nested
            column name maps to the fields reported by its ``.nest`` accessor.
        """
        columns = {"base": self.columns}
        for name in self.nested_columns:
            columns[name] = self[name].nest.fields
        return columns

    @property
    def nested_columns(self) -> list:
        """Return the names of the columns whose dtype is a ``NestedDtype``."""
        return [name for name in self.columns if isinstance(self[name].dtype, NestedDtype)]

    def _is_known_hierarchical_column(self, colname) -> bool:
        """Determine whether a label names a field of a known nested column.

        Parameters
        ----------
        colname
            Candidate label, expected in the form ``"<nested>.<field>"``.

        Returns
        -------
        bool
            True only when the text before the first ``"."`` is one of
            ``nested_columns`` and the remainder is one of that column's
            fields. Labels without a dot, and non-string labels, give False.
        """
        # Column labels may be non-strings (e.g. ints); those can never be a
        # dotted hierarchical name, and `"." in colname` would raise for them.
        if not isinstance(colname, str):
            return False

        # Split on the first dot only: everything after it is the field name,
        # so labels with several dots no longer fail tuple unpacking.
        base_name, dot, field = colname.partition(".")
        if not dot or base_name not in self.nested_columns:
            return False
        return field in self[base_name].nest.fields

    def add_nested(self, nested, name) -> NestedFrame:
        """Pack a dataframe into a nested column and return the extended frame.

        Parameters
        ----------
        nested
            Flat dataframe handed to ``packer.pack_flat``.
            NOTE(review): presumably its index must line up with this frame's
            index -- confirm against ``pack_flat``.
        name
            Name requested for the packed series.

        Returns
        -------
        NestedFrame
            The result of ``assign`` with the packed series added under the
            packed series' own name; ``self`` is not modified by this method.
        """
        packed = packer.pack_flat(nested, name=name)
        # Keyword expansion needs a string key, hence the explicit str().
        return self.assign(**{str(packed.name): packed})
76 changes: 76 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pandas as pd
from nested_pandas import NestedFrame


def test_nestedframe_construction():
    """Building a NestedFrame from column data yields a NestedFrame instance."""
    column_data = {"a": [1, 2, 3], "b": [2, 4, 6]}
    frame = NestedFrame(data=column_data, index=[0, 1, 2])

    assert isinstance(frame, NestedFrame)


def test_all_columns():
    """all_columns lists the base columns, plus each nested column's fields."""
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # Before any nesting only the "base" entry exists.
    before = frame.all_columns
    assert list(before) == ["base"]
    assert list(before["base"]) == list(frame.columns)

    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    with_nested = frame.add_nested(flat, "nested")

    # After packing, the nested column shows up with the flat frame's columns.
    after = with_nested.all_columns
    assert list(after) == ["base", "nested"]
    assert list(after["nested"]) == list(flat.columns)


def test_nested_columns():
    """nested_columns reports exactly the columns added through add_nested."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    with_nested = frame.add_nested(flat, "nested")

    assert with_nested.nested_columns == ["nested"]


def test_is_known_hierarchical_column():
    """Dotted labels are recognised only for fields of a nested column."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])
    frame = frame.add_nested(flat, "nested")

    # "c" is a field of the nested column.
    assert frame._is_known_hierarchical_column("nested.c")
    # "b" is a base column, not a field of "nested".
    assert not frame._is_known_hierarchical_column("nested.b")
    # "base" is not the name of a nested column.
    assert not frame._is_known_hierarchical_column("base.a")


def test_add_nested():
    """add_nested attaches a nested column that flattens back to its input."""
    flat = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
    )
    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    result = frame.add_nested(flat, "nested")

    assert "nested" in result.columns
    # Unpacking the nested column must reproduce the original flat frame.
    round_tripped = result["nested"].nest.to_flat()
    assert round_tripped.equals(flat)
Loading