Skip to content

Commit

Permalink
Merge pull request #30 from lincc-frameworks/use_pandas_na
Browse files Browse the repository at this point in the history
use pandas NA
  • Loading branch information
dougbrn authored Apr 17, 2024
2 parents 1818c57 + 942f0f6 commit 3aff79e
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 111 deletions.
5 changes: 2 additions & 3 deletions src/nested_pandas/series/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.base import ExtensionDtype

from nested_pandas.series.na import NA, NAType
from nested_pandas.series.utils import is_pa_type_a_list

__all__ = ["NestedDtype"]
Expand All @@ -29,9 +28,9 @@ class NestedDtype(ExtensionDtype):
"""Attributes to use as metadata for __eq__ and __hash__"""

@property
def na_value(self) -> "pd.api.typing.NAType":
    """The missing value for this dtype.

    Returns
    -------
    pandas.api.typing.NAType
        The ``pd.NA`` singleton.
    """
    # `pd.NA` is an instance, not a class, so `Type[pd.NA]` is not a valid
    # annotation. The annotation is quoted so it is not evaluated when the
    # property is defined.
    return pd.NA

type = pd.DataFrame
"""The type of the array's elements, always pd.DataFrame"""
Expand Down
3 changes: 2 additions & 1 deletion src/nested_pandas/series/ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def to_numpy(self, dtype: None = None, copy: bool = False, na_value: Any = no_de

# Hack with np.empty is the only way to force numpy to create 1-d array of objects
result = np.empty(shape=array.shape, dtype=object)

# We do copy=False here because user's 'copy' is already handled by ArrowExtensionArray.to_numpy
result[:] = [pd.DataFrame(value, copy=False) for value in array]
result[:] = [pd.DataFrame(value, copy=False) if not pd.isna(value) else pd.NA for value in array]
return result

def __setitem__(self, key, value) -> None:
Expand Down
55 changes: 0 additions & 55 deletions src/nested_pandas/series/na.py

This file was deleted.

2 changes: 2 additions & 0 deletions src/nested_pandas/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
def count_nested(df, nested, by=None, join=True) -> NestedFrame:
"""Counts the number of rows of a nested dataframe.
#TODO: Does not work when any nested dataframes are empty (NaN)
Parameters
----------
df: NestedFrame
Expand Down
16 changes: 16 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@ def test_add_nested():
assert base.nested.nest.to_flat().equals(nested)


def test_add_nested_with_mismatched_index():
    """Check add_nested when a base index value has no rows in the nested frame"""
    base = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])

    # Only index values 0 and 1 occur below; nothing is provided for index value 2
    nested_columns = {"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}
    nested_index = [0, 0, 0, 1, 1, 1, 1, 1, 1]
    nested = pd.DataFrame(data=nested_columns, index=nested_index)

    base = base.add_nested(nested, "nested")

    assert "nested" in base.columns
    # The base row without nested data is expected to hold a missing value
    unmatched_row = base.loc[2]
    assert pd.isna(unmatched_row["nested"])


def test_query():
"""Test that NestedFrame.query handles nested queries correctly"""

Expand Down
3 changes: 1 addition & 2 deletions tests/nested_pandas/series/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import pytest
from nested_pandas.series.dtype import NestedDtype
from nested_pandas.series.ext_array import NestedExtensionArray
from nested_pandas.series.na import NA


@pytest.mark.parametrize(
Expand Down Expand Up @@ -62,7 +61,7 @@ def test_from_fields():
def test_na_value():
    """Test that NestedDtype.na_value is the pd.NA singleton."""
    dtype = NestedDtype(pa.struct([pa.field("a", pa.list_(pa.int64()))]))
    # Identity check rather than `==`: comparing against pd.NA yields pd.NA,
    # which cannot be used as a boolean.
    assert dtype.na_value is pd.NA


def test_fields():
Expand Down
50 changes: 0 additions & 50 deletions tests/nested_pandas/series/test_na.py

This file was deleted.

0 comments on commit 3aff79e

Please sign in to comment.