Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use bitmask for MaskedArray mask #59410

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/deps/actions-310-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ dependencies:
- zstandard=0.19.0

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql==0.10.0
- adbc-driver-sqlite==0.8.0
- tzdata==2022.7
1 change: 1 addition & 0 deletions ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ dependencies:
- zstandard>=0.19.0

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
1 change: 1 addition & 0 deletions ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ dependencies:
- pyyaml
- py
- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
1 change: 1 addition & 0 deletions ci/deps/actions-311-numpydev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ dependencies:
- pip

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
- "--pre"
- "numpy"
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies:
- pip

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- "tzdata>=2022.7"
- "--extra-index-url https://pypi.fury.io/arrow-nightlies/"
- "--prefer-binary"
Expand Down
1 change: 1 addition & 0 deletions ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ dependencies:
- zstandard>=0.19.0

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
1 change: 1 addition & 0 deletions ci/deps/actions-312.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ dependencies:
- zstandard>=0.19.0

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- tzdata>=2022.7
1 change: 1 addition & 0 deletions ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ dependencies:
- numpy
- python-dateutil
- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- tzdata>=2022.7
1 change: 1 addition & 0 deletions ci/deps/circle-311-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ dependencies:
- xlsxwriter>=3.0.5
- zstandard>=0.19.0
- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ dependencies:
- pygments # Code highlighting

- pip:
- git+https://github.com/WillAyd/pandas-bitmask.git
- adbc-driver-postgresql>=0.10.0
- adbc-driver-sqlite>=0.8.0
- typing_extensions; python_version<"3.11"
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ cdef class IndexEngine:

cdef readonly:
ndarray values
ndarray mask
object mask
HashTable mapping
bint over_size_threshold

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import warnings

import numpy as np
from pandas_mask import PandasMaskArray

from pandas._libs import (
algos,
Expand Down Expand Up @@ -1173,6 +1174,8 @@ def take(
... )
array([ 10, 10, -10])
"""
if isinstance(arr, PandasMaskArray): # TODO: implement take directly on mask
arr = np.array(arr)
if not isinstance(
arr,
(np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries, ABCNumpyExtensionArray),
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import warnings

import numpy as np
from pandas_mask import PandasMaskArray

from pandas._libs import (
lib,
Expand Down Expand Up @@ -112,20 +113,23 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):

# our underlying data is an ndarray; the mask is a PandasMaskArray
_data: np.ndarray
_mask: npt.NDArray[np.bool_]
_mask: PandasMaskArray

@classmethod
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
result = BaseMaskedArray.__new__(cls)
result._data = values
result._mask = mask
result._mask = PandasMaskArray(mask)
return result

def __init__(
self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
) -> None:
# values is supposed to already be validated in the subclass
if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
if not (
(isinstance(mask, np.ndarray) and mask.dtype == np.bool_)
or isinstance(mask, PandasMaskArray)
):
raise TypeError(
"mask should be boolean numpy array. Use "
"the 'pd.array' function instead"
Expand Down Expand Up @@ -678,7 +682,7 @@ def __arrow_array__(self, type=None):
"""
import pyarrow as pa

return pa.array(self._data, mask=self._mask, type=type)
return pa.array(self._data, mask=np.asarray(self._mask), type=type)

@property
def _hasna(self) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def _cython_op_ndim_compat(
# expand to 2d, dispatch, then squeeze if appropriate
values2d = values[None, :]
if mask is not None:
mask = mask[None, :]
mask = np.array(mask)[None, :]
if result_mask is not None:
result_mask = result_mask[None, :]
res = self._call_cython_op(
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ feedparser
pyyaml
requests
pygments
git+https://github.com/WillAyd/pandas-bitmask.git
adbc-driver-postgresql>=0.10.0
adbc-driver-sqlite>=0.8.0
typing_extensions; python_version<"3.11"
Expand Down
Loading