Skip to content

Commit

Permalink
Change API a bit, align formatting with pandas
Browse files Browse the repository at this point in the history
Signed-off-by: Vasily Litvinov <[email protected]>
  • Loading branch information
vnlitvinov committed Apr 1, 2022
1 parent 63ba1e7 commit 825b6d7
Showing 1 changed file with 44 additions and 5 deletions.
49 changes: 44 additions & 5 deletions protocol/dataframe_protocol.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict
from abc import (
ABC,
abstractmethod,
)
import enum
from abc import ABC, abstractmethod
from typing import (
Any,
Dict,
Iterable,
Optional,
Sequence,
Tuple,
TypedDict,
)


class DlpackDeviceType(enum.IntEnum):
Expand Down Expand Up @@ -89,6 +100,16 @@ class ColumnBuffers(TypedDict):
offsets: Optional[Tuple["Buffer", Any]]


class CategoricalDescription(TypedDict):
    """Description of how to interpret the data buffer of a categorical column."""

    # Whether the ordering of dictionary indices is semantically meaningful.
    is_ordered: bool
    # Whether a dictionary-style mapping of categorical values to other
    # objects exists.
    is_dictionary: bool
    # Python-level only (e.g. ``{int: str}``).
    # None if not a dictionary-style categorical.
    mapping: Optional[dict]


class Buffer(ABC):
"""
Data in the buffer is guaranteed to be contiguous in memory.
Expand Down Expand Up @@ -191,7 +212,7 @@ class Column(ABC):

@property
@abstractmethod
def size(self) -> Optional[int]:
def size(self) -> int:
"""
Size of the column, in elements.
Expand Down Expand Up @@ -246,15 +267,15 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:

@property
@abstractmethod
def describe_categorical(self) -> Tuple[bool, bool, Optional[dict]]:
def describe_categorical(self) -> CategoricalDescription:
"""
If the dtype is categorical, there are two options:
- There are only values in the data buffer.
- There is a separate dictionary-style encoding for categorical values.
Raises TypeError if the dtype is not categorical
Returns the description on how to interpret the data buffer:
Returns the dictionary with description on how to interpret the data buffer:
- "is_ordered" : bool, whether the ordering of dictionary indices is
semantically meaningful.
- "is_dictionary" : bool, whether a dictionary-style mapping of
Expand Down Expand Up @@ -363,6 +384,24 @@ class DataFrame(ABC):

version = 0 # version of the protocol

@abstractmethod
def __dataframe__(
self, nan_as_null: bool = False, allow_copy: bool = True
) -> "DataFrame":
"""
Construct a new exchange object, potentially changing the parameters.
``nan_as_null`` is a keyword intended for the consumer to tell the
producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
It is intended for cases where the consumer does not support the bit
mask or byte mask that is the producer's native representation.
``allow_copy`` is a keyword that defines whether or not the library is
allowed to make a copy of the data. For example, copying data would be
necessary if a library supports strided buffers, given that this protocol
specifies contiguous buffers.
"""
pass

@property
@abstractmethod
def metadata(self) -> Dict[str, Any]:
Expand Down

0 comments on commit 825b6d7

Please sign in to comment.