Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: add from_dataframe and DataFrame #331

Merged
merged 2 commits into from
Sep 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas-stubs/api/interchange/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrame
from pandas.core.interchange.from_dataframe import from_dataframe as from_dataframe
4 changes: 4 additions & 0 deletions pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ from pandas.core.indexing import (
_IndexSliceTuple,
_LocIndexer,
)
from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
from pandas.core.resample import Resampler
from pandas.core.series import Series
from pandas.core.window import (
Expand Down Expand Up @@ -199,6 +200,9 @@ class DataFrame(NDFrame, OpsMixin):
dtype=...,
copy: _bool = ...,
) -> DataFrame: ...
# Stub for the interchange entry point on pandas' DataFrame. Both flags are
# optional bools (defaults elided with `...`); the result is typed as
# DataFrameXchg, the alias under which this file imports
# pandas.core.interchange.dataframe_protocol.DataFrame.
def __dataframe__(
    self, nan_as_null: bool = ..., allow_copy: bool = ...
) -> DataFrameXchg: ...
# Read-only property typed as a list of Index objects.
@property
def axes(self) -> list[Index]: ...
@property
Expand Down
Empty file.
117 changes: 117 additions & 0 deletions pandas-stubs/core/interchange/dataframe_protocol.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import abc
from abc import (
ABC,
abstractmethod,
)
import enum
from typing import (
Any,
Iterable,
Sequence,
TypedDict,
)

class DlpackDeviceType(enum.IntEnum):
    # Integer device-type codes; the values follow DLPack's device enumeration
    # (codes 5 and 6 are intentionally absent from this enum).
    #
    # Members are assigned concrete values instead of being declared as
    # `NAME: int`: an annotated name without an assignment inside an Enum body
    # is a non-member attribute, so type checkers would not recognise it as a
    # member (and at runtime the enum would have no members at all).
    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10

class DtypeKind(enum.IntEnum):
    # Integer codes identifying the kind of data held by a column; the values
    # follow the dataframe interchange protocol (numeric kinds start at 0,
    # the remaining kinds start at 20).
    #
    # Members are assigned concrete values instead of being declared as
    # `NAME: int`: an annotated name without an assignment inside an Enum body
    # is a non-member attribute, so type checkers would not recognise it as a
    # member (and at runtime the enum would have no members at all).
    INT = 0
    UINT = 1
    FLOAT = 2
    BOOL = 20
    STRING = 21
    DATETIME = 22
    CATEGORICAL = 23

class ColumnNullType(enum.IntEnum):
    # Integer codes describing how a column represents missing values; the
    # values follow the dataframe interchange protocol (0 through 4).
    #
    # Members are assigned concrete values instead of being declared as
    # `NAME: int`: an annotated name without an assignment inside an Enum body
    # is a non-member attribute, so type checkers would not recognise it as a
    # member (and at runtime the enum would have no members at all).
    NON_NULLABLE = 0
    USE_NAN = 1
    USE_SENTINEL = 2
    USE_BITMASK = 3
    USE_BYTEMASK = 4

# Typed mapping with three keys. `data` is always a (Buffer, Any) pair, while
# `validity` and `offsets` are either such a pair or None.
# NOTE(review): the second tuple element is typed Any; presumably it carries
# the buffer's dtype description -- confirm against the interchange spec.
class ColumnBuffers(TypedDict):
    data: tuple[Buffer, Any]
    validity: tuple[Buffer, Any] | None
    offsets: tuple[Buffer, Any] | None

# Typed mapping with two boolean flags and an optional Column.
# NOTE(review): judging by the key names, `categories` presumably holds the
# category values when `is_dictionary` is true -- confirm against the
# interchange spec.
class CategoricalDescription(TypedDict):
    is_ordered: bool
    is_dictionary: bool
    categories: Column | None
Dr-Irv marked this conversation as resolved.
Show resolved Hide resolved

# Abstract buffer interface. Every member below is abstract, so a concrete
# subclass must implement all four before it can be instantiated.
class Buffer(ABC, metaclass=abc.ABCMeta):
    # Read-only property typed as int.
    @property
    @abstractmethod
    def bufsize(self) -> int: ...
    # Read-only property typed as int.
    @property
    @abstractmethod
    def ptr(self) -> int: ...
    # No return annotation is declared, so the result is implicitly untyped.
    @abstractmethod
    def __dlpack__(self): ...
    # Returns a (device type, int-or-None) pair.
    # NOTE(review): whether the second element may really be None should be
    # confirmed against the interchange spec.
    @abstractmethod
    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at https://data-apis.org/dataframe-protocol/latest/API.html, I don't think the second element of the tuple can be None

Copy link
Contributor Author

@bashtage bashtage Sep 27, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


# Abstract column interface; its remaining abstract members follow further
# down in this listing.
class Column(ABC, metaclass=abc.ABCMeta):
    # Declared here as a read-only abstract property (not a method) typed int.
    @property
    @abstractmethod
    def size(self) -> int: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the type given in the spec might also be wrong here. The spec text indicates that the size is required: it is either the size of the full DataFrame or, when the column is a single chunk, the size of that chunk.

# Remaining abstract members of Column (the class header appears earlier in
# this listing). The first six are read-only properties; the last three are
# regular methods.
@property
@abstractmethod
def offset(self) -> int: ...
# Four-element tuple: a DtypeKind, an int and two strings.
# NOTE(review): presumably (kind, bit width, format string, endianness) --
# confirm against the interchange spec.
@property
@abstractmethod
def dtype(self) -> tuple[DtypeKind, int, str, str]: ...
@property
@abstractmethod
def describe_categorical(self) -> CategoricalDescription: ...
# Pair of a ColumnNullType and a second element typed Any.
@property
@abstractmethod
def describe_null(self) -> tuple[ColumnNullType, Any]: ...
# May be None as well as an int.
@property
@abstractmethod
def null_count(self) -> int | None: ...
@property
@abstractmethod
def metadata(self) -> dict[str, Any]: ...
# Unlike the members above, this is a method rather than a property.
@abstractmethod
def num_chunks(self) -> int: ...
# `n_chunks` is optional and may be None; yields Column objects.
@abstractmethod
def get_chunks(self, n_chunks: int | None = ...) -> Iterable[Column]: ...
@abstractmethod
def get_buffers(self) -> ColumnBuffers: ...

# Abstract interchange-protocol dataframe interface. Apart from the `version`
# attribute, every member is abstract and must be implemented by subclasses.
class DataFrame(ABC, metaclass=abc.ABCMeta):
    # Integer attribute declared on the class.
    version: int
    # No return annotation is declared here.
    # NOTE(review): pandas-stubs' frame.pyi types the same-named method on
    # pandas.DataFrame as returning this class; consider annotating the return
    # type here as well once confirmed.
    @abstractmethod
    def __dataframe__(self, nan_as_null: bool = ..., allow_copy: bool = ...): ...
    # The only property on this class; the members below are methods.
    @property
    @abstractmethod
    def metadata(self) -> dict[str, Any]: ...
    @abstractmethod
    def num_columns(self) -> int: ...
    # May be None as well as an int.
    @abstractmethod
    def num_rows(self) -> int | None: ...
    @abstractmethod
    def num_chunks(self) -> int: ...
    @abstractmethod
    def column_names(self) -> Iterable[str]: ...
    # Column lookup by integer position.
    @abstractmethod
    def get_column(self, i: int) -> Column: ...
    # Column lookup by string name.
    @abstractmethod
    def get_column_by_name(self, name: str) -> Column: ...
    @abstractmethod
    def get_columns(self) -> Iterable[Column]: ...
    # Both select_* methods return another DataFrame of this interface.
    @abstractmethod
    def select_columns(self, indices: Sequence[int]) -> DataFrame: ...
    @abstractmethod
    def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: ...
    # `n_chunks` is optional and may be None; yields DataFrame objects.
    @abstractmethod
    def get_chunks(self, n_chunks: int | None = ...) -> Iterable[DataFrame]: ...
3 changes: 3 additions & 0 deletions pandas-stubs/core/interchange/from_dataframe.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import pandas as pd

# Returns a pandas DataFrame. `df` is left unannotated; `allow_copy` is an
# optional bool whose default is elided with `...`.
# NOTE(review): presumably `df` is any object exposing `__dataframe__` --
# confirm before tightening the annotation.
def from_dataframe(df, allow_copy: bool = ...) -> pd.DataFrame: ...