From 96557c64e154d677164ca1aeaaacb8b7f59477f7 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 26 Sep 2022 21:46:55 +0100 Subject: [PATCH 1/2] ENH: add from_dataframe and DataFrame --- pandas-stubs/api/interchange/__init__.pyi | 2 + pandas-stubs/core/interchange/__init__.pyi | 0 .../core/interchange/dataframe_protocol.pyi | 117 ++++++++++++++++++ .../core/interchange/from_dataframe.pyi | 3 + 4 files changed, 122 insertions(+) create mode 100644 pandas-stubs/api/interchange/__init__.pyi create mode 100644 pandas-stubs/core/interchange/__init__.pyi create mode 100644 pandas-stubs/core/interchange/dataframe_protocol.pyi create mode 100644 pandas-stubs/core/interchange/from_dataframe.pyi diff --git a/pandas-stubs/api/interchange/__init__.pyi b/pandas-stubs/api/interchange/__init__.pyi new file mode 100644 index 00000000..147ebac4 --- /dev/null +++ b/pandas-stubs/api/interchange/__init__.pyi @@ -0,0 +1,2 @@ +from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrame +from pandas.core.interchange.from_dataframe import from_dataframe as from_dataframe diff --git a/pandas-stubs/core/interchange/__init__.pyi b/pandas-stubs/core/interchange/__init__.pyi new file mode 100644 index 00000000..e69de29b diff --git a/pandas-stubs/core/interchange/dataframe_protocol.pyi b/pandas-stubs/core/interchange/dataframe_protocol.pyi new file mode 100644 index 00000000..4fd1e013 --- /dev/null +++ b/pandas-stubs/core/interchange/dataframe_protocol.pyi @@ -0,0 +1,117 @@ +import abc +from abc import ( + ABC, + abstractmethod, +) +import enum +from typing import ( + Any, + Iterable, + Sequence, + TypedDict, +) + +class DlpackDeviceType(enum.IntEnum): + CPU: int + CUDA: int + CPU_PINNED: int + OPENCL: int + VULKAN: int + METAL: int + VPI: int + ROCM: int + +class DtypeKind(enum.IntEnum): + INT: int + UINT: int + FLOAT: int + BOOL: int + STRING: int + DATETIME: int + CATEGORICAL: int + +class ColumnNullType(enum.IntEnum): + NON_NULLABLE: int + USE_NAN: int + USE_SENTINEL: int + USE_BITMASK: int + USE_BYTEMASK: int + +class ColumnBuffers(TypedDict): + data: tuple[Buffer, Any] + validity: tuple[Buffer, Any] | None + offsets: tuple[Buffer, Any] | None + +class CategoricalDescription(TypedDict): + is_ordered: bool + is_dictionary: bool + categories: Column | None + +class Buffer(ABC, metaclass=abc.ABCMeta): + @property + @abstractmethod + def bufsize(self) -> int: ... + @property + @abstractmethod + def ptr(self) -> int: ... + @abstractmethod + def __dlpack__(self): ... + @abstractmethod + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: ... + +class Column(ABC, metaclass=abc.ABCMeta): + @property + @abstractmethod + def size(self) -> int: ... + @property + @abstractmethod + def offset(self) -> int: ... + @property + @abstractmethod + def dtype(self) -> tuple[DtypeKind, int, str, str]: ... + @property + @abstractmethod + def describe_categorical(self) -> CategoricalDescription: ... + @property + @abstractmethod + def describe_null(self) -> tuple[ColumnNullType, Any]: ... + @property + @abstractmethod + def null_count(self) -> int | None: ... + @property + @abstractmethod + def metadata(self) -> dict[str, Any]: ... + @abstractmethod + def num_chunks(self) -> int: ... + @abstractmethod + def get_chunks(self, n_chunks: int | None = ...) -> Iterable[Column]: ... + @abstractmethod + def get_buffers(self) -> ColumnBuffers: ... + +class DataFrame(ABC, metaclass=abc.ABCMeta): + version: int + @abstractmethod + def __dataframe__(self, nan_as_null: bool = ..., allow_copy: bool = ...): ... + @property + @abstractmethod + def metadata(self) -> dict[str, Any]: ... + @abstractmethod + def num_columns(self) -> int: ... + @abstractmethod + def num_rows(self) -> int | None: ... + @abstractmethod + def num_chunks(self) -> int: ... + @abstractmethod + def column_names(self) -> Iterable[str]: ... + @abstractmethod + def get_column(self, i: int) -> Column: ... + @abstractmethod + def get_column_by_name(self, name: str) -> Column: ... + @abstractmethod + def get_columns(self) -> Iterable[Column]: ... + @abstractmethod + def select_columns(self, indices: Sequence[int]) -> DataFrame: ... + @abstractmethod + def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: ... + @abstractmethod + def get_chunks(self, n_chunks: int | None = ...) -> Iterable[DataFrame]: ... diff --git a/pandas-stubs/core/interchange/from_dataframe.pyi b/pandas-stubs/core/interchange/from_dataframe.pyi new file mode 100644 index 00000000..e34d805e --- /dev/null +++ b/pandas-stubs/core/interchange/from_dataframe.pyi @@ -0,0 +1,3 @@ +import pandas as pd + +def from_dataframe(df, allow_copy: bool = ...) -> pd.DataFrame: ... From 3d8b759e6242fc7e03401c40aedc0b33074e923f Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 27 Sep 2022 08:36:37 +0100 Subject: [PATCH 2/2] ENH: Add __dataframe__ --- pandas-stubs/core/frame.pyi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 15edfa04..cd102b95 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -33,6 +33,7 @@ from pandas.core.indexing import ( _IndexSliceTuple, _LocIndexer, ) +from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg from pandas.core.resample import Resampler from pandas.core.series import Series from pandas.core.window import ( @@ -199,6 +200,9 @@ class DataFrame(NDFrame, OpsMixin): dtype=..., copy: _bool = ..., ) -> DataFrame: ... + def __dataframe__( + self, nan_as_null: bool = ..., allow_copy: bool = ... + ) -> DataFrameXchg: ... @property def axes(self) -> list[Index]: ... @property