From 8eab8a2e44b04bca89983bd138aa5ecdf0eaaf59 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Thu, 10 Feb 2022 20:43:32 +0300
Subject: [PATCH 1/6] Declare enums explicitly, fix hints

Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
---
 protocol/dataframe_protocol.py | 74 +++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index 14854133..9bd356c4 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -1,3 +1,32 @@
+from typing import Tuple, Optional, Dict, Any, Iterable, Sequence
+import enum
+
+class DlpackDeviceType(enum.IntEnum):
+    CPU = 1
+    CUDA = 2
+    CPU_PINNED = 3
+    OPENCL = 4
+    VULKAN = 7
+    METAL = 8
+    VPI = 9
+    ROCM = 10
+
+class DtypeKind(enum.IntEnum):
+    INT = 0
+    UINT = 1
+    FLOAT = 2
+    BOOL = 20
+    STRING = 21   # UTF-8
+    DATETIME = 22
+    CATEGORICAL = 23
+
+class ColumnNullType:
+    NON_NULLABLE = 0
+    USE_NAN = 1
+    USE_SENTINEL = 2
+    USE_BITMASK = 3
+    USE_BYTEMASK = 4
+
 class Buffer:
     """
     Data in the buffer is guaranteed to be contiguous in memory.
@@ -41,20 +70,11 @@ def __dlpack__(self):
         """
         raise NotImplementedError("__dlpack__")
 
-    def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
+    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
         """
         Device type and device ID for where the data in the buffer resides.
 
-        Uses device type codes matching DLPack. Enum members are::
-
-            - CPU = 1
-            - CUDA = 2
-            - CPU_PINNED = 3
-            - OPENCL = 4
-            - VULKAN = 7
-            - METAL = 8
-            - VPI = 9
-            - ROCM = 10
+        Uses device type codes matching DLPack.
 
         Note: must be implemented even if ``__dlpack__`` is not.
         """
@@ -128,20 +148,10 @@ def offset(self) -> int:
         pass
 
     @property
-    def dtype(self) -> Tuple[enum.IntEnum, int, str, str]:
+    def dtype(self) -> Tuple[DtypeKind, int, str, str]:
         """
         Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
 
-        Kind :
-
-            - INT = 0
-            - UINT = 1
-            - FLOAT = 2
-            - BOOL = 20
-            - STRING = 21   # UTF-8
-            - DATETIME = 22
-            - CATEGORICAL = 23
-
         Bit-width : the number of bits as an integer
         Format string : data type description format string in Apache Arrow C
                         Data Interface format.
@@ -194,19 +204,11 @@ def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
         pass
 
     @property
-    def describe_null(self) -> Tuple[int, Any]:
+    def describe_null(self) -> Tuple[ColumnNullType, Any]:
         """
         Return the missing value (or "null") representation the column dtype
         uses, as a tuple ``(kind, value)``.
 
-        Kind:
-
-            - 0 : non-nullable
-            - 1 : NaN/NaT
-            - 2 : sentinel value
-            - 3 : bit mask
-            - 4 : byte mask
-
         Value : if kind is "sentinel value", the actual value. If kind is a bit
         mask or a byte mask, the value (0 or 1) indicating a missing value. None
         otherwise.
@@ -235,7 +237,7 @@ def num_chunks(self) -> int:
         """
         pass
 
-    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[Column]:
+    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["Column"]:
         """
         Return an iterator yielding the chunks.
 
@@ -243,7 +245,7 @@ def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[Column]:
         """
         pass
 
-    def get_buffers(self) -> dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]], Optional[Tuple[Buffer, Any]]]:
+    def get_buffers(self) -> Dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]], Optional[Tuple[Buffer, Any]]]:
         """
         Return a dictionary containing the underlying buffers.
 
@@ -368,19 +370,19 @@ def get_columns(self) -> Iterable[Column]:
         """
         pass
 
-    def select_columns(self, indices: Sequence[int]) -> DataFrame:
+    def select_columns(self, indices: Sequence[int]) -> "DataFrame":
         """
         Create a new DataFrame by selecting a subset of columns by index.
         """
         pass
 
-    def select_columns_by_name(self, names: Sequence[str]) -> DataFrame:
+    def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
         """
         Create a new DataFrame by selecting a subset of columns by name.
         """
         pass
 
-    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[DataFrame]:
+    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["DataFrame"]:
         """
         Return an iterator yielding the chunks.
 

From 2b35e5d71c6d3cdaa09bc11bf3d4561998f0083a Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Thu, 24 Feb 2022 20:54:18 +0300
Subject: [PATCH 2/6] Align spec with existing implementations

Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
---
 protocol/dataframe_protocol.py | 130 ++++++++++++++++++++++++---------
 1 file changed, 95 insertions(+), 35 deletions(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index 9bd356c4..27313e74 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -1,7 +1,11 @@
-from typing import Tuple, Optional, Dict, Any, Iterable, Sequence
+from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict
 import enum
+from abc import ABC, abstractmethod
+
 
 class DlpackDeviceType(enum.IntEnum):
+    """Integer enum for device type codes matching DLPack."""
+
     CPU = 1
     CUDA = 2
     CPU_PINNED = 3
@@ -11,7 +15,29 @@ class DlpackDeviceType(enum.IntEnum):
     VPI = 9
     ROCM = 10
 
+
 class DtypeKind(enum.IntEnum):
+    """
+    Integer enum for data types.
+
+    Attributes
+    ----------
+    INT : int
+        Matches to signed integer data type.
+    UINT : int
+        Matches to unsigned integer data type.
+    FLOAT : int
+        Matches to floating point data type.
+    BOOL : int
+        Matches to boolean data type.
+    STRING : int
+        Matches to string data type (UTF-8 encoded).
+    DATETIME : int
+        Matches to datetime data type.
+    CATEGORICAL : int
+        Matches to categorical data type.
+    """
+
     INT = 0
     UINT = 1
     FLOAT = 2
@@ -20,14 +46,48 @@ class DtypeKind(enum.IntEnum):
     DATETIME = 22
     CATEGORICAL = 23
 
-class ColumnNullType:
+
+class ColumnNullType(enum.IntEnum):
+    """
+    Integer enum for null type representation.
+
+    Attributes
+    ----------
+    NON_NULLABLE : int
+        Non-nullable column.
+    USE_NAN : int
+        Use explicit float NaN/NaT value.
+    USE_SENTINEL : int
+        Sentinel value besides NaN/NaT.
+    USE_BITMASK : int
+        The bit is set/unset representing a null on a certain position.
+    USE_BYTEMASK : int
+        The byte is set/unset representing a null on a certain position.
+    """
+
     NON_NULLABLE = 0
     USE_NAN = 1
     USE_SENTINEL = 2
     USE_BITMASK = 3
     USE_BYTEMASK = 4
 
-class Buffer:
+
+class ColumnBuffers(TypedDict):
+    data: Tuple["Buffer", Any] # first element is a buffer containing the column data;
+                               # second element is the data buffer's associated dtype
+    validity: Optional[Tuple["Buffer", Any]] # first element is a buffer containing mask values
+                                             # indicating missing data and second element is
+                                             # the mask value buffer's associated dtype.
+                                             # None if the null representation is not a bit or byte mask
+    offsets: Optional[Tuple["Buffer", Any]] # first element is a buffer containing the
+                                            # offset values for variable-size binary data
+                                            # (e.g., variable-length strings) and
+                                            # second element is the offsets buffer's associated dtype.
+                                            # None if the data buffer does not have
+                                            # an associated offsets buffer
+
+
+class Buffer(ABC):
     """
     Data in the buffer is guaranteed to be contiguous in memory.
 
@@ -43,6 +103,7 @@ class Buffer:
     """
 
     @property
+    @abstractmethod
     def bufsize(self) -> int:
         """
         Buffer size in bytes.
@@ -50,12 +111,14 @@ def bufsize(self) -> int:
         pass
 
     @property
+    @abstractmethod
     def ptr(self) -> int:
         """
         Pointer to start of the buffer as an integer.
         """
         pass
 
+    @abstractmethod
     def __dlpack__(self):
         """
         Produce DLPack capsule (see array API standard).
@@ -70,18 +133,17 @@ def __dlpack__(self):
         """
         raise NotImplementedError("__dlpack__")
 
-    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
+    @abstractmethod
+    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
         """
         Device type and device ID for where the data in the buffer resides.
-
         Uses device type codes matching DLPack.
-
         Note: must be implemented even if ``__dlpack__`` is not.
         """
         pass
 
 
-class Column:
+class Column(ABC):
     """
     A column object, with only the methods and properties required by the
     interchange protocol defined.
@@ -123,10 +185,10 @@ class Column:
 
     Note: this Column object can only be produced by ``__dataframe__``, so
           doesn't need its own version or ``__column__`` protocol.
-
     """
 
     @property
+    @abstractmethod
     def size(self) -> Optional[int]:
         """
         Size of the column, in elements.
@@ -137,6 +199,7 @@ def size(self) -> Optional[int]:
         pass
 
     @property
+    @abstractmethod
     def offset(self) -> int:
         """
         Offset of first element.
@@ -148,6 +211,7 @@ def offset(self) -> int:
         pass
 
     @property
+    @abstractmethod
     def dtype(self) -> Tuple[DtypeKind, int, str, str]:
         """
         Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
@@ -158,7 +222,6 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
         Endianness : current only native endianness (``=``) is supported
 
         Notes:
-
             - Kind specifiers are aligned with DLPack where possible (hence the
               jump to 20, leave enough room for future extension)
             - Masks must be specified as boolean with either bit width 1 (for bit
@@ -180,17 +243,16 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
         pass
 
     @property
+    @abstractmethod
     def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
         """
         If the dtype is categorical, there are two options:
-
         - There are only values in the data buffer.
         - There is a separate non-categorical Column encoding categorical values.
 
-        Raises RuntimeError if the dtype is not categorical
-
-        Content of returned dict:
+        Raises TypeError if the dtype is not categorical
 
+        Returns the description on how to interpret the data buffer:
             - "is_ordered" : bool, whether the ordering of dictionary indices is
                              semantically meaningful.
             - "is_dictionary" : bool, whether a mapping of
@@ -204,6 +266,7 @@ def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
         pass
 
     @property
+    @abstractmethod
     def describe_null(self) -> Tuple[ColumnNullType, Any]:
         """
         Return the missing value (or "null") representation the column dtype
@@ -216,6 +279,7 @@ def describe_null(self) -> Tuple[ColumnNullType, Any]:
         pass
 
     @property
+    @abstractmethod
     def null_count(self) -> Optional[int]:
         """
         Number of null elements, if known.
@@ -225,18 +289,21 @@ def null_count(self) -> Optional[int]:
         pass
 
     @property
+    @abstractmethod
     def metadata(self) -> Dict[str, Any]:
         """
         The metadata for the column. See `DataFrame.metadata` for more details.
         """
         pass
 
+    @abstractmethod
     def num_chunks(self) -> int:
         """
         Return the number of chunks the column consists of.
         """
         pass
 
+    @abstractmethod
     def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["Column"]:
         """
         Return an iterator yielding the chunks.
@@ -245,7 +312,8 @@ def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["Column"]:
         """
         pass
 
-    def get_buffers(self) -> Dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]], Optional[Tuple[Buffer, Any]]]:
+    @abstractmethod
+    def get_buffers(self) -> ColumnBuffers:
         """
         Return a dictionary containing the underlying buffers.
 
@@ -276,7 +344,7 @@ def get_buffers(self) -> Dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]],
 #        pass
 
 
-class DataFrame:
+class DataFrame(ABC):
     """
     A data frame class, with only the methods required by the interchange
     protocol defined.
@@ -290,29 +358,11 @@ class DataFrame:
     ``__dataframe__`` method of a public data frame class in a library adhering
     to the dataframe interchange protocol specification.
     """
-    def __dataframe__(self, nan_as_null : bool = False,
-                      allow_copy : bool = True) -> dict:
-        """
-        Produces a dictionary object following the dataframe protocol specification.
 
-        ``nan_as_null`` is a keyword intended for the consumer to tell the
-        producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
-        It is intended for cases where the consumer does not support the bit
-        mask or byte mask that is the producer's native representation.
-
-        ``allow_copy`` is a keyword that defines whether or not the library is
-        allowed to make a copy of the data. For example, copying data would be
-        necessary if a library supports strided buffers, given that this protocol
-        specifies contiguous buffers.
-        """
-        self._nan_as_null = nan_as_null
-        self._allow_zero_zopy = allow_copy
-        return {
-            "dataframe": self,  # DataFrame object adhering to the protocol
-            "version": 0        # Version number of the protocol
-        }
+    version = 0 # version of the protocol
 
     @property
+    @abstractmethod
     def metadata(self) -> Dict[str, Any]:
         """
         The metadata for the data frame, as a dictionary with string keys. The
@@ -325,12 +375,14 @@ def metadata(self) -> Dict[str, Any]:
         """
         pass
 
+    @abstractmethod
     def num_columns(self) -> int:
         """
         Return the number of columns in the DataFrame.
         """
         pass
 
+    @abstractmethod
     def num_rows(self) -> Optional[int]:
         # TODO: not happy with Optional, but need to flag it may be expensive
         #       why include it if it may be None - what do we expect consumers
@@ -340,48 +392,56 @@ def num_rows(self) -> Optional[int]:
         """
         pass
 
+    @abstractmethod
     def num_chunks(self) -> int:
         """
         Return the number of chunks the DataFrame consists of.
         """
         pass
 
+    @abstractmethod
     def column_names(self) -> Iterable[str]:
         """
         Return an iterator yielding the column names.
         """
         pass
 
+    @abstractmethod
     def get_column(self, i: int) -> Column:
         """
         Return the column at the indicated position.
         """
         pass
 
+    @abstractmethod
     def get_column_by_name(self, name: str) -> Column:
         """
         Return the column whose name is the indicated name.
         """
         pass
 
+    @abstractmethod
     def get_columns(self) -> Iterable[Column]:
         """
         Return an iterator yielding the columns.
         """
         pass
 
+    @abstractmethod
     def select_columns(self, indices: Sequence[int]) -> "DataFrame":
         """
         Create a new DataFrame by selecting a subset of columns by index.
         """
         pass
 
+    @abstractmethod
     def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
         """
         Create a new DataFrame by selecting a subset of columns by name.
         """
         pass
 
+    @abstractmethod
     def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["DataFrame"]:
         """
         Return an iterator yielding the chunks.

From 6b49f22d91e959f42a319f276492125ac4e42be8 Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Thu, 24 Feb 2022 21:25:23 +0300
Subject: [PATCH 3/6] Format the spec with black

Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
---
 protocol/dataframe_protocol.py | 35 ++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index 27313e74..a403ef37 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -42,7 +42,7 @@ class DtypeKind(enum.IntEnum):
     UINT = 1
     FLOAT = 2
     BOOL = 20
-    STRING = 21   # UTF-8
+    STRING = 21  # UTF-8
     DATETIME = 22
     CATEGORICAL = 23
 
@@ -73,18 +73,20 @@ class ColumnNullType(enum.IntEnum):
 
 
 class ColumnBuffers(TypedDict):
-    data: Tuple["Buffer", Any] # first element is a buffer containing the column data;
-                               # second element is the data buffer's associated dtype
-    validity: Optional[Tuple["Buffer", Any]] # first element is a buffer containing mask values
-                                             # indicating missing data and second element is
-                                             # the mask value buffer's associated dtype.
-                                             # None if the null representation is not a bit or byte mask
-    offsets: Optional[Tuple["Buffer", Any]] # first element is a buffer containing the
-                                            # offset values for variable-size binary data
-                                            # (e.g., variable-length strings) and
-                                            # second element is the offsets buffer's associated dtype.
-                                            # None if the data buffer does not have
-                                            # an associated offsets buffer
+    # first element is a buffer containing the column data;
+    # second element is the data buffer's associated dtype
+    data: Tuple["Buffer", Any]
+
+    # first element is a buffer containing mask values indicating missing data;
+    # second element is the mask value buffer's associated dtype.
+    # None if the null representation is not a bit or byte mask
+    validity: Optional[Tuple["Buffer", Any]]
+
+    # first element is a buffer containing the offset values for
+    # variable-size binary data (e.g., variable-length strings);
+    # second element is the offsets buffer's associated dtype.
+    # None if the data buffer does not have an associated offsets buffer
+    offsets: Optional[Tuple["Buffer", Any]]
 
 
 class Buffer(ABC):
@@ -304,7 +306,7 @@ def num_chunks(self) -> int:
         pass
 
     @abstractmethod
-    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["Column"]:
+    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["Column"]:
         """
         Return an iterator yielding the chunks.
 
@@ -336,6 +338,7 @@ def get_buffers(self) -> ColumnBuffers:
         """
         pass
 
+
 #    def get_children(self) -> Iterable[Column]:
 #        """
 #        Children columns underneath the column, each object in this iterator
@@ -359,7 +362,7 @@ class DataFrame(ABC):
     to the dataframe interchange protocol specification.
     """
 
-    version = 0 # version of the protocol
+    version = 0  # version of the protocol
 
     @property
     @abstractmethod
@@ -442,7 +445,7 @@ def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
         pass
 
     @abstractmethod
-    def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["DataFrame"]:
+    def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["DataFrame"]:
         """
         Return an iterator yielding the chunks.
 

From d772b47781a54960360e4e57a5e348c36605debe Mon Sep 17 00:00:00 2001
From: Vasily Litvinov <vasilij.n.litvinov@intel.com>
Date: Fri, 1 Apr 2022 16:09:29 +0300
Subject: [PATCH 4/6] Change API a bit, align formatting with pandas

Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
---
 protocol/dataframe_protocol.py | 49 ++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index a403ef37..6eaae7b4 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -1,6 +1,17 @@
-from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict
+from abc import (
+    ABC,
+    abstractmethod,
+)
 import enum
-from abc import ABC, abstractmethod
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    Optional,
+    Sequence,
+    Tuple,
+    TypedDict,
+)
 
 
 class DlpackDeviceType(enum.IntEnum):
@@ -89,6 +100,16 @@ class ColumnBuffers(TypedDict):
     offsets: Optional[Tuple["Buffer", Any]]
 
 
+class CategoricalDescription(TypedDict):
+    # whether the ordering of dictionary indices is semantically meaningful
+    is_ordered: bool
+    # whether a dictionary-style mapping of categorical values to other objects exists
+    is_dictionary: bool
+    # Python-level only (e.g. ``{int: str}``).
+    # None if not a dictionary-style categorical (quoted: Column is defined below).
+    categories: Optional["Column"]
+
+
 class Buffer(ABC):
     """
     Data in the buffer is guaranteed to be contiguous in memory.
@@ -191,7 +212,7 @@ class Column(ABC):
 
     @property
     @abstractmethod
-    def size(self) -> Optional[int]:
+    def size(self) -> int:
         """
         Size of the column, in elements.
 
@@ -246,7 +267,7 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
 
     @property
     @abstractmethod
-    def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
+    def describe_categorical(self) -> CategoricalDescription:
         """
         If the dtype is categorical, there are two options:
         - There are only values in the data buffer.
@@ -254,7 +275,7 @@ def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
 
         Raises TypeError if the dtype is not categorical
 
-        Returns the description on how to interpret the data buffer:
+        Returns a dictionary describing how to interpret the data buffer:
             - "is_ordered" : bool, whether the ordering of dictionary indices is
                              semantically meaningful.
             - "is_dictionary" : bool, whether a mapping of
@@ -364,6 +385,24 @@ class DataFrame(ABC):
 
     version = 0  # version of the protocol
 
+    @abstractmethod
+    def __dataframe__(
+        self, nan_as_null: bool = False, allow_copy: bool = True
+    ) -> "DataFrame":
+        """
+        Construct a new exchange object, potentially changing the parameters.
+
+        ``nan_as_null`` is a keyword intended for the consumer to tell the
+        producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
+        It is intended for cases where the consumer does not support the bit
+        mask or byte mask that is the producer's native representation.
+        ``allow_copy`` is a keyword that defines whether or not the library is
+        allowed to make a copy of the data. For example, copying data would be
+        necessary if a library supports strided buffers, given that this protocol
+        specifies contiguous buffers.
+        """
+        pass
+
     @property
     @abstractmethod
     def metadata(self) -> Dict[str, Any]:

From 0e9e17332c6d61c12bce40fd89efb2d6bf790d07 Mon Sep 17 00:00:00 2001
From: Ralf Gommers <ralf.gommers@gmail.com>
Date: Thu, 28 Jul 2022 14:21:11 +0200
Subject: [PATCH 5/6] Remove NaT (not-a-datetime) from the `USE_NAN`
 description.

This addresses the review comment that NaT is not a thing outside of
NumPy. Hence for not-a-datetime, all implementers should be using
sentinel values, because those are explicit.
---
 protocol/dataframe_protocol.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index 6eaae7b4..b2705e31 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -67,9 +67,9 @@ class ColumnNullType(enum.IntEnum):
     NON_NULLABLE : int
         Non-nullable column.
     USE_NAN : int
-        Use explicit float NaN/NaT value.
+        Use explicit float NaN value.
     USE_SENTINEL : int
-        Sentinel value besides NaN/NaT.
+        Sentinel value besides NaN.
     USE_BITMASK : int
         The bit is set/unset representing a null on a certain position.
     USE_BYTEMASK : int
@@ -393,7 +393,7 @@ def __dataframe__(
         Construct a new exchange object, potentially changing the parameters.
 
         ``nan_as_null`` is a keyword intended for the consumer to tell the
-        producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
+        producer to overwrite null values in the data with ``NaN``.
         It is intended for cases where the consumer does not support the bit
         mask or byte mask that is the producer's native representation.
         ``allow_copy`` is a keyword that defines whether or not the library is

From f1f1eac5c9660097ce2838bf9cfe8b8916f35af2 Mon Sep 17 00:00:00 2001
From: Ralf Gommers <ralf.gommers@gmail.com>
Date: Fri, 29 Jul 2022 15:24:03 +0200
Subject: [PATCH 6/6] Change `Column.size` from a property to a method

---
 protocol/dataframe_protocol.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
index b2705e31..adde1a48 100644
--- a/protocol/dataframe_protocol.py
+++ b/protocol/dataframe_protocol.py
@@ -210,7 +210,6 @@ class Column(ABC):
           doesn't need its own version or ``__column__`` protocol.
     """
 
-    @property
     @abstractmethod
     def size(self) -> int:
         """
@@ -218,6 +217,9 @@ def size(self) -> int:
 
         Corresponds to DataFrame.num_rows() if column is a single chunk;
         equal to size of this current chunk otherwise.
+
+        Is a method rather than a property because it may cause a (potentially
+        expensive) computation for some dataframe implementations.
         """
         pass