refactor(annotation): Updated variable and function names to reflect their origin and function

feat(annotation): Factory function accepts json schema getter
fix(annotation): Native json schema incompatible with native serializer
chore(version): Bump MAJOR
caniko authored and Can H. Tartanoglu committed Apr 12, 2024
1 parent 438b18c commit a9ae6ba
Showing 8 changed files with 283 additions and 132 deletions.
194 changes: 98 additions & 96 deletions poetry.lock

Large diffs are not rendered by default.

156 changes: 129 additions & 27 deletions pydantic_numpy/helper/annotation.py
@@ -7,9 +7,9 @@
from pydantic import FilePath, GetJsonSchemaHandler, PositiveInt, validate_call
from pydantic.json_schema import JsonSchemaValue
from pydantic_core import core_schema
from typing_extensions import Annotated
from typing_extensions import Annotated, Final

from pydantic_numpy.helper.typing import NumpyDataDict, SupportedDTypes
from pydantic_numpy.helper.typing import NumpyArrayTypeData, SupportedDTypes
from pydantic_numpy.helper.validation import (
create_array_validator,
validate_multi_array_numpy_file,
@@ -18,7 +18,7 @@
from pydantic_numpy.model import MultiArrayNumpyFile


def serialize_numpy_array_to_data_dict(array_like: npt.ArrayLike) -> NumpyDataDict:
def pd_np_native_numpy_array_to_data_dict_serializer(array_like: npt.ArrayLike) -> NumpyArrayTypeData:
"""
Serialize a NumPy array into a data dictionary format suitable for frontend display or processing.
@@ -42,40 +42,117 @@ def serialize_numpy_array_to_data_dict(array_like: npt.ArrayLike) -> NumpyDataDict:
Returns
-------
NumpyDataDict
NumpyArrayTypeData
A dictionary with two keys: 'data_type', a string representing the data type of the array,
and 'data', a list of values converted from the array. The conversion is to integer if the
original data type is datetime or timedelta, and to float for other data types.
Example
-------
>>> my_array = np.array([1, 2, 3])
>>> serialize_numpy_array_to_data_dict(my_array)
>>> pd_np_native_numpy_array_to_data_dict_serializer(my_array)
{'data_type': 'int64', 'data': [1.0, 2.0, 3.0]}
"""
array = np.array(array_like)

if issubclass(array.dtype.type, np.timedelta64) or issubclass(array.dtype.type, np.datetime64):
return NumpyDataDict(data_type=str(array.dtype), data=array.astype(int).tolist())
data = array.astype(int).tolist()
else:
data = array.astype(float).tolist()

return NumpyDataDict(data_type=str(array.dtype), data=array.astype(float).tolist())
return NumpyArrayTypeData(data_type=str(array.dtype), data=data)
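For orientation, a minimal sketch of the renamed serializer in action (assuming only NumPy; the datetime case relies on astype(int) yielding days since the epoch for datetime64[D]):

import numpy as np

stamps = np.array(["2024-04-12", "2024-04-13"], dtype="datetime64[D]")
pd_np_native_numpy_array_to_data_dict_serializer(stamps)
# -> {'data_type': 'datetime64[D]', 'data': [19825, 19826]}

pd_np_native_numpy_array_to_data_dict_serializer(np.array([1, 2, 3]))
# -> {'data_type': 'int64', 'data': [1.0, 2.0, 3.0]}  (non-datetime dtypes are cast to float)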


def pd_np_native_numpy_array_json_schema_from_type_data(
_field_core_schema: core_schema.CoreSchema,
_handler: GetJsonSchemaHandler,
dimensions: Optional[PositiveInt] = None,
data_type: Optional[SupportedDTypes] = None,
) -> JsonSchemaValue:
"""
Generates a JSON schema for a NumPy array field within a Pydantic model.
This function constructs a JSON schema definition compatible with Pydantic models
that are intended to validate NumPy array inputs. It supports specifying the data type
and dimensions of the NumPy array, which are used to construct a schema that ensures
input data matches the expected structure and type.
Parameters
----------
_field_core_schema : core_schema.CoreSchema
The core schema component of the Pydantic model, used for building basic schema structures.
_handler : GetJsonSchemaHandler
A handler function or object responsible for converting Python types to JSON schema components.
dimensions : Optional[PositiveInt], optional
The dimensions (shape) of the NumPy array. If specified, the schema will enforce that the
input array matches this dimensionality. If `None`, no dimensionality constraint is applied,
by default None.
data_type : Optional[SupportedDTypes], optional
The expected data type of the NumPy array elements. If specified, the schema will enforce
that the input array's data type is compatible with this. If `None`, any data type is allowed,
by default None.
Returns
-------
JsonSchemaValue
A dictionary representing the JSON schema for a NumPy array field within a Pydantic model.
This schema includes details about the expected array dimensions and data type.
"""
array_shape = _dimensions_to_shape_type[dimensions] if dimensions else "Any"

if data_type and _data_type_resolver(data_type):
array_data_type = data_type.__name__
item_schema = core_schema.list_schema(
items_schema=core_schema.any_schema(metadata=f"Must be compatible with numpy.dtype: {array_data_type}"),
)
else:
array_data_type = "Any"
item_schema = core_schema.list_schema(items_schema=core_schema.any_schema())

if dimensions:
data_schema = core_schema.list_schema(items_schema=item_schema, min_length=dimensions, max_length=dimensions)
else:
data_schema = item_schema

return dict(
title="Numpy Array",
type=f"np.ndarray[{array_shape}, np.dtype[{array_data_type}]]",
required=["data_type", "data"],
properties=dict(
data_type={"title": "dtype", "default": array_data_type, "type": "string"},
data=data_schema,
),
)
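A hedged sketch of calling the new schema helper directly; the first two parameters are ignored by the body above, so None placeholders stand in purely for illustration:

import numpy as np

schema = pd_np_native_numpy_array_json_schema_from_type_data(None, None, dimensions=2, data_type=np.float64)
assert schema["required"] == ["data_type", "data"]
# schema["properties"]["data"] is a list-of-lists schema with min_length == max_length == 2,
# and schema["type"] embeds np.dtype[float64].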


class NpArrayPydanticAnnotation:
dimensions: ClassVar[Optional[PositiveInt]]

data_type: ClassVar[SupportedDTypes]

strict_data_typing: ClassVar[bool]

serialize_numpy_array_to_json: ClassVar[Callable[[npt.ArrayLike], Iterable]]
json_schema_from_type_data: ClassVar[
Callable[
[core_schema.CoreSchema, GetJsonSchemaHandler, Optional[PositiveInt], Optional[SupportedDTypes]],
JsonSchemaValue,
]
]

@classmethod
def factory(
cls,
*,
data_type: Optional[SupportedDTypes] = None,
dimensions: Optional[int] = None,
dimensions: Optional[PositiveInt] = None,
strict_data_typing: bool = False,
serialize_numpy_array_to_json: Callable[[npt.ArrayLike], Iterable] = serialize_numpy_array_to_data_dict,
serialize_numpy_array_to_json: Callable[
[npt.ArrayLike], Iterable
] = pd_np_native_numpy_array_to_data_dict_serializer,
json_schema_from_type_data: Callable[
[core_schema.CoreSchema, GetJsonSchemaHandler, Optional[PositiveInt], Optional[SupportedDTypes]],
JsonSchemaValue,
] = pd_np_native_numpy_array_json_schema_from_type_data,
) -> type:
"""
Create a subtype of NpArrayPydanticAnnotation that is configured for a specific dimension and dtype.
@@ -86,12 +163,15 @@
Parameters
----------
data_type: SupportedDTypes
dimensions: Optional[int]
Number of dimensions determine the depth of the numpy array.
dimensions: Optional[PositiveInt]
If defined, the number of dimensions determines the depth of the numpy array. Defaults to None,
i.e. any number of dimensions.
strict_data_typing: bool
If True, the dtype of the numpy array must be identical to the data_type. No conversion attempts.
serialize_numpy_array_to_json: Callable[[npt.ArrayLike], Iterable]
Json serialization function to use. Defaults to NumpyDataDict serializer.
Json serialization function to use. Defaults to NumpyArrayTypeData serializer.
json_schema_from_type_data: Callable
Json schema generation function to use. Defaults to NumpyArrayTypeData schema generator.
Returns
-------
@@ -112,6 +192,7 @@
"data_type": data_type,
"strict_data_typing": strict_data_typing,
"serialize_numpy_array_to_json": serialize_numpy_array_to_json,
"json_schema_from_type_data": json_schema_from_type_data,
},
)
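To see the new json_schema_from_type_data hook wired end to end, here is a sketch of a factory-built annotation inside a model; Float1D and SignalModel are illustrative names, not part of this diff:

from typing import Annotated

import numpy as np
from pydantic import BaseModel

Float1D = NpArrayPydanticAnnotation.factory(data_type=np.float64, dimensions=1)

class SignalModel(BaseModel):
    samples: Annotated[np.ndarray, Float1D]

SignalModel(samples=np.arange(3, dtype=np.float64)).model_dump_json()
# -> '{"samples":{"data_type":"float64","data":[0.0,1.0,2.0]}}' via the serializer above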

@@ -128,28 +209,26 @@ def __get_pydantic_core_schema__(
python_schema=core_schema.chain_schema([_common_numpy_array_validator, np_array_schema]),
json_schema=np_array_schema,
serialization=core_schema.plain_serializer_function_ser_schema(
cls.serialize_numpy_array_to_json, when_used="json-unless-none"
cls.serialize_numpy_array_to_json,
is_field_serializer=False,
when_used="json-unless-none",
),
)

@classmethod
def __get_pydantic_json_schema__(
cls, _core_schema: core_schema.CoreSchema, _handler: GetJsonSchemaHandler
cls, field_core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
) -> JsonSchemaValue:
return dict(
type=(
f"np.ndarray[{_int_to_dim_type[cls.dimensions] if cls.dimensions else 'Any'}, "
f"np.dtype[{cls.data_type.__name__ if _data_type_resolver(cls.data_type) else 'Any'}]"
),
strict_data_typing=cls.strict_data_typing,
)
return cls.json_schema_from_type_data(field_core_schema, handler, cls.dimensions, cls.data_type)
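The fix named in the commit message can be read off here: the JSON schema hook now delegates to the same type-data layout that the native serializer emits, so schema and payload agree. A minimal consistency check, as a sketch:

payload = pd_np_native_numpy_array_to_data_dict_serializer(np.array([1.5]))
schema = pd_np_native_numpy_array_json_schema_from_type_data(None, None)
assert set(schema["required"]) == set(payload.keys())  # {'data_type', 'data'}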


def np_array_pydantic_annotated_typing(
data_type: Optional[SupportedDTypes] = None,
dimensions: Optional[int] = None,
strict_data_typing: bool = False,
serialize_numpy_array_to_json: Callable[[npt.ArrayLike], Iterable] = serialize_numpy_array_to_data_dict,
serialize_numpy_array_to_json: Callable[
[npt.ArrayLike], Iterable
] = pd_np_native_numpy_array_to_data_dict_serializer,
):
"""
Generates typing and pydantic annotation of a np.ndarray parametrized with given constraints
@@ -162,18 +241,25 @@ def np_array_pydantic_annotated_typing(
strict_data_typing: bool
If True, the dtype of the numpy array must be identical to the data_type. No conversion attempts.
serialize_numpy_array_to_json: Callable[[npt.ArrayLike], Iterable]
Json serialization function to use. Defaults to NumpyDataDict serializer.
Json serialization function to use. Defaults to NumpyArrayTypeData serializer.
Returns
-------
type-hint for np.ndarray with Pydantic support
Note
----
The function generates the type hints dynamically, and will not work with static type checkers such as mypy
or pyright. For that you need to create your types manually.
"""
return Annotated[
Union[
FilePath,
MultiArrayNumpyFile,
np.ndarray[ # type: ignore[misc]
_int_to_dim_type[dimensions] if dimensions else Any, # pyright: ignore
_dimensions_to_shape_type[dimensions] # pyright: ignore[reportGeneralTypeIssues]
if dimensions
else Any,
np.dtype[data_type] if _data_type_resolver(data_type) else data_type, # type: ignore[valid-type]
],
],
@@ -191,11 +277,27 @@ def _data_type_resolver(data_type: Optional[SupportedDTypes]) -> bool:


@validate_call
def _deserialize_numpy_array_from_data_dict(data_dict: NumpyDataDict) -> np.ndarray:
def _deserialize_numpy_array_from_data_dict(data_dict: NumpyArrayTypeData) -> np.ndarray:
return np.array(data_dict["data"]).astype(data_dict["data_type"])
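A round-trip sketch pairing the public serializer with this private deserialization helper (names as defined above):

data = pd_np_native_numpy_array_to_data_dict_serializer(np.array([1, 2, 3]))
restored = _deserialize_numpy_array_from_data_dict(data)
# restored -> array([1, 2, 3]); the float payload is cast back through astype("int64")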


_int_to_dim_type = {1: tuple[int], 2: tuple[int, int], 3: tuple[int, int, int]}
# IN_THE_FUTURE: Only works with 3.11 and above
# @validate_call
# def _dimension_type_from_depth(depth: PositiveInt) -> type[tuple[Any, ...]]:
# return tuple[*[Any] * depth] # type: ignore


_dimensions_to_shape_type: Final[dict[PositiveInt, type[tuple[Any, ...]]]] = {
1: tuple[Any],
2: tuple[Any, Any],
3: tuple[Any, Any, Any],
4: tuple[Any, Any, Any, Any],
5: tuple[Any, Any, Any, Any, Any],
6: tuple[Any, Any, Any, Any, Any, Any],
7: tuple[Any, Any, Any, Any, Any, Any, Any],
}


_common_numpy_array_validator = core_schema.union_schema(
[
core_schema.chain_schema(
2 changes: 1 addition & 1 deletion pydantic_numpy/helper/typing.py
@@ -4,6 +4,6 @@
SupportedDTypes = type[np.generic]


class NumpyDataDict(TypedDict):
class NumpyArrayTypeData(TypedDict):
data_type: str
data: list
4 changes: 2 additions & 2 deletions pydantic_numpy/helper/validation.py
@@ -6,7 +6,7 @@
from numpy.lib.npyio import NpzFile
from pydantic import FilePath

from pydantic_numpy.helper.typing import NumpyDataDict, SupportedDTypes
from pydantic_numpy.helper.typing import NumpyArrayTypeData, SupportedDTypes
from pydantic_numpy.model import MultiArrayNumpyFile


@@ -36,7 +36,7 @@ def create_array_validator(
Validator for numpy array
"""

def array_validator(array_data: Union[npt.NDArray, NumpyDataDict]) -> npt.NDArray:
def array_validator(array_data: Union[npt.NDArray, NumpyArrayTypeData]) -> npt.NDArray:
array: npt.NDArray = (
np.array(array_data["data"], dtype=array_data.get("dtype", None))
if isinstance(array_data, dict)
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pydantic_numpy"
version = "4.2.0"
version = "5.0.0"
description = "Pydantic Model integration of the NumPy array"
authors = ["Can H. Tartanoglu", "Christoph Heindl"]
maintainers = ["Can H. Tartanoglu <[email protected]>"]
@@ -50,7 +50,7 @@ filterwarnings = [

[tool.black]
line-length = 120
target-version = ["py311"]
target-version = ["py312"]

[tool.isort]
profile = "black"
5 changes: 5 additions & 0 deletions tests/model.py
@@ -1,11 +1,16 @@
from pydantic_numpy import NpNDArray
from pydantic_numpy.model import NumpyModel
from pydantic_numpy.typing import Np1DArray


class NpNDArrayModel(NumpyModel):
array: NpNDArray


class N1DArrayModel(NumpyModel):
array: Np1DArray


class NpNDArrayModelWithNonArray(NpNDArrayModel):
non_array: int
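A quick sketch of what the new N1DArrayModel above should accept and reject, assuming Np1DArray enforces its one-dimension constraint at validation time:

import numpy as np

N1DArrayModel(array=np.zeros(4))         # one dimension: validates
# N1DArrayModel(array=np.zeros((2, 2)))  # two dimensions: expected to raise ValidationError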

48 changes: 45 additions & 3 deletions tests/test_pydantic.py
@@ -2,14 +2,56 @@

import numpy as np

from tests.model import NpNDArrayModel, NpNDArrayModelWithNonArray
from tests.model import N1DArrayModel, NpNDArrayModel, NpNDArrayModelWithNonArray

test_model_instance = NpNDArrayModelWithNonArray(array=np.zeros(10), non_array=2)


class TestModelValidation(unittest.TestCase):
def test_model_json_schema(self):
self.assertTrue(NpNDArrayModel.model_json_schema())
def test_model_json_schema_np_nd_array_model(self):
schema = NpNDArrayModel.model_json_schema()
expected = {
"properties": {
"array": {
"properties": {
"data_type": {"default": "Any", "title": "dtype", "type": "string"},
"data": {"items_schema": {"type": "any"}, "type": "list"},
},
"required": ["data_type", "data"],
"title": "Numpy Array",
"type": "np.ndarray[Any, np.dtype[Any]]",
}
},
"required": ["array"],
"title": "NpNDArrayModel",
"type": "object",
}
self.assertEqual(schema, expected)

def test_model_json_schema_np_1d_array_model(self):
schema = N1DArrayModel.model_json_schema()
expected = {
"properties": {
"array": {
"properties": {
"data": {
"items_schema": {"items_schema": {"type": "any"}, "type": "list"},
"max_length": 1,
"min_length": 1,
"type": "list",
},
"data_type": {"default": "Any", "title": "dtype", "type": "string"},
},
"required": ["data_type", "data"],
"title": "Numpy Array",
"type": "np.ndarray[tuple[int], np.dtype[Any]]",
}
},
"required": ["array"],
"title": "N1DArrayModel",
"type": "object",
}
self.assertEqual(schema, expected)

def test_validate_json(self):
json_str = test_model_instance.model_dump_json()
2 changes: 1 addition & 1 deletion tests/test_typing.py
@@ -65,7 +65,7 @@ def test_json_serialize_deserialize(numpy_dtype: SupportedDTypes, pydantic_typing
round_trip_result = numpy_model(array_field=dumped_model_json_loaded["array_field"]).array_field

if issubclass(numpy_dtype, np.timedelta64) or issubclass(numpy_dtype, np.datetime64):
assert hyp_array == round_trip_result
assert np.all(hyp_array == round_trip_result)
else:
assert_almost_equal(hyp_array, round_trip_result)
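The switch to np.all sidesteps the ambiguous truth value of a multi-element elementwise comparison; a minimal illustration:

a = np.array([1, 2])
b = np.array([1, 2])
# bool(a == b) raises ValueError: the truth value of a multi-element array is ambiguous
assert np.all(a == b)  # reduces the elementwise comparison to a single bool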

