From 5fe9dd0c2a5ada0162a005c880bac5296686a5aa Mon Sep 17 00:00:00 2001 From: Ian Eaves Date: Tue, 6 Feb 2024 14:32:20 -0600 Subject: [PATCH] 0.7.6 (#202) * bump * old * 3.10 * lint * lint * removed 3.8 from tests * remove mypy from precommit * flake8 complaint * black * test updates * "lint * numpy differences * lint * integer->float * integer->float * pandas numpy roundtripping variation * lint --- .github/workflows/pypi.yml | 30 ++--- .github/workflows/tests.yml | 6 +- .gitignore | 1 + .pre-commit-config.yaml | 25 ++-- mypy.ini | 5 + requirements.txt | 4 +- setup.py | 4 +- src/visions/backends/numpy/sequences.py | 7 +- src/visions/backends/numpy/test_utils.py | 1 + src/visions/backends/numpy/types/date_time.py | 3 +- src/visions/backends/numpy/types/integer.py | 7 +- src/visions/backends/pandas/sequences.py | 2 +- src/visions/backends/pandas/series_utils.py | 5 +- src/visions/backends/pandas/test_utils.py | 5 +- src/visions/backends/pandas/types/complex.py | 30 +++-- .../backends/pandas/types/date_time.py | 20 ++- src/visions/backends/shared/nan_handling.py | 6 +- src/visions/dtypes/boolean.py | 10 +- src/visions/relations/__init__.py | 1 + src/visions/test/series.py | 5 +- src/visions/test/series_sparse.py | 10 +- src/visions/types/type.py | 36 +++--- src/visions/typesets/typeset.py | 2 +- src/visions/version.py | 2 +- tests/numpy_/typesets/test_standard_set.py | 115 ++++++++++-------- tests/pandas_/typesets/test_complete_set.py | 6 +- .../typesets/test_standard_set_sparse.py | 54 ++++---- .../typesets/test_python_complete_set.py | 6 +- tests/test_typeset_external_type.py | 1 + 29 files changed, 231 insertions(+), 178 deletions(-) create mode 100644 mypy.ini diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 2326ea4e2..f59371b8f 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -2,23 +2,23 @@ name: Deploy to PyPi on: release: - types: [created] - + types: [ created ] + jobs: release: if: github.event_name == 'release' && github.event.action == 'created' name: PyPi Release runs-on: ubuntu-latest - + steps: - uses: actions/checkout@v2 name: Checkout repo - - - name: Set up Python 3.8 + + - name: Set up Python 3.12 uses: actions/setup-python@v1 with: - python-version: 3.8 - + python-version: 3.12 + - uses: actions/cache@v1 name: Cache pip dependencies with: @@ -26,23 +26,23 @@ jobs: key: ${{ runner.os }}-pip-${{ hashFiles('requirements*.txt') }} restore-keys: | ${{ runner.os }}-pip- - + - name: Install pip dependencies run: | - pip install --upgrade pip - pip install -r requirements.txt - pip install -r requirements_dev.txt - pip install -r requirements_test.txt - + pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements_dev.txt + pip install -r requirements_test.txt + - name: Install run: make install - + - name: Make distribution run: | check-manifest python setup.py sdist bdist_wheel twine check dist/* - + - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@master with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7d960e415..1906d6b74 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,10 +10,10 @@ jobs: test: strategy: matrix: - os: [ubuntu-latest] - python: [3.7, 3.8, 3.9] + os: [ ubuntu-latest ] + python: [ 3.8, 3.9, "3.10", 3.11, 3.12 ] runs-on: ${{ matrix.os }} - + steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python }} diff --git a/.gitignore b/.gitignore index 66f5e4a10..a626f37b1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ docsrc/source/visions/api/_autosummary/ docsrc/build/ plots/*/ playground/ +.vscode/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ef436e62f..e4a921808 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,21 @@ repos: - repo: https://github.com/asottile/pyupgrade - rev: v3.2.0 + rev: v3.15.0 hooks: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.13.2 hooks: - id: isort - files: '.*' - args: ['--profile=black'] + args: ["--profile", "black", "--filter-files"] - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 24.1.1 hooks: - id: black language_version: python -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 hooks: - id: flake8 # TODO: remove F40x, E772 and fix errors @@ -26,17 +25,11 @@ repos: # - flake8-print - flake8-2020 - repo: https://github.com/mgedmin/check-manifest - rev: "0.48" + rev: "0.49" hooks: - id: check-manifest -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.982' - hooks: - - id: mypy - files: '^src/' - additional_dependencies: - - types-attrs +# g - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.9.0 + rev: v1.10.0 hooks: - id: rst-backticks diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..8ca8d08ea --- /dev/null +++ b/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +strict = True + +[mypy-tests.*] +disable_error_code = ["empty-body"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 76b3a8d07..ea1a7b861 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -numpy -pandas>=0.25.3 +numpy>=1.23.2 +pandas>=2.0.0 attrs>=19.3.0 networkx>=2.4 multimethod>=1.4 diff --git a/setup.py b/setup.py index 62a097df4..cecd178bb 100644 --- a/setup.py +++ b/setup.py @@ -60,10 +60,10 @@ zip_safe=False, classifiers=[ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], ) diff --git a/src/visions/backends/numpy/sequences.py b/src/visions/backends/numpy/sequences.py index 3a7f150c0..f74b33465 100644 --- a/src/visions/backends/numpy/sequences.py +++ b/src/visions/backends/numpy/sequences.py @@ -44,12 +44,7 @@ def get_sequences() -> Dict[str, Sequence]: complex(3, -1), ], "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_), - "complex_series_nan": [ - complex(0, 0), - complex(1, 2), - complex(3, -1), - complex(np.nan, np.nan), - ], + "complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None], "complex_series_nan_2": [ complex(0, 0), complex(1, 2), diff --git a/src/visions/backends/numpy/test_utils.py b/src/visions/backends/numpy/test_utils.py index 139869862..fcfbd2c6d 100644 --- a/src/visions/backends/numpy/test_utils.py +++ b/src/visions/backends/numpy/test_utils.py @@ -1,6 +1,7 @@ """ A selection of testing utilities for visions. """ + import functools from typing import Callable, Dict, List, Optional, Type, Union diff --git a/src/visions/backends/numpy/types/date_time.py b/src/visions/backends/numpy/types/date_time.py index eaa37bc5a..9d1206740 100644 --- a/src/visions/backends/numpy/types/date_time.py +++ b/src/visions/backends/numpy/types/date_time.py @@ -6,6 +6,7 @@ from visions.backends.numpy import test_utils from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty +from visions.backends.pandas.types.date_time import pandas_infer_datetime from visions.types import DateTime, String @@ -32,7 +33,7 @@ def string_is_datetime(array: np.ndarray, state: dict) -> bool: @DateTime.register_transformer(String, np.ndarray) def string_to_datetime(array: np.ndarray, state: dict) -> np.ndarray: # return array.astype(np.datetime64) - return pd.to_datetime(array).to_numpy() + return pandas_infer_datetime(pd.Series(array), state).to_numpy() @DateTime.contains_op.register diff --git a/src/visions/backends/numpy/types/integer.py b/src/visions/backends/numpy/types/integer.py index 0ca8feb82..c7634be3b 100644 --- a/src/visions/backends/numpy/types/integer.py +++ b/src/visions/backends/numpy/types/integer.py @@ -6,15 +6,16 @@ @Integer.register_relationship(Float, np.ndarray) -@array_handle_nulls def float_is_integer(series: np.ndarray, state: dict) -> bool: - return (series.astype(np.int_) == series).all() + return np.all(np.mod(series[~np.isnan(series)], 1) == 0) +# TODO: The array_handle_nulls is actually removing nulls from the result. This is _far_ from ideal but there is no +# other native way to represent nullable integers in numpy @Integer.register_transformer(Float, np.ndarray) @array_handle_nulls def float_to_integer(series: np.ndarray, state: dict) -> np.ndarray: - return series.astype(np.int_) + return series.astype(int) @Integer.contains_op.register diff --git a/src/visions/backends/pandas/sequences.py b/src/visions/backends/pandas/sequences.py index 2fc9b3553..e80312eb3 100644 --- a/src/visions/backends/pandas/sequences.py +++ b/src/visions/backends/pandas/sequences.py @@ -68,7 +68,7 @@ def get_sequences() -> Dict[str, Iterable]: pd.date_range( start="2013-05-18 12:00:01", periods=2, - freq="H", + freq="h", tz="Europe/Brussels", ) ), diff --git a/src/visions/backends/pandas/series_utils.py b/src/visions/backends/pandas/series_utils.py index 26f930ba1..b4533c28c 100644 --- a/src/visions/backends/pandas/series_utils.py +++ b/src/visions/backends/pandas/series_utils.py @@ -2,7 +2,6 @@ from typing import Callable import pandas as pd -from pandas.api import types as pdt # For future reference: get the dtype from the subtype when the series is sparse @@ -11,7 +10,7 @@ def series_handle_sparse_dtype(fn: Callable[..., bool]) -> Callable[..., bool]: @functools.wraps(fn) def inner(series: pd.Series, state: dict, *args, **kwargs) -> bool: - if pdt.is_sparse(series): + if isinstance(series.dtype, pd.SparseDtype): dtype = series.dtype.subtype else: dtype = series.dtype @@ -43,7 +42,7 @@ def series_not_sparse(fn: Callable[..., bool]) -> Callable[..., bool]: @functools.wraps(fn) def inner(series: pd.Series, *args, **kwargs) -> bool: - if pdt.is_sparse(series): + if isinstance(series, pd.SparseDtype): return False return fn(series, *args, **kwargs) diff --git a/src/visions/backends/pandas/test_utils.py b/src/visions/backends/pandas/test_utils.py index 09ea42f23..bd3076dcd 100644 --- a/src/visions/backends/pandas/test_utils.py +++ b/src/visions/backends/pandas/test_utils.py @@ -1,6 +1,7 @@ """ A selection of testing utilities for visions. """ + import functools from typing import Callable, Dict, List, Optional, Type, Union @@ -160,10 +161,10 @@ def coercion_map(mapping: Union[List[Dict], Dict]) -> Callable[[pd.Series], pd.S Returns: A callable that maps the series. """ - if type(mapping) == list: + if isinstance(mapping, list): mapping = {k: v for d in mapping for k, v in d.items()} - elif type(mapping) != dict: + elif not isinstance(mapping, dict): raise ValueError("Mapping should be dict or list of dicts") def f(series: pd.Series) -> pd.Series: diff --git a/src/visions/backends/pandas/types/complex.py b/src/visions/backends/pandas/types/complex.py index 7fa1da84d..4405787ea 100644 --- a/src/visions/backends/pandas/types/complex.py +++ b/src/visions/backends/pandas/types/complex.py @@ -1,34 +1,50 @@ +import math +from typing import Union + +import numpy as np import pandas as pd from pandas.api import types as pdt from visions.backends.pandas import test_utils from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse -from visions.backends.pandas.types.float import string_is_float from visions.backends.shared.parallelization_engines import pandas_apply from visions.types.complex import Complex from visions.types.string import String -def imaginary_in_string(series: pd.Series, imaginary_indicator: tuple = ("j", "i")): +def imaginary_in_string( + series: pd.Series, imaginary_indicator: tuple = ("j", "i") +) -> bool: return any(any(v in s for v in imaginary_indicator) for s in series) +def convert_val_to_complex(val: str) -> Union[complex, float]: + result = complex(val) + return ( + np.nan if any(math.isnan(val) for val in (result.real, result.imag)) else result + ) + + +def convert_to_complex_series(series: pd.Series) -> pd.Series: + return pandas_apply(series, convert_val_to_complex) + + @Complex.register_relationship(String, pd.Series) def string_is_complex(series: pd.Series, state: dict) -> bool: - def f(s: pd.Series) -> pd.Series: - return pandas_apply(s, complex) + coerced_series = test_utils.option_coercion_evaluator(convert_to_complex_series)( + series + ) - coerced_series = test_utils.option_coercion_evaluator(f)(series) return ( coerced_series is not None - and not string_is_float(series, state) + and not all(v.imag == 0 for v in coerced_series.dropna()) and imaginary_in_string(series) ) @Complex.register_transformer(String, pd.Series) def string_to_complex(series: pd.Series, state: dict) -> pd.Series: - return pandas_apply(series, complex) + return convert_to_complex_series(series) @Complex.contains_op.register diff --git a/src/visions/backends/pandas/types/date_time.py b/src/visions/backends/pandas/types/date_time.py index 154e738b0..a57e48e0e 100644 --- a/src/visions/backends/pandas/types/date_time.py +++ b/src/visions/backends/pandas/types/date_time.py @@ -1,5 +1,3 @@ -from functools import partial - import pandas as pd from pandas.api import types as pdt @@ -12,14 +10,26 @@ from visions.types import DateTime, String +def pandas_infer_datetime(series: pd.Series, state: dict) -> pd.Series: + try: + return pd.to_datetime(series) + except Exception: + pass + + return pd.to_datetime(series, format="mixed") + + @DateTime.register_relationship(String, pd.Series) @series_handle_nulls def string_is_datetime(series: pd.Series, state: dict) -> bool: - exceptions = [OverflowError, TypeError] + def string_to_datetime_func(series: pd.Series) -> pd.Series: + return pandas_infer_datetime(series, state) + exceptions = [OverflowError, TypeError] coerced_series = test_utils.option_coercion_evaluator( - partial(string_to_datetime, state=state), exceptions + string_to_datetime_func, exceptions )(series) + if coerced_series is None: return False else: @@ -28,7 +38,7 @@ def string_is_datetime(series: pd.Series, state: dict) -> bool: @DateTime.register_transformer(String, pd.Series) def string_to_datetime(series: pd.Series, state: dict) -> pd.Series: - return pd.to_datetime(series) + return pandas_infer_datetime(series, state) @DateTime.contains_op.register diff --git a/src/visions/backends/shared/nan_handling.py b/src/visions/backends/shared/nan_handling.py index eaccdf375..cc2e6dcea 100644 --- a/src/visions/backends/shared/nan_handling.py +++ b/src/visions/backends/shared/nan_handling.py @@ -45,11 +45,9 @@ def is_missing(x): else: return False + nb.extending.overload(is_missing)(lambda x: is_missing) - nb.extending.overload(is_missing)(is_missing) - - - @nb.jit + @nb.jit(nopython=True) def hasna(x: np.ndarray) -> bool: for item in x: if is_missing(item): diff --git a/src/visions/dtypes/boolean.py b/src/visions/dtypes/boolean.py index 5051bd072..508b20659 100644 --- a/src/visions/dtypes/boolean.py +++ b/src/visions/dtypes/boolean.py @@ -197,7 +197,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): values = np.array(values, dtype=int, copy=copy) elif not ( - is_integer_dtype(values) or is_float_dtype(values) or is_bool_dtype(values) + is_integer_dtype(values) or is_float_dtype(values) or is_bool_dtype(values) ): raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") @@ -302,9 +302,9 @@ def dtype(self): def __init__(self, values, mask, copy=False): if not ( - isinstance(values, np.ndarray) - and is_integer_dtype(values.dtype) - or is_bool_dtype(values.dtype) + isinstance(values, np.ndarray) + and is_integer_dtype(values.dtype) + or is_bool_dtype(values.dtype) ): raise TypeError( "values should be integer numpy array. Use " @@ -677,7 +677,7 @@ def _maybe_mask_result(self, result, mask, other, op_name): # a float result # or our op is a divide if (is_float_dtype(other) or is_float(other)) or ( - op_name in ["rtruediv", "truediv"] + op_name in ["rtruediv", "truediv"] ): result[mask] = np.nan return result diff --git a/src/visions/relations/__init__.py b/src/visions/relations/__init__.py index a28248f94..b48c2456d 100644 --- a/src/visions/relations/__init__.py +++ b/src/visions/relations/__init__.py @@ -1,4 +1,5 @@ """This module contains (predefined) relations.""" + from visions.relations.relations import ( IdentityRelation, InferenceRelation, diff --git a/src/visions/test/series.py b/src/visions/test/series.py index e656945b5..fe1557775 100644 --- a/src/visions/test/series.py +++ b/src/visions/test/series.py @@ -11,7 +11,10 @@ def get_series() -> Dict[str, pd.Series]: sequences = get_builtin_sequences() sequences.update(get_numpy_sequences()) - test_series = {name: pd.Series(sequence) for name, sequence in sequences.items()} + test_series = {} + for name, sequence in sequences.items(): + test_series[name] = pd.Series(sequence) + test_series.update(get_pandas_sequences()) assert all(isinstance(v, pd.Series) for v in test_series.values()) diff --git a/src/visions/test/series_sparse.py b/src/visions/test/series_sparse.py index 5a94238ed..83ada9cc4 100644 --- a/src/visions/test/series_sparse.py +++ b/src/visions/test/series_sparse.py @@ -6,7 +6,7 @@ from visions.backends.pandas.test_utils import pandas_version not_pandas_1_0_5 = not ( - (pandas_version[0] == 1) and (pandas_version[1] == 0) and (pandas_version[2] == 5) + (pandas_version[0] == 1) and (pandas_version[1] == 0) and (pandas_version[2] == 5) ) @@ -14,7 +14,7 @@ def get_sparse_series() -> Dict[str, pd.Series]: test_series = { "int_sparse": pd.Series([-1, 0, 1, 2, 3], dtype=pd.SparseDtype(np.int32, 0)), "float_sparse": pd.Series( - [np.nan, 0, 1, 2, 3], + [np.nan, 0.2, 1, 2, 3], dtype=pd.SparseDtype(np.float64, np.nan), ), "complex_sparse": pd.Series( @@ -29,11 +29,11 @@ def get_sparse_series() -> Dict[str, pd.Series]: pd.arrays.SparseArray([None, None, "gold", "black", "silver"]), ), # Pending https://github.com/pandas-dev/pandas/issues/35762 - # pd.Series([NoneT, 0, 1, 2, 3, 4], name="datetime_sparse", dtype=pd.SparseDtype(np.datetime64)), + # pd.Series([None, 0, 1, 2, 3, 4], name="datetime_sparse", dtype=pd.SparseDtype(np.datetime64)), # Pandas dtypes "pd_int64_sparse": pd.Series( [0, 1, 2, 3, None], - dtype=pd.SparseDtype(pd.Int64Dtype()), + dtype=pd.SparseDtype("int", np.nan), ), # Pending https://github.com/pandas-dev/pandas/issues/35793 # pd.Series( @@ -50,7 +50,7 @@ def get_sparse_series() -> Dict[str, pd.Series]: ) test_series["pd_bool_sparse"] = pd.Series( [True, False, False, None], - dtype=pd.SparseDtype(pd.BooleanDtype(), None), + dtype=pd.SparseDtype("bool", pd.NA), ) return test_series diff --git a/src/visions/types/type.py b/src/visions/types/type.py index b4e6218f9..702dcf3b5 100644 --- a/src/visions/types/type.py +++ b/src/visions/types/type.py @@ -58,21 +58,27 @@ def relations(cls) -> RelationsIterManager: if cls._relations is None: cls._relations = RelationsIterManager( [ - attr.evolve( - r, - type=cls, - relationship=cls.contains_op - if r.relationship is None - else r.relationship, - ) - if isinstance(r, IdentityRelation) - else attr.evolve( - r, - type=cls, - relationship=multimethod(r.relationship) - if r.relationship is not None - else None, - transformer=multimethod(r.transformer), + ( + attr.evolve( + r, + type=cls, + relationship=( + cls.contains_op + if r.relationship is None + else r.relationship + ), + ) + if isinstance(r, IdentityRelation) + else attr.evolve( + r, + type=cls, + relationship=( + multimethod(r.relationship) + if r.relationship is not None + else None + ), + transformer=multimethod(r.transformer), + ) ) for r in cls.get_relations() ] diff --git a/src/visions/typesets/typeset.py b/src/visions/typesets/typeset.py index f42fa3021..3f4289052 100644 --- a/src/visions/typesets/typeset.py +++ b/src/visions/typesets/typeset.py @@ -143,6 +143,7 @@ def traverse_graph_with_series( for vision_type in graph.successors(base_type): relation = graph[base_type][vision_type]["relationship"] + if relation.is_relation(series, state): series = relation.transform(series, state) return traverse_graph_with_series(vision_type, series, graph, path, state) @@ -299,7 +300,6 @@ def infer(self, data: Sequence) -> Tuple[Sequence, Any, dict]: return traverse_graph(data, self.root_node, self.relation_graph) def infer_type(self, data: Sequence) -> Union[T, Dict[str, T]]: - """The inferred type found using all type relations. Args: diff --git a/src/visions/version.py b/src/visions/version.py index ab55bb1af..aed57348c 100644 --- a/src/visions/version.py +++ b/src/visions/version.py @@ -1 +1 @@ -__version__ = "0.7.5" +__version__ = "0.7.6" diff --git a/tests/numpy_/typesets/test_standard_set.py b/tests/numpy_/typesets/test_standard_set.py index 61618b92e..10e03920c 100644 --- a/tests/numpy_/typesets/test_standard_set.py +++ b/tests/numpy_/typesets/test_standard_set.py @@ -33,33 +33,40 @@ def reload_series_to_numpy(s): return np.array(s.tolist()) +def fix_nan(series: pd.Series): + mask = [val is pd.NA for val in series] + series[mask] = np.nan + return reload_series_to_numpy(series) + + array = get_series() array.update(get_geometry_series()) array = {k: v.to_numpy() for k, v in array.items()} # Pandas doesn't correctly handle complex categoricals pending # https://github.com/pandas-dev/pandas/pull/36482/ -array.pop("categorical_complex_series") +# array.pop("categorical_complex_series") # Some sequences don't round trip correctly from pandas (i.e. Series.to_numpy() # is not equivalent to np.array(Series.tolist()) array["Int64_int_series"] = reload_series_to_numpy(array["Int64_int_series"]) array["pd_uint32"] = reload_series_to_numpy(array["pd_uint32"]) +array["Int64_int_nan_series"] = fix_nan(array["Int64_int_nan_series"]) -typeset = StandardSet() - Categorical +typeset = StandardSet() # - Categorical contains_map = { Integer: { - "int_series", "Int64_int_series", + "int_series", "int_range", - "Int64_int_nan_series", "int_series_boolean", "np_uint32", "pd_uint32", "categorical_int_series", }, Float: { + "Int64_int_nan_series", "float_series", "float_series2", "float_series3", @@ -72,6 +79,7 @@ def reload_series_to_numpy(s): "float_series6", "categorical_float_series", }, + Categorical: set(), Boolean: { "bool_series", "bool_series2", @@ -88,7 +96,7 @@ def reload_series_to_numpy(s): "complex_series_nan_2", "complex_series_float", "complex_series_py_float", - # "categorical_complex_series", + "categorical_complex_series", }, DateTime: { "timestamp_series", @@ -133,56 +141,55 @@ def reload_series_to_numpy(s): "string_dtype_series", "ordinal", }, + Object: { + "path_series_linux", + "path_series_linux_missing", + "path_series_windows", + "url_series", + "url_nan_series", + "url_none_series", + "file_test_py", + "file_mixed_ext", + "file_test_py_missing", + "image_png", + "image_png_missing", + "image_png", + "image_png_missing", + "email_address", + "email_address_missing", + "uuid_series", + "uuid_series_missing", + "ip", + "ip_mixed_v4andv6", + "ip_missing", + "geometry_series", + "geometry_series_missing", + "mixed_list[str,int]", + "mixed_dict", + "callable", + "module", + "mixed_integer", + "mixed_list", + "date", + "time", + "ordinal", + }, + Generic: { + "empty", + "empty_bool", + "empty_float", + "empty_int64", + "empty_object", + "all_null_none", + "all_null_nan", + "all_null_nat", + "nan_series", + "nan_series_2", + }, } -contains_map[Object] = { - "path_series_linux", - "path_series_linux_missing", - "path_series_windows", - "url_series", - "url_nan_series", - "url_none_series", - "file_test_py", - "file_mixed_ext", - "file_test_py_missing", - "image_png", - "image_png_missing", - "image_png", - "image_png_missing", - "email_address", - "email_address_missing", - "uuid_series", - "uuid_series_missing", - "ip", - "ip_mixed_v4andv6", - "ip_missing", - "geometry_series", - "geometry_series_missing", - "mixed_list[str,int]", - "mixed_dict", - "callable", - "module", - "mixed_integer", - "mixed_list", - "date", - "time", - "ordinal", -} - # Empty series -contains_map[Generic] = { - "empty", - "empty_bool", - "empty_float", - "empty_int64", - "empty_object", - "all_null_none", - "all_null_nan", - "all_null_nat", - "nan_series", - "nan_series_2", -} @pytest.mark.parametrize(**get_contains_cases(array, contains_map, typeset)) @@ -194,6 +201,7 @@ def test_contains(name, series, contains_type, member): contains_type: the type to test against member: the result """ + result, message = contains(name, series, contains_type, member) assert result, message @@ -248,7 +256,6 @@ def test_contains(name, series, contains_type, member): "complex_series_nan_2": Complex, "complex_series_py_nan": Complex, "complex_series_py": Complex, - # "categorical_complex_series": Complex, "timestamp_series": DateTime, "timestamp_series_nat": DateTime, "timestamp_aware_series": DateTime, @@ -311,6 +318,7 @@ def test_contains(name, series, contains_type, member): "all_null_nat": Generic, "all_null_empty_str": String, "string_dtype_series": String, + "categorical_complex_series": Complex, } @@ -329,7 +337,7 @@ def test_inference(name, series, inference_type, typeset, difference): # Conversions in one single step convert_map = [ # Model type, Relation type - (Integer, Float, {"int_nan_series", "float_series2"}), + (Integer, Float, {"int_nan_series", "float_series2", "Int64_int_nan_series"}), (Complex, String, {"str_complex", "str_complex_nan"}), ( Float, @@ -404,6 +412,7 @@ def test_conversion(name, source_type, relation_type, series, member): "textual_float": pd.Series([1.1, 2.0], dtype=np.float64), "textual_float_nan": pd.Series([1.1, 2.0, np.nan], dtype=np.float64), "mixed": np.array([True, False, np.nan]), + "Int64_int_nan_series": np.array([1, 2, 3, np.nan]), } cast_results = { k: v.to_numpy() if isinstance(v, pd.Series) else v for k, v in cast_results.items() diff --git a/tests/pandas_/typesets/test_complete_set.py b/tests/pandas_/typesets/test_complete_set.py index 9628f7803..31517b7bf 100644 --- a/tests/pandas_/typesets/test_complete_set.py +++ b/tests/pandas_/typesets/test_complete_set.py @@ -445,7 +445,11 @@ def test_conversion(name, source_type, relation_type, series, member): [datetime.date(1941, 5, 24), datetime.date(2016, 10, 13)] ), "py_datetime_str": pd.Series( - [datetime.date(1941, 5, 24), datetime.date(2016, 10, 13)] + [ + datetime.datetime(1941, 5, 24, 0, 5, 0), + datetime.datetime(2016, 10, 13, 0, 10, 0), + ], + dtype="datetime64[ns]", ), "string_date": pd.Series([datetime.date(1937, 5, 6), datetime.date(2014, 4, 20)]), } diff --git a/tests/pandas_/typesets/test_standard_set_sparse.py b/tests/pandas_/typesets/test_standard_set_sparse.py index 959db84e6..a64810895 100644 --- a/tests/pandas_/typesets/test_standard_set_sparse.py +++ b/tests/pandas_/typesets/test_standard_set_sparse.py @@ -1,4 +1,4 @@ -from typing import Dict, Set, Type, cast +from typing import Dict, Set, Type import pytest @@ -31,26 +31,35 @@ DateTime: set(), TimeDelta: set(), Categorical: set(), - Object: set(), - Integer: set(), - Complex: set(), - Float: set(), - Boolean: set(), - String: set(), - Generic: { + Object: { + "pd_string_sparse", + "str_obj_sparse", + }, + Integer: { "int_sparse", "pd_int64_sparse", + }, + Complex: { + "complex_sparse", + }, + Float: { "float_sparse", + }, + Boolean: { "bool_sparse", - "complex_sparse", + "pd_bool_sparse", + }, + String: { + "pd_string_sparse", "str_obj_sparse", }, + Generic: set(), } -if pandas_version[0] >= 1 and not_pandas_1_0_5: - contains_map[Generic].add("pd_bool_sparse") - contains_map[Generic].add("pd_string_sparse") +# if pandas_version[0] >= 1 and not_pandas_1_0_5: +# contains_map[Generic].add("pd_bool_sparse") +# contains_map[String].add("pd_string_sparse") @pytest.mark.parametrize(**get_contains_cases(series, contains_map, typeset)) @@ -67,21 +76,18 @@ def test_contains(name, series, contains_type, member): inference_map: Dict[str, Type[VisionsBaseType]] = { - "int_sparse": Generic, - "pd_int64_sparse": Generic, - "float_sparse": Generic, - "bool_sparse": Generic, - "pd_bool_sparse": Generic, - "complex_sparse": Generic, - "str_obj_sparse": Generic, + "int_sparse": Integer, + "pd_int64_sparse": Integer, + "float_sparse": Float, + "bool_sparse": Boolean, + "pd_bool_sparse": Boolean, + "complex_sparse": Complex, + "str_obj_sparse": String, + "pd_string_sparse": String, "pd_categorical_sparse": Generic, - # "datetime_sparse": Generic, + "datetime_sparse": Generic, } -if pandas_version[0] >= 1 and not_pandas_1_0_5: - inference_map["pd_bool_sparse"] = Generic - inference_map["pd_string_sparse"] = Generic - @pytest.mark.parametrize(**get_inference_cases(series, inference_map, typeset)) def test_inference(name, series, inference_type, typeset, difference): diff --git a/tests/python_/typesets/test_python_complete_set.py b/tests/python_/typesets/test_python_complete_set.py index da89b0fe0..e5692b4a7 100644 --- a/tests/python_/typesets/test_python_complete_set.py +++ b/tests/python_/typesets/test_python_complete_set.py @@ -11,7 +11,6 @@ from visions import CompleteSet from visions.backends.pandas.test_utils import pandas_version -from visions.backends.pandas.types.boolean import hasnan_bool_name from visions.test.series import get_series from visions.test.series_geometry import get_geometry_series from visions.test.utils import ( @@ -444,7 +443,10 @@ def test_conversion(name, source_type, relation_type, series, member): datetime.date(1941, 5, 24), datetime.date(2016, 10, 13), ], - "py_datetime_str": [datetime.date(1941, 5, 24), datetime.date(2016, 10, 13)], + "py_datetime_str": [ + datetime.datetime(1941, 5, 24, 0, 5, 0), + datetime.datetime(2016, 10, 13, 0, 10, 0), + ], "string_date": [datetime.date(1937, 5, 6), datetime.date(2014, 4, 20)], } diff --git a/tests/test_typeset_external_type.py b/tests/test_typeset_external_type.py index 726210b47..d8513d17c 100644 --- a/tests/test_typeset_external_type.py +++ b/tests/test_typeset_external_type.py @@ -2,6 +2,7 @@ Test for issue 85 https://github.com/dylan-profiler/visions/issues/85 """ + import pytest from visions import VisionsTypeset