diff --git a/Makefile b/Makefile
index 60b5acb60..02dc3610d 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ pypi_package:
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
- twine upload --skip-existing dist/*
+ twine upload --skip-existing dist/* -u __token__
## Run black linting
lint:
diff --git a/README.md b/README.md
index c1c706578..0a6849302 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,6 @@
And these visions of data types, they kept us up past the dawn.
-
@@ -121,7 +120,8 @@ df.head(2)
-The most important abstraction in `visions` are Types - these represent semantic notions about data. You have access to a
+The most important abstraction in `visions` are Types - these represent semantic notions about data. You have access to
+a
range of well tested types like `Integer`, `Float`, and `Files` covering the most common software development use cases.
Types can be bundled together into typesets. Behind the scenes, `visions` builds a traversable graph for any collection
of types.
diff --git a/build/lib/visions/__init__.py b/build/lib/visions/__init__.py
new file mode 100644
index 000000000..57086c4e0
--- /dev/null
+++ b/build/lib/visions/__init__.py
@@ -0,0 +1,15 @@
+"""Core functionality"""
+
+from visions import types, typesets, utils
+from visions.backends import *
+from visions.declarative import create_type
+from visions.functional import (
+ cast_to_detected,
+ cast_to_inferred,
+ detect_type,
+ infer_type,
+)
+from visions.types import *
+from visions.typesets import *
+
+__version__ = "0.7.6"
diff --git a/build/lib/visions/backends/__init__.py b/build/lib/visions/backends/__init__.py
new file mode 100644
index 000000000..3bdda55da
--- /dev/null
+++ b/build/lib/visions/backends/__init__.py
@@ -0,0 +1,45 @@
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+# Every backend below is optional. Importing a backend package runs its modules, and an
+# ImportError anywhere inside the try body (the third-party library itself or anything
+# the backend package imports) is reported with an info-level log line only.
+try:
+ import pandas as pd
+
+ import visions.backends.pandas
+ from visions.backends.pandas.test_utils import pandas_version
+
+ # For pandas major versions below 1 the BoolDtype extension dtype is imported as well.
+ if pandas_version[0] < 1:
+ from visions.dtypes.boolean import BoolDtype
+ logger.info(f"Pandas backend loaded {pd.__version__}")
+
+except ImportError:
+ logger.info("Pandas backend NOT loaded")
+
+
+# NumPy backend.
+try:
+ import numpy as np
+
+ import visions.backends.numpy
+
+ logger.info(f"Numpy backend loaded {np.__version__}")
+except ImportError:
+ logger.info("Numpy backend NOT loaded")
+
+
+# PySpark backend.
+try:
+ import pyspark
+
+ import visions.backends.spark
+
+ logger.info(f"Pyspark backend loaded {pyspark.__version__}")
+except ImportError:
+ logger.info("Pyspark backend NOT loaded")
+
+
+# Pure-Python backend; no third-party import is attempted first.
+try:
+ import visions.backends.python
+
+ logger.info("Python backend loaded")
+except ImportError:
+ logger.info("Python backend NOT loaded")
diff --git a/build/lib/visions/backends/numpy/__init__.py b/build/lib/visions/backends/numpy/__init__.py
new file mode 100644
index 000000000..868b4572c
--- /dev/null
+++ b/build/lib/visions/backends/numpy/__init__.py
@@ -0,0 +1,3 @@
+# https://het.as.utexas.edu/HET/Software/Numpy/reference/arrays.scalars.html
+import visions.backends.numpy.types
+from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
diff --git a/build/lib/visions/backends/numpy/array_utils.py b/build/lib/visions/backends/numpy/array_utils.py
new file mode 100644
index 000000000..373d1a39b
--- /dev/null
+++ b/build/lib/visions/backends/numpy/array_utils.py
@@ -0,0 +1,67 @@
+import functools
+from typing import Callable, Sequence, Tuple, TypeVar, Union
+
+import numpy as np
+
+from visions.backends.shared.nan_handling import nan_mask
+from visions.backends.shared.utilities import has_import
+
+has_numba = has_import("numba")
+
+if has_numba:
+ import numba as nb
+
+T = TypeVar("T")
+
+
+def array_handle_nulls(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator that filters an array through ``nan_mask`` before calling ``fn``.
+
+    The array is indexed with ``nan_mask(array)`` (presumably keeping the
+    non-null entries - confirm against ``nan_mask``). The filtered array is
+    then routed through ``array_not_empty``, so ``fn`` is skipped and
+    ``False`` is returned when nothing is left.
+    """
+    guarded = array_not_empty(fn)
+
+    @functools.wraps(fn)
+    def inner(array: np.ndarray, *args, **kwargs) -> bool:
+        kept = array[nan_mask(array)]
+        return guarded(kept, *args, **kwargs)
+
+    return inner
+
+
+def array_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator returning ``False`` for arrays whose first axis has length zero.
+
+    Any other array is forwarded to ``fn`` together with the extra arguments.
+    """
+
+    @functools.wraps(fn)
+    def inner(array: np.ndarray, *args, **kwargs) -> bool:
+        has_entries = array.shape[0] != 0
+        return fn(array, *args, **kwargs) if has_entries else False
+
+    return inner
+
+
+def _base_all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+    """Return True when every element of ``array`` is an instance of ``dtypes``.
+
+    An array without elements yields True.
+    """
+    for value in array:
+        if not isinstance(value, dtypes):
+            return False
+    return True
+
+
+# Both branches define `all_type` with the same body (a call to `_base_all_type`); the
+# numba branch additionally defines `all_type_numba`.
+# NOTE(review): nothing in the visible code calls `all_type_numba` - confirm whether it is
+# still needed.
+if has_numba:
+ # TODO: This only works when the numpy array dtype falls under a few categories
+ # There are alternative implementations with forceobj=True which work in all cases
+ # including the use of isinstance, but in those cases worst case performance can be substantially worse
+ # than the default python implementation.
+ # Factory: returns a nopython-jitted checker that is True when every element's type is `dtype`.
+ def all_type_numba(dtype: Union[Tuple, T]):
+ @nb.jit(nopython=True)
+ def inner(array: np.ndarray) -> bool:
+ for i in nb.prange(array.size):
+ if type(array[i]) is not dtype:
+ return False
+ return True
+
+ return inner
+
+ # Pure-Python isinstance check over all elements.
+ def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+ return _base_all_type(array, dtypes)
+
+else:
+
+ # Pure-Python isinstance check over all elements.
+ def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+ return _base_all_type(array, dtypes)
diff --git a/build/lib/visions/backends/numpy/sequences.py b/build/lib/visions/backends/numpy/sequences.py
new file mode 100644
index 000000000..f74b33465
--- /dev/null
+++ b/build/lib/visions/backends/numpy/sequences.py
@@ -0,0 +1,61 @@
+from typing import Dict, Sequence
+from urllib.parse import urlparse
+
+import numpy as np
+
+
+def get_sequences() -> Dict[str, Sequence]:
+    """Return named example sequences (lists and numpy arrays).
+
+    The infinity and unicode entries use ``np.inf``, ``-np.inf`` and ``np.str_``.
+    The former aliases ``np.NINF``, ``np.PINF``, ``np.Infinity`` and
+    ``np.unicode_`` were removed in NumPy 2.0 and made this function raise
+    ``AttributeError`` there; the replacements are equal in value and exist on
+    every NumPy version.
+
+    Returns:
+        A dict mapping a sequence name to its values.
+    """
+    sequences = {
+        "complex_series_float": [
+            complex(0, 0),
+            complex(1, 0),
+            complex(3, 0),
+            complex(-1, 0),
+        ],
+        "url_nan_series": [
+            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
+            urlparse("https://github.com/dylan-profiling/hurricane"),
+            np.nan,
+        ],
+        "mixed": [True, False, np.nan],
+        "float_nan_series": [1.0, 2.5, np.nan],
+        "float_series5": [np.nan, 1.2],
+        "float_with_inf": [np.inf, -np.inf, np.inf, 1000000.0, 5.5],
+        "inf_series": [np.inf, -np.inf, np.inf, np.inf],
+        "int_nan_series": [1, 2, np.nan],
+        "nan_series": [np.nan],
+        "nan_series_2": [np.nan, np.nan, np.nan, np.nan],
+        "string_num_nan": ["1.0", "2.0", np.nan],
+        "string_with_sep_num_nan": ["1,000.0", "2.1", np.nan],
+        "string_flt_nan": ["1.0", "45.67", np.nan],
+        "string_str_nan": [
+            "I was only robbing the register,",
+            "I hope you understand",
+            "One of us had better call up the cops",
+            "In the hot New Jersey night",
+            np.nan,
+        ],
+        "float_series3": np.array([1.2, 2, 3, 4], dtype=np.float64),
+        "np_uint32": np.array([1, 2, 3, 4], dtype=np.uint32),
+        "string_np_unicode_series": np.array(["upper", "hall"], dtype=np.str_),
+        "complex_series": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+        ],
+        "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
+        "complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None],
+        "complex_series_nan_2": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+            np.nan,
+        ],
+        # NOTE(review): same values as "complex_series_nan_2" - confirm whether
+        # a plain Python float("nan") was intended here.
+        "complex_series_py_nan": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+            np.nan,
+        ],
+    }
+    return sequences
diff --git a/build/lib/visions/backends/numpy/test_utils.py b/build/lib/visions/backends/numpy/test_utils.py
new file mode 100644
index 000000000..fcfbd2c6d
--- /dev/null
+++ b/build/lib/visions/backends/numpy/test_utils.py
@@ -0,0 +1,172 @@
+"""
+A selection of testing utilities for visions.
+"""
+
+import functools
+from typing import Callable, Dict, List, Optional, Type, Union
+
+import numpy as np
+
+from visions.backends.numpy.array_utils import array_handle_nulls
+
+
+def option_coercion_evaluator(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], Optional[np.ndarray]]:
+    """Wrap a coercion function so that handled failures become ``None``.
+
+    Args:
+        fn: A function coercing an array to another array.
+        extra_errors: Exceptions to handle in addition to ``ValueError``,
+            ``TypeError`` and ``AttributeError``.
+
+    Returns:
+        A callable returning the coerced array, or ``None`` when ``fn`` raised
+        one of the handled exceptions. Other exceptions propagate.
+    """
+    handled = [ValueError, TypeError, AttributeError]
+    if extra_errors:
+        handled += extra_errors
+    handled_types = tuple(handled)
+
+    @functools.wraps(fn)
+    def f(array: np.ndarray) -> Optional[np.ndarray]:
+        try:
+            coerced = fn(array)
+        except handled_types:
+            return None
+        return coerced
+
+    return f
+
+
+def coercion_test(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], bool]:
+    """Create a test reporting whether a coercion function succeeds.
+
+    Args:
+        fn: A function coercing an array to another type.
+        extra_errors: Additional exceptions that count as a failed coercion.
+
+    Returns:
+        A callable that is True when ``fn`` returned something other than
+        ``None`` without raising a handled exception, and False otherwise.
+    """
+    guarded = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(fn)
+    def f(array: np.ndarray) -> bool:
+        return guarded(array) is not None
+
+    return f
+
+
+def coercion_true_test(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], bool]:
+    """A coercion truth test generator
+
+    Creates a test that passes when the coercion function succeeds and every
+    element of the coerced result is truthy.
+
+    Args:
+        fn: A function coercing an array to another type.
+        extra_errors: Additional exceptions to catch
+
+    Returns:
+        A callable returning False when the coercion failed, otherwise whether
+        all coerced values are truthy.
+    """
+    tester = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(tester)
+    def f(array: np.ndarray) -> bool:
+        result = tester(array)
+        if result is None:
+            return False
+        # Reduce the coerced values. The earlier code reduced the untouched
+        # input (``array.all()``), which ignored what ``fn`` produced.
+        return bool(np.all(result))
+
+    return f
+
+
+def coercion_equality_test(
+    fn: Callable[[np.ndarray], np.ndarray]
+) -> Callable[[np.ndarray], bool]:
+    """Create a test that passes when coercion leaves the values unchanged.
+
+    Useful when only the data type of an array should change, not the data,
+    for example when converting an integer to a float.
+
+    Args:
+        fn: A function coercing an array to another type.
+
+    Returns:
+        A callable returning False when the coercion failed, otherwise the
+        result of ``np.array_equal`` between the input and the coerced array.
+    """
+    guarded = option_coercion_evaluator(fn)
+
+    @functools.wraps(guarded)
+    def f(array: np.ndarray) -> bool:
+        coerced = guarded(array)
+        if coerced is None:
+            return False
+        return np.array_equal(array, coerced)
+
+    return f
+
+
+def coercion_single_map_test(mapping: List[Dict]) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a test that passes when one mapping of a list covers all values.
+
+    Args:
+        mapping: A list of dicts; only the keys are used.
+
+    Returns:
+        A callable (wrapped in ``array_handle_nulls``) that is True when, for at
+        least one dict, every value of the array is among that dict's keys.
+    """
+    # The key lists depend only on ``mapping``; build them once, not per call.
+    key_lists = [list(single_map.keys()) for single_map in mapping]
+
+    @array_handle_nulls
+    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
+        # ``state`` is not read here; ``None`` replaces the former mutable
+        # ``{}`` default, which was shared between calls.
+        return any(np.isin(array, keys).all() for keys in key_lists)
+
+    return f
+
+
+def coercion_multi_map_test(mapping: Dict) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a test that passes when every value is a key of ``mapping``.
+
+    Args:
+        mapping: A dict; only the keys are used.
+
+    Returns:
+        A callable (wrapped in ``array_handle_nulls``) reporting whether all
+        values of the array are among the keys of ``mapping``.
+    """
+    # The key list depends only on ``mapping``; build it once, not per call.
+    keys = list(mapping.keys())
+
+    @array_handle_nulls
+    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
+        # ``state`` is not read here; ``None`` replaces the former mutable
+        # ``{}`` default, which was shared between calls.
+        return np.isin(array, keys).all()
+
+    return f
+
+
+def coercion_map_test(
+    mapping: Union[List[Dict], Dict]
+) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a testing function for a single mapping or a list of mappings.
+
+    Args:
+        mapping: A dict with a mapping or a list of dicts
+
+    Returns:
+        Callable that checks if an array consists of the mappable values
+
+    Raises:
+        ValueError: If ``mapping`` is neither a dict nor a list.
+
+    Examples:
+        >>> coercion_map_test({"Yes": True, "No": False})
+        >>> coercion_map_test(
+        ...     [
+        ...         {"Yes": True, "No": False},
+        ...         {"Y": True, "N": False},
+        ...     ]
+        ... )
+    """
+    if isinstance(mapping, dict):
+        return coercion_multi_map_test(mapping)
+    if isinstance(mapping, list):
+        return coercion_single_map_test(mapping)
+    raise ValueError("Mapping should be dict or list of dicts")
+
+
+def coercion_map(
+    mapping: Union[List[Dict], Dict]
+) -> Callable[[np.ndarray], np.ndarray]:
+    """Build an element-wise mapper from a dict or a list of dicts.
+
+    A list of dicts is merged into one dict first; for duplicate keys the
+    later dict wins. Values missing from the mapping are mapped to ``np.nan``.
+
+    Args:
+        mapping: a dict to map, or a list of dicts.
+
+    Returns:
+        A callable (``np.vectorize``) that maps the array.
+
+    Raises:
+        ValueError: If ``mapping`` is neither a dict nor a list.
+    """
+    if isinstance(mapping, list):
+        merged = {}
+        for single in mapping:
+            for key, target in single.items():
+                merged[key] = target
+        mapping = merged
+    elif not isinstance(mapping, dict):
+        raise ValueError("Mapping should be dict or list of dicts")
+
+    return np.vectorize(lambda value: mapping.get(value, np.nan))
diff --git a/build/lib/visions/backends/numpy/types/__init__.py b/build/lib/visions/backends/numpy/types/__init__.py
new file mode 100644
index 000000000..e1dea331c
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/__init__.py
@@ -0,0 +1,8 @@
+import visions.backends.numpy.types.boolean
+import visions.backends.numpy.types.complex
+import visions.backends.numpy.types.date_time
+import visions.backends.numpy.types.float
+import visions.backends.numpy.types.integer
+import visions.backends.numpy.types.object
+import visions.backends.numpy.types.string
+import visions.backends.numpy.types.time_delta
diff --git a/build/lib/visions/backends/numpy/types/boolean.py b/build/lib/visions/backends/numpy/types/boolean.py
new file mode 100644
index 000000000..96ad43eab
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/boolean.py
@@ -0,0 +1,61 @@
+from typing import Dict, List
+
+import numpy as np
+
+from visions.backends.numpy.array_utils import (
+ all_type,
+ array_handle_nulls,
+ array_not_empty,
+)
+from visions.backends.numpy.test_utils import coercion_map, coercion_map_test
+from visions.backends.python.types.boolean import get_boolean_coercions
+from visions.backends.shared.nan_handling import nan_mask
+from visions.types.boolean import Boolean
+from visions.types.object import Object
+from visions.types.string import String
+
+# Coercion mappings returned by get_boolean_coercions("en"); the String relationship and
+# transformer in this module pass them to coercion_map_test and coercion_map.
+string_coercions = get_boolean_coercions("en")
+
+
+@Boolean.register_relationship(Object, np.ndarray)
+@array_handle_nulls
+def object_is_boolean(array: np.ndarray, state: dict) -> bool:
+    """Report whether ``all_type`` finds only ``bool`` elements in the array."""
+    only_bools = all_type(array, bool)
+    return only_bools
+
+
+@Boolean.register_transformer(Object, np.ndarray)
+def object_to_boolean(array: np.ndarray, state: dict) -> np.ndarray:
+    """Return the array as-is; no conversion is performed."""
+    unchanged = array
+    return unchanged
+
+
+@Boolean.register_relationship(String, np.ndarray)
+def string_is_boolean(array: np.ndarray, state: dict) -> bool:
+    """Test whether the lower-cased entries selected by ``nan_mask`` are all mappable.
+
+    A ``ValueError``, ``TypeError`` or ``AttributeError`` raised along the way
+    (for example by calling ``.lower()`` on a non-string) yields False.
+    """
+    try:
+        present = array[nan_mask(array)]
+        # TODO: Nan handling not implemented for generators yet
+        lowered = np.array([text.lower() for text in present])
+        tester = coercion_map_test(string_coercions)
+        return tester(lowered, state)
+    except (ValueError, TypeError, AttributeError):
+        return False
+
+
+@Boolean.register_transformer(String, np.ndarray)
+def string_to_boolean(array: np.ndarray, state: dict) -> np.ndarray:
+    """Map the entries selected by ``nan_mask`` through the boolean coercions.
+
+    Works on a copy; entries outside the mask keep their original value.
+    """
+    converted = array.copy()
+    present = nan_mask(converted)
+    # TODO: Nan handling not implemented for generators yet
+    lowered = np.array([text.lower() for text in converted[present]])
+    mapped = coercion_map(string_coercions)(lowered)
+    converted[present] = object_to_boolean(mapped, state)
+    return converted
+
+
+@Boolean.contains_op.register
+@array_handle_nulls
+@array_not_empty
+def boolean_contains(array: np.ndarray, state: dict) -> bool:
+    """True for a boolean dtype, or when ``all_type`` finds only ``bool`` elements."""
+    has_bool_dtype = np.issubdtype(array.dtype, np.bool_)
+    return True if has_bool_dtype else all_type(array, bool)
diff --git a/build/lib/visions/backends/numpy/types/complex.py b/build/lib/visions/backends/numpy/types/complex.py
new file mode 100644
index 000000000..9eeebaf87
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/complex.py
@@ -0,0 +1,40 @@
+import numpy as np
+from packaging import version
+
+from visions.backends.numpy import test_utils
+from visions.backends.numpy.array_utils import array_not_empty
+from visions.backends.numpy.types.float import string_is_float
+from visions.types.complex import Complex
+from visions.types.string import String
+
+# True when the installed NumPy version is 1.19.0 or older; string_to_complex then converts
+# element by element instead of calling ndarray.astype(complex).
+_OLD_NUMPY = version.parse(np.version.version) <= version.parse("1.19.0")
+
+
+def imaginary_in_string(array: np.ndarray, imaginary_indicator: tuple = ("j", "i")):
+    """Return True when any element contains one of the indicator substrings.
+
+    This is a plain substring test, so e.g. "inf" matches the "i" indicator.
+    """
+    for text in array:
+        for marker in imaginary_indicator:
+            if marker in text:
+                return True
+    return False
+
+
+@Complex.register_transformer(String, np.ndarray)
+def string_to_complex(array: np.array, state: dict) -> np.ndarray:
+    """Convert an array of strings to complex numbers.
+
+    With NumPy newer than 1.19.0 this is ``array.astype(complex)``; otherwise
+    each element is passed to the built-in ``complex``.
+    """
+    if not _OLD_NUMPY:
+        return array.astype(complex)
+    return np.array([complex(v) for v in array])
+
+
+@Complex.register_relationship(String, np.ndarray)
+def string_is_complex(array: np.ndarray, state: dict) -> bool:
+    """Test whether an array of strings should be read as complex numbers.
+
+    Requires that the complex conversion succeeds, that the array is not
+    already accepted by ``string_is_float``, and that at least one element
+    contains an imaginary indicator.
+    """
+    convert = test_utils.option_coercion_evaluator(
+        lambda x: string_to_complex(x, state)
+    )
+    if convert(array) is None:
+        return False
+    if string_is_float(array, state):
+        return False
+    return imaginary_in_string(array)
+
+
+@Complex.contains_op.register
+@array_not_empty
+def complex_contains(array: np.ndarray, state: dict) -> bool:
+    """Report whether the array dtype is a sub-dtype of the built-in ``complex``."""
+    dtype = array.dtype
+    return np.issubdtype(dtype, complex)
diff --git a/build/lib/visions/backends/numpy/types/date_time.py b/build/lib/visions/backends/numpy/types/date_time.py
new file mode 100644
index 000000000..9d1206740
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/date_time.py
@@ -0,0 +1,46 @@
+from datetime import datetime
+from functools import partial
+
+import numpy as np
+import pandas as pd
+
+from visions.backends.numpy import test_utils
+from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
+from visions.backends.pandas.types.date_time import pandas_infer_datetime
+from visions.types import DateTime, String
+
+
+@DateTime.register_relationship(String, np.ndarray)
+@array_handle_nulls
+def string_is_datetime(array: np.ndarray, state: dict) -> bool:
+    """Test whether every entry of a string array converts to a datetime.
+
+    False for an empty array, when ``string_to_datetime`` raises a handled
+    error (including ``OverflowError``), or when any converted value is NaT.
+    """
+    if len(array) == 0:
+        return False
+
+    convert = test_utils.option_coercion_evaluator(
+        partial(string_to_datetime, state=state), [OverflowError, TypeError]
+    )
+    coerced = convert(array)
+
+    return coerced is not None and not np.isnat(coerced).any()
+
+
+@DateTime.register_transformer(String, np.ndarray)
+def string_to_datetime(array: np.ndarray, state: dict) -> np.ndarray:
+    """Convert strings to datetimes by delegating to the pandas backend.
+
+    The array is wrapped in a ``pd.Series``, passed to
+    ``pandas_infer_datetime`` and turned back into a numpy array.
+    """
+    as_series = pd.Series(array)
+    inferred = pandas_infer_datetime(as_series, state)
+    return inferred.to_numpy()
+
+
+@DateTime.contains_op.register
+@array_handle_nulls
+@array_not_empty
+def datetime_contains(array: np.ndarray, state: dict) -> bool:
+    """True for a ``datetime64`` dtype, or when every element is a ``datetime``."""
+    if not np.issubdtype(array.dtype, np.datetime64):
+        for value in array:
+            if not isinstance(value, datetime):
+                return False
+    return True
diff --git a/build/lib/visions/backends/numpy/types/float.py b/build/lib/visions/backends/numpy/types/float.py
new file mode 100644
index 000000000..a6eaabaf3
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/float.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+from visions.backends.numpy import test_utils
+from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
+from visions.types.complex import Complex
+from visions.types.float import Float
+from visions.types.string import String
+from visions.utils.warning_handling import suppress_warnings
+
+
+def test_string_leading_zeros(array: np.ndarray, coerced_array: np.ndarray):
+    """Return False when a string whose coerced value exceeds 1 starts with "0".
+
+    Only the strings at positions where ``coerced_array > 1`` are inspected.
+    """
+    above_one = array[coerced_array > 1]
+    for text in above_one:
+        if text[0] == "0":
+            return False
+    return True
+
+
+@Float.register_relationship(String, np.ndarray)
+@array_handle_nulls
+def string_is_float(array: np.ndarray, state: dict) -> bool:
+    """Test whether an array of strings can be read as floats.
+
+    Requires that the float conversion succeeds, that the converted array is
+    accepted by ``float_contains``, and that no value greater than 1 was
+    written with a leading zero.
+    """
+    # np.float64 replaces the alias np.float_, which was removed in NumPy 2.0.
+    coerced_array = test_utils.option_coercion_evaluator(
+        lambda s: s.astype(np.float64)
+    )(array)
+
+    return (
+        coerced_array is not None
+        and float_contains(coerced_array, state)
+        and test_string_leading_zeros(array, coerced_array)
+    )
+
+
+@Float.register_transformer(String, np.ndarray)
+def string_to_float(array: np.array, state: dict) -> np.ndarray:
+    """Convert an array of strings to 64-bit floats via ``ndarray.astype``."""
+    # np.float64 replaces the alias np.float_, which was removed in NumPy 2.0.
+    return array.astype(np.float64)
+
+
+@Float.register_relationship(Complex, np.ndarray)
+def complex_is_float(array: np.array, state: dict) -> bool:
+    """True when the imaginary part of every element equals zero."""
+    imaginary_parts = np.imag(array)
+    has_no_imaginary = imaginary_parts == 0
+    return all(has_no_imaginary)
+
+
+@Float.register_transformer(Complex, np.ndarray)
+def complex_to_float(array: np.array, state: dict) -> np.ndarray:
+    """Cast a complex array to 64-bit floats.
+
+    The cast runs inside ``suppress_warnings`` (presumably to silence NumPy's
+    warning about discarding the imaginary part - confirm against that helper).
+    """
+    # np.float64 replaces the alias np.float_, which was removed in NumPy 2.0.
+    return suppress_warnings(lambda s: s.astype(np.float64))(array)
+
+
+@Float.contains_op.register
+@array_handle_nulls
+@array_not_empty
+def float_contains(array: np.ndarray, state: dict) -> bool:
+    """Report whether the array dtype is a sub-dtype of ``np.floating``."""
+    dtype = array.dtype
+    return np.issubdtype(dtype, np.floating)
diff --git a/build/lib/visions/backends/numpy/types/integer.py b/build/lib/visions/backends/numpy/types/integer.py
new file mode 100644
index 000000000..c7634be3b
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/integer.py
@@ -0,0 +1,31 @@
+import numpy as np
+
+from visions.backends.numpy.array_utils import array_handle_nulls
+from visions.types.float import Float
+from visions.types.integer import Integer
+
+
+@Integer.register_relationship(Float, np.ndarray)
+def float_is_integer(series: np.ndarray, state: dict) -> bool:
+    """True when every non-NaN value has no fractional part."""
+    without_nan = series[~np.isnan(series)]
+    fractional = np.mod(without_nan, 1)
+    return np.all(fractional == 0)
+
+
+# TODO: The array_handle_nulls is actually removing nulls from the result. This is _far_ from ideal but there is no
+# other native way to represent nullable integers in numpy
+@Integer.register_transformer(Float, np.ndarray)
+@array_handle_nulls
+def float_to_integer(series: np.ndarray, state: dict) -> np.ndarray:
+    """Cast the array that ``array_handle_nulls`` passes in to ``int``."""
+    as_int = series.astype(int)
+    return as_int
+
+
+@Integer.contains_op.register
+@array_handle_nulls
+def integer_contains(sequence: np.ndarray, state: dict) -> bool:
+    """Test whether an array holds integers.
+
+    Empty arrays and ``timedelta64`` arrays are rejected first. Integer
+    dtypes are accepted, and object arrays are accepted when every element is
+    an ``int`` that is not a ``bool``. Everything else is rejected.
+    """
+    if sequence.shape[0] == 0 or np.issubdtype(sequence.dtype, np.timedelta64):
+        return False
+    if np.issubdtype(sequence.dtype, np.integer):
+        return True
+    if not np.issubdtype(sequence.dtype, np.object_):
+        return False
+    for value in sequence:
+        # bool is a subclass of int, so it has to be excluded explicitly.
+        if isinstance(value, bool) or not isinstance(value, int):
+            return False
+    return True
diff --git a/build/lib/visions/backends/numpy/types/object.py b/build/lib/visions/backends/numpy/types/object.py
new file mode 100644
index 000000000..c1b07b1f8
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/object.py
@@ -0,0 +1,32 @@
+from datetime import datetime
+
+import numpy as np
+
+from visions.backends.numpy.array_utils import (
+ all_type,
+ array_handle_nulls,
+ array_not_empty,
+)
+from visions.types.object import Object
+
+
+def not_excluded_type(array: np.ndarray, excludes) -> bool:
+    """Return False only when the whole array is of one excluded type.
+
+    The type of the first element decides: if it is not an instance of
+    ``excludes`` (or the array is empty) the result is True; otherwise the
+    result is True only when ``all_type`` reports that not every element has
+    that first element's type.
+    """
+    if len(array) == 0:
+        return True
+
+    first = array[0]
+    if not isinstance(first, excludes):
+        return True
+
+    return not all_type(array, type(first))
+
+
+@Object.contains_op.register
+@array_handle_nulls
+@array_not_empty
+def object_contains(array: np.ndarray, state: dict) -> bool:
+    """Test whether an array belongs to the Object type.
+
+    String dtypes are accepted directly. Object dtypes are accepted unless
+    ``not_excluded_type`` rejects them for ``bool``, ``int`` or ``datetime``.
+    All other dtypes are rejected.
+    """
+    dtype = array.dtype
+    if np.issubdtype(dtype, np.str_):
+        return True
+
+    if np.issubdtype(dtype, np.object_):
+        return not_excluded_type(array, (bool, int, datetime))
+
+    return False
diff --git a/build/lib/visions/backends/numpy/types/string.py b/build/lib/visions/backends/numpy/types/string.py
new file mode 100644
index 000000000..ee956599e
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/string.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
+from visions.types.string import String
+
+
+@array_handle_nulls
+def _is_string(array: np.ndarray, state: dict):
+    """Check that an array consists of strings.
+
+    The first five elements must be ``str`` instances; after that the array
+    must compare equal to its own ``astype(str)`` conversion. A ``TypeError``
+    or ``ValueError`` during that comparison yields False.
+    """
+    for value in array[0:5]:
+        if not isinstance(value, str):
+            return False
+    try:
+        as_text = array.astype(str)
+        return (as_text == array).all()
+    except (TypeError, ValueError):
+        return False
+
+
+@String.contains_op.register
+@array_not_empty
+def string_contains(array: np.ndarray, state: dict) -> bool:
+    """True for a string dtype; other dtypes are decided by ``_is_string``."""
+    has_str_dtype = np.issubdtype(array.dtype, np.str_)
+    return True if has_str_dtype else _is_string(array, state)
diff --git a/build/lib/visions/backends/numpy/types/time_delta.py b/build/lib/visions/backends/numpy/types/time_delta.py
new file mode 100644
index 000000000..e5e941491
--- /dev/null
+++ b/build/lib/visions/backends/numpy/types/time_delta.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+from visions.backends.numpy.array_utils import array_not_empty
+from visions.types.time_delta import TimeDelta
+
+
+@TimeDelta.contains_op.register
+@array_not_empty
+def time_delta_contains(array: np.ndarray, state: dict) -> bool:
+    """Report whether the array dtype is a sub-dtype of ``np.timedelta64``.
+
+    Empty arrays are rejected by the ``array_not_empty`` decorator.
+    """
+    dtype = array.dtype
+    return np.issubdtype(dtype, np.timedelta64)
diff --git a/build/lib/visions/backends/pandas/__init__.py b/build/lib/visions/backends/pandas/__init__.py
new file mode 100644
index 000000000..9911dfad7
--- /dev/null
+++ b/build/lib/visions/backends/pandas/__init__.py
@@ -0,0 +1,2 @@
+import visions.backends.pandas.traversal
+import visions.backends.pandas.types
diff --git a/build/lib/visions/backends/pandas/sequences.py b/build/lib/visions/backends/pandas/sequences.py
new file mode 100644
index 000000000..e80312eb3
--- /dev/null
+++ b/build/lib/visions/backends/pandas/sequences.py
@@ -0,0 +1,127 @@
+import datetime
+from typing import Dict, Iterable
+
+import numpy as np
+import pandas as pd
+
+from visions.backends.pandas.test_utils import pandas_version
+from visions.backends.pandas.types.boolean import hasnan_bool_name
+
+
+# Returns a dict of named example pandas Series covering float, bool, nullable integer,
+# categorical, datetime, timedelta, empty and all-null cases.
+def get_sequences() -> Dict[str, Iterable]:
+ sequences = {
+ "float_series6": pd.Series([np.nan, 1.1], dtype=np.single),
+ "bool_series2": pd.Series([True, False, False, True], dtype=bool),
+ # hasnan_bool_name is imported from the pandas boolean backend and used as the dtype here.
+ "nullable_bool_series": pd.Series([True, False, None], dtype=hasnan_bool_name),
+ "int_str_range": pd.Series(range(20)).astype("str"),
+ "Int64_int_series": pd.Series([1, 2, 3], dtype="Int64"),
+ "Int64_int_nan_series": pd.Series([1, 2, 3, np.nan], dtype="Int64"),
+ "pd_uint32": pd.Series([1, 2, 3, 4], dtype="UInt32"),
+ "categorical_int_series": pd.Series([1, 2, 3], dtype="category"),
+ "categorical_char": pd.Series(
+ pd.Categorical(
+ ["A", "B", "C", "C", "B", "A"],
+ categories=["A", "B", "C"],
+ ordered=False,
+ ),
+ ),
+ "categorical_float_series": pd.Series([1.0, 2.0, 3.1], dtype="category"),
+ "categorical_string_series": pd.Series(["Georgia", "Sam"], dtype="category"),
+ "categorical_complex_series": pd.Series(
+ [complex(0, 0), complex(1, 2), complex(3, -1)],
+ dtype="category",
+ ),
+ # Same values as "categorical_char" but with ordered=True.
+ "ordinal": pd.Series(
+ pd.Categorical(
+ ["A", "B", "C", "C", "B", "A"],
+ categories=["A", "B", "C"],
+ ordered=True,
+ ),
+ ),
+ "timestamp_series": pd.to_datetime(
+ pd.Series(
+ [
+ datetime.datetime(2017, 3, 5, 12, 2),
+ datetime.datetime(2019, 12, 4),
+ ],
+ )
+ ),
+ "timestamp_series_nat": pd.to_datetime(
+ pd.Series(
+ [
+ datetime.datetime(2017, 3, 5),
+ datetime.datetime(2019, 12, 4, 3, 2, 0),
+ pd.NaT,
+ ],
+ )
+ ),
+ "date_series_nat": pd.to_datetime(
+ pd.Series(
+ [
+ datetime.datetime(2017, 3, 5),
+ datetime.datetime(2019, 12, 4),
+ pd.NaT,
+ ],
+ )
+ ),
+ # Timezone-aware timestamps (tz="Europe/Brussels").
+ "timestamp_aware_series": pd.Series(
+ pd.date_range(
+ start="2013-05-18 12:00:01",
+ periods=2,
+ freq="h",
+ tz="Europe/Brussels",
+ )
+ ),
+ "datetime": pd.to_datetime(
+ pd.Series(
+ [
+ datetime.date(2011, 1, 1),
+ datetime.date(2012, 1, 2),
+ datetime.date(2013, 1, 1),
+ ],
+ )
+ ),
+ # Disabled example with year-1 datetimes; the link below covers pandas timestamp limitations.
+ # http://pandas-docs.github.io/pandas-docs-travis/user_guide/timeseries.html#timestamp-limitations
+ # pd.to_datetime(
+ # pd.Series(
+ # [
+ # datetime.datetime(year=1, month=1, day=1, hour=8, minute=43, second=12),
+ # datetime.datetime(year=1, month=1, day=1, hour=9, minute=43, second=12),
+ # datetime.datetime(
+ # year=1, month=1, day=1, hour=10, minute=43, second=12
+ # ),
+ # ],
+ # name="datetime_to_time",
+ # )
+ # ),
+ "timedelta_series": pd.Series([pd.Timedelta(days=i) for i in range(3)]),
+ "timedelta_series_nat": pd.Series(
+ [pd.Timedelta(days=i) for i in range(3)] + [pd.NaT],
+ ),
+ "timedelta_negative": pd.Series(
+ [
+ pd.Timedelta("1 days 00:03:43"),
+ pd.Timedelta("5 days 12:33:57"),
+ pd.Timedelta("0 days 01:25:07"),
+ pd.Timedelta("-2 days 13:46:56"),
+ pd.Timedelta("1 days 23:49:25"),
+ ],
+ ),
+ # Empty series, one per dtype.
+ "empty_float": pd.Series([], dtype=float),
+ "empty_int64": pd.Series([], dtype="Int64"),
+ "empty_object": pd.Series([], dtype="object"),
+ "empty_bool": pd.Series([], dtype=bool),
+ "float_series4": pd.Series([1, 2, 3.05, 4], dtype=np.float64),
+ # Null Sequences
+ "all_null_none": pd.Series([None, None]),
+ "all_null_nan": pd.Series([np.nan, np.nan]),
+ "all_null_nat": pd.Series([pd.NaT, pd.NaT]),
+ "all_null_empty_str": pd.Series(["", ""]),
+ }
+
+ # The "string" extension dtype series is only added for pandas major version 1 or later.
+ if pandas_version[0] >= 1:
+ sequences["string_dtype_series"] = pd.Series(
+ ["Patty", "Valentine"], dtype="string"
+ )
+
+ return sequences
diff --git a/build/lib/visions/backends/pandas/series_utils.py b/build/lib/visions/backends/pandas/series_utils.py
new file mode 100644
index 000000000..b4533c28c
--- /dev/null
+++ b/build/lib/visions/backends/pandas/series_utils.py
@@ -0,0 +1,93 @@
+import functools
+from typing import Callable
+
+import pandas as pd
+
+
+# For future reference: get the dtype from the subtype when the series is sparse
+def series_handle_sparse_dtype(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator that records the effective dtype of a series in ``state``.
+
+    ``state["dtype"]`` is set to the sparse subtype for a ``pd.SparseDtype``
+    series and to the plain dtype otherwise, before ``fn`` is called.
+    """
+
+    @functools.wraps(fn)
+    def inner(series: pd.Series, state: dict, *args, **kwargs) -> bool:
+        dtype = series.dtype
+        state["dtype"] = dtype.subtype if isinstance(dtype, pd.SparseDtype) else dtype
+        return fn(series, state, *args, **kwargs)
+
+    return inner
+
+
+def series_handle_nulls(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator that drops missing values before calling ``fn``.
+
+    Returns ``False`` without calling ``fn`` when the series is empty after
+    the missing values have been dropped.
+    """
+
+    @functools.wraps(fn)
+    def inner(series: pd.Series, *args, **kwargs) -> bool:
+        cleaned = series.dropna() if series.hasnans else series
+        # TODO: use series_not_empty?
+        if cleaned.empty:
+            return False
+        return fn(cleaned, *args, **kwargs)
+
+    return inner
+
+
+def series_not_sparse(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator to exclude sparse series
+
+    Returns ``False`` without calling ``fn`` when the series has a
+    ``pd.SparseDtype`` dtype; any other series is forwarded to ``fn``.
+    """
+
+    @functools.wraps(fn)
+    def inner(series: pd.Series, *args, **kwargs) -> bool:
+        # Sparseness is a property of the dtype. The earlier check tested the
+        # Series object itself, which is never a SparseDtype, so it never fired.
+        if isinstance(series.dtype, pd.SparseDtype):
+            return False
+        return fn(series, *args, **kwargs)
+
+    return inner
+
+
+def series_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator returning ``False`` for empty series instead of calling ``fn``."""
+
+    @functools.wraps(fn)
+    def inner(series: pd.Series, *args, **kwargs) -> bool:
+        return False if series.empty else fn(series, *args, **kwargs)
+
+    return inner
+
+
+# TODO: What is the type signature on is_method????
+def _contains_instance_attrs(
+    series: pd.Series, is_method, class_name: str, attrs: list, sample_size: int = 1
+) -> bool:
+    """Check a series against a class test and a list of required attributes.
+
+    ``is_method(value, class_name)`` must hold for the first ``sample_size``
+    values, and every value of the series must have all attributes in
+    ``attrs``.
+    """
+    # TODO: user configurable .head or .sample
+    # TODO: performance testing for series[0], series.iloc[0], series.head, series.sample
+    for sampled in series.head(sample_size):
+        if not is_method(sampled, class_name):
+            return False
+
+    try:
+        for item in series:
+            for attr in attrs:
+                if not hasattr(item, attr):
+                    return False
+        return True
+    except AttributeError:
+        return False
+
+
+# TODO: What is the type signature on class_name????
+def class_name_attrs(
+    series: pd.Series, class_name, attrs: list, sample_size: int = 1
+) -> bool:
+    """Variant of ``_contains_instance_attrs`` that compares class names.
+
+    A sampled value matches when the name of its class equals
+    ``class_name.__name__``.
+    """
+
+    def has_same_class_name(instance, expected):
+        return instance.__class__.__name__ == expected.__name__
+
+    return _contains_instance_attrs(
+        series, has_same_class_name, class_name, attrs, sample_size
+    )
+
+
+# TODO: What is the type signature on class_name????
+def isinstance_attrs(
+    series: pd.Series, class_name, attrs: list, sample_size: int = 1
+) -> bool:
+    """Variant of ``_contains_instance_attrs`` that uses ``isinstance`` as the class test."""
+    class_check = isinstance
+    return _contains_instance_attrs(series, class_check, class_name, attrs, sample_size)
diff --git a/build/lib/visions/backends/pandas/test_utils.py b/build/lib/visions/backends/pandas/test_utils.py
new file mode 100644
index 000000000..bd3076dcd
--- /dev/null
+++ b/build/lib/visions/backends/pandas/test_utils.py
@@ -0,0 +1,173 @@
+"""
+A selection of testing utilities for visions.
+"""
+
+import functools
+from typing import Callable, Dict, List, Optional, Type, Union
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import series_handle_nulls
+# Keep numeric parts only so tags like "0rc1" cannot break the import.
+pandas_version = tuple(int(i) for i in pd.__version__.split(".") if i.isdigit())
+pandas_na_value = pd.NA if hasattr(pd, "NA") else None
+
+
+def option_coercion_evaluator(
+    fn: Callable[[pd.Series], pd.Series],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[pd.Series], Optional[pd.Series]]:
+    """A coercion test evaluator
+    Wraps a coercion function so that an expected failure yields None instead of
+    an exception. ValueError, TypeError and AttributeError are always expected.
+    Args:
+        fn: A function coercing a Series to another Series.
+        extra_errors: Additional exception types to treat as a failed coercion
+    Returns:
+        A callable giving the coerced series, or None when the coercion failed.
+    """
+    # Build the tuple of caught exception types once, not on every call.
+    caught = (ValueError, TypeError, AttributeError) + tuple(extra_errors or ())
+
+    @functools.wraps(fn)
+    def evaluate(series: pd.Series) -> Optional[pd.Series]:
+        try:
+            coerced = fn(series)
+        except caught:
+            return None
+        return coerced
+
+    return evaluate
+
+
+def coercion_test(
+    fn: Callable[[pd.Series], pd.Series],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[pd.Series], bool]:
+    """A coercion test generator
+    Builds a boolean test from a coercion function: the test passes whenever the
+    coercion completes without raising one of the expected errors.
+    Args:
+        fn: A function coercing a Series to another type.
+        extra_errors: Additional exceptions to catch
+    Returns:
+        A callable returning True if the coercion succeeded, False otherwise.
+    """
+    evaluate = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(fn)
+    def succeeds(series: pd.Series) -> bool:
+        # The evaluator signals a failed coercion by returning None.
+        return evaluate(series) is not None
+
+    return succeeds
+
+
+def coercion_true_test(
+    fn: Callable[[pd.Series], pd.Series],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[pd.Series], bool]:
+    """A coercion truthiness test generator
+    Creates a coercion test based on a provided coercion function which also
+    requires every value of the coerced output to be truthy. This is useful when
+    a failed coercion shows up as falsy values rather than as an exception.
+    Args:
+        fn: A function coercing a Series to another type.
+        extra_errors: Additional exceptions to catch
+    Returns:
+        A callable that is True only if coercion succeeded and all coerced values are truthy.
+    """
+    tester = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(tester)
+    def f(series: pd.Series) -> bool:
+        result = tester(series)
+        # Judge the coerced values, not the raw input (which ignores what ``fn`` produced).
+        return False if result is None else bool(result.all())
+
+    return f
+
+
+def coercion_equality_test(
+    fn: Callable[[pd.Series], pd.Series]
+) -> Callable[[pd.Series], bool]:
+    """A coercion equality test generator
+    Builds a test that passes only when the coercion succeeds and every coerced
+    value still equals its input, e.g. when converting an integer to a float.
+    Args:
+        fn: A function coercing a Series to another type.
+    Returns:
+        A callable returning True if the coercion succeeded and preserved all values.
+    """
+    evaluate = option_coercion_evaluator(fn)
+
+    @functools.wraps(evaluate)
+    def values_preserved(series: pd.Series) -> bool:
+        coerced = evaluate(series)
+        if coerced is None:
+            return False
+        return series.eq(coerced).all()
+
+    return values_preserved
+
+
+def coercion_single_map_test(mapping: List[Dict]) -> Callable[[pd.Series, Dict], bool]:
+    """Build a test: do all values of a series occur as keys of one single mapping?"""
+    @series_handle_nulls
+    def f(series: pd.Series, state: Optional[dict] = None) -> bool:  # state is unused
+        return any(series.isin(list(single_map)).all() for single_map in mapping)
+    return f
+
+
+def coercion_multi_map_test(mapping: Dict) -> Callable[[pd.Series, Dict], bool]:
+    """Build a test: is every value of a series a key of ``mapping``?"""
+    @series_handle_nulls
+    def f(series: pd.Series, state: Optional[dict] = None) -> bool:  # state is unused
+        return bool(series.isin(list(mapping)).all())
+    return f
+
+
+def coercion_map_test(
+    mapping: Union[List[Dict], Dict]
+) -> Callable[[pd.Series, Dict], bool]:
+    """Create a testing function for a single mapping or a list of mappings.
+    Args:
+        mapping: A dict with a mapping or a list of dicts
+    Returns:
+        Callable that checks if a series consists of the mappable values
+    Raises:
+        ValueError: If ``mapping`` is neither a dict nor a list.
+    Examples:
+        >>> coercion_map_test({"Yes": True, "No": False})
+        >>> coercion_map_test(
+        ...     [
+        ...         {"Yes": True, "No": False},
+        ...         {"Y": True, "N": False},
+        ...     ]
+        ... )
+    """
+
+    if isinstance(mapping, list):
+        return coercion_single_map_test(mapping)
+    if isinstance(mapping, dict):
+        return coercion_multi_map_test(mapping)
+    raise ValueError("Mapping should be dict or list of dicts")
+
+
+def coercion_map(mapping: Union[List[Dict], Dict]) -> Callable[[pd.Series], pd.Series]:
+    """Build a function that maps a series through ``mapping``
+    Args:
+        mapping: a dict to map, or a list of dicts (merged; later dicts win).
+    Returns:
+        A callable that maps the series.
+    """
+    if not isinstance(mapping, (list, dict)):
+        raise ValueError("Mapping should be dict or list of dicts")
+    if isinstance(mapping, list):
+        # Flatten the list into one lookup table, in list order.
+        mapping = dict(pair for part in mapping for pair in part.items())
+
+    def f(series: pd.Series) -> pd.Series:
+        return series.map(mapping)
+
+    return f
diff --git a/build/lib/visions/backends/pandas/traversal.py b/build/lib/visions/backends/pandas/traversal.py
new file mode 100644
index 000000000..07f6491e6
--- /dev/null
+++ b/build/lib/visions/backends/pandas/traversal.py
@@ -0,0 +1,46 @@
+from typing import Dict, List, Tuple, Type
+
+import networkx as nx
+import pandas as pd
+
+from visions.types.type import VisionsBaseType
+from visions.typesets.typeset import traverse_graph, traverse_graph_with_series
+
+T = Type[VisionsBaseType]
+
+
+@traverse_graph.register(pd.Series)
+def _traverse_graph_series(
+    series: pd.Series, root_node: T, graph: nx.DiGraph
+) -> Tuple[pd.Series, List[T], dict]:
+    """Traverse the type graph for a single series.
+
+    Thin adapter: forwards to ``traverse_graph_with_series``, which takes the root node first.
+    """
+    return traverse_graph_with_series(root_node, series, graph)
+
+
+@traverse_graph.register(pd.DataFrame)
+def _traverse_graph_dataframe(
+    df: pd.DataFrame, root_node: T, graph: nx.DiGraph
+) -> Tuple[pd.DataFrame, Dict[str, List[T]], Dict[str, dict]]:
+    """Traverse the type graph once per column of ``df``.
+
+    Returns:
+        The per-column result series reassembled into a DataFrame, followed by
+        two dicts keyed by column: the traversal path and the traversal state.
+    """
+    result_series = {}
+    paths: Dict[str, List[T]] = {}
+    states: Dict[str, dict] = {}
+    for column in df.columns:
+        column_series, column_path, column_state = traverse_graph(
+            df[column], root_node, graph
+        )
+        assert isinstance(column_path, list)  # Placate the MyPy Gods
+
+        result_series[column] = column_series
+        paths[column] = column_path
+        states[column] = column_state
+
+    return pd.DataFrame(result_series), paths, states
diff --git a/build/lib/visions/backends/pandas/types/__init__.py b/build/lib/visions/backends/pandas/types/__init__.py
new file mode 100644
index 000000000..7a83235d7
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/__init__.py
@@ -0,0 +1,23 @@
+import visions.backends.pandas.types.boolean
+import visions.backends.pandas.types.categorical
+import visions.backends.pandas.types.complex
+import visions.backends.pandas.types.count
+import visions.backends.pandas.types.date
+import visions.backends.pandas.types.date_time
+import visions.backends.pandas.types.email_address
+import visions.backends.pandas.types.file
+import visions.backends.pandas.types.float
+import visions.backends.pandas.types.geometry
+import visions.backends.pandas.types.image
+import visions.backends.pandas.types.integer
+import visions.backends.pandas.types.ip_address
+import visions.backends.pandas.types.numeric
+import visions.backends.pandas.types.object
+import visions.backends.pandas.types.ordinal
+import visions.backends.pandas.types.path
+import visions.backends.pandas.types.sparse
+import visions.backends.pandas.types.string
+import visions.backends.pandas.types.time
+import visions.backends.pandas.types.time_delta
+import visions.backends.pandas.types.url
+import visions.backends.pandas.types.uuid
diff --git a/build/lib/visions/backends/pandas/types/boolean.py b/build/lib/visions/backends/pandas/types/boolean.py
new file mode 100644
index 000000000..083bade99
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/boolean.py
@@ -0,0 +1,76 @@
+from typing import Dict, List
+
+import pandas as pd
+import pandas.api.types as pdt
+
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.backends.pandas.test_utils import (
+    coercion_map,
+    coercion_map_test,
+    pandas_version,
+)
+from visions.backends.python.types.boolean import get_boolean_coercions
+from visions.types.boolean import Boolean
+from visions.types.object import Object
+from visions.types.string import String
+
+# dtype used when casting a series with missing values; its name depends on the pandas major version
+hasnan_bool_name = "boolean" if pandas_version[0] >= 1 else "Bool"
+
+
+string_coercions = get_boolean_coercions("en")
+
+
+@Boolean.register_relationship(Object, pd.Series)
+@series_handle_nulls
+def object_is_boolean(series: pd.Series, state: dict) -> bool:
+    """Return True when every value of the series is a member of ``{True, False}``.
+
+    Membership is decided by hash/equality, so values equal to a bool (such as
+    ``1`` or ``0.0``) are accepted as well.
+    """
+    bool_set = {True, False}
+    try:
+        ret = all(item in bool_set for item in series.values)
+    except (ValueError, TypeError, AttributeError):
+        ret = False
+
+    return ret
+
+
+@Boolean.register_transformer(Object, pd.Series)
+def object_to_boolean(series: pd.Series, state: dict) -> pd.Series:
+    """Cast to ``bool``, or to ``hasnan_bool_name`` when missing values are present."""
+    dtype = hasnan_bool_name if series.hasnans else bool
+    return series.astype(dtype)
+
+
+@Boolean.register_relationship(String, pd.Series)
+def string_is_boolean(series: pd.Series, state: dict) -> bool:
+    """Return True when the lower-cased values pass ``coercion_map_test(string_coercions)``."""
+    try:
+        return coercion_map_test(string_coercions)(series.str.lower(), state)
+    except (ValueError, TypeError, AttributeError):
+        return False
+
+
+@Boolean.register_transformer(String, pd.Series)
+def string_to_boolean(series: pd.Series, state: dict) -> pd.Series:
+    """Map lower-cased strings through ``string_coercions`` and cast the result to boolean."""
+    return object_to_boolean(coercion_map(string_coercions)(series.str.lower()), state)
+
+
+@Boolean.contains_op.register
+@series_not_sparse
+@series_handle_nulls
+@series_not_empty
+def boolean_contains(series: pd.Series, state: dict) -> bool:
+    """Return True for a boolean dtype that is not categorical."""
+    # ``pdt.is_categorical_dtype`` is deprecated in pandas 2.x; the isinstance check
+    # on the dtype is the documented replacement.
+    is_categorical = isinstance(series.dtype, pd.CategoricalDtype)
+    return pdt.is_bool_dtype(series) and not is_categorical
diff --git a/build/lib/visions/backends/pandas/types/categorical.py b/build/lib/visions/backends/pandas/types/categorical.py
new file mode 100644
index 000000000..9a72d878f
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/categorical.py
@@ -0,0 +1,14 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
+from visions.types.categorical import Categorical
+
+
+@Categorical.contains_op.register
+@series_not_sparse
+@series_not_empty
+def categorical_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has a categorical dtype."""
+    # isinstance on the dtype replaces the deprecated ``pdt.is_categorical_dtype``.
+    return isinstance(series.dtype, pd.CategoricalDtype)
diff --git a/build/lib/visions/backends/pandas/types/complex.py b/build/lib/visions/backends/pandas/types/complex.py
new file mode 100644
index 000000000..4405787ea
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/complex.py
@@ -0,0 +1,67 @@
+import math
+from typing import Union
+
+import numpy as np
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas import test_utils
+from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.complex import Complex
+from visions.types.string import String
+
+
+def imaginary_in_string(
+    series: pd.Series, imaginary_indicator: tuple = ("j", "i")
+) -> bool:
+    """Return True if at least one value contains one of the indicator substrings."""
+    for text in series:
+        for marker in imaginary_indicator:
+            if marker in text:
+                return True
+    return False
+
+
+def convert_val_to_complex(val: str) -> Union[complex, float]:
+    """Parse ``val`` with ``complex()``; a NaN real or imaginary part yields ``np.nan``."""
+    parsed = complex(val)
+    if math.isnan(parsed.real) or math.isnan(parsed.imag):
+        return np.nan
+    return parsed
+
+
+def convert_to_complex_series(series: pd.Series) -> pd.Series:
+    """Run ``convert_val_to_complex`` over the series using ``pandas_apply``."""
+    return pandas_apply(series, convert_val_to_complex)
+
+
+@Complex.register_relationship(String, pd.Series)
+def string_is_complex(series: pd.Series, state: dict) -> bool:
+    """Decide whether a string series should be interpreted as complex numbers.
+
+    Requires that the conversion succeeds, that not every non-null converted
+    value has a zero imaginary part, and that an imaginary indicator occurs in
+    the raw strings.
+    """
+    evaluate = test_utils.option_coercion_evaluator(convert_to_complex_series)
+    converted = evaluate(series)
+    if converted is None:
+        return False
+    if all(v.imag == 0 for v in converted.dropna()):
+        return False
+    return imaginary_in_string(series)
+
+
+@Complex.register_transformer(String, pd.Series)
+def string_to_complex(series: pd.Series, state: dict) -> pd.Series:
+    """Convert a string series with ``convert_to_complex_series``."""
+    return convert_to_complex_series(series)
+
+
+@Complex.contains_op.register
+@series_not_sparse
+@series_not_empty
+def complex_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has a complex dtype."""
+    return pdt.is_complex_dtype(series)
diff --git a/build/lib/visions/backends/pandas/types/count.py b/build/lib/visions/backends/pandas/types/count.py
new file mode 100644
index 000000000..fe0c4de80
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/count.py
@@ -0,0 +1,14 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
+from visions.types.count import Count
+
+
+@Count.contains_op.register
+@series_not_sparse
+@series_not_empty
+def count_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has an unsigned integer dtype."""
+    has_unsigned_dtype = pdt.is_unsigned_integer_dtype(series)
+    return has_unsigned_dtype
diff --git a/build/lib/visions/backends/pandas/types/date.py b/build/lib/visions/backends/pandas/types/date.py
new file mode 100644
index 000000000..353db2d4e
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/date.py
@@ -0,0 +1,37 @@
+from datetime import date, time
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import (
+    class_name_attrs,
+    series_handle_nulls,
+    series_not_empty,
+)
+from visions.types.date import Date
+from visions.types.date_time import DateTime
+
+
+@Date.register_relationship(DateTime, pd.Series)
+@series_handle_nulls
+def datetime_is_date(series: pd.Series, state: dict) -> bool:
+    """Return True when every value of ``series.dt.time`` equals midnight."""
+    midnight = time(0, 0)
+    for time_of_day in series.dt.time:
+        if not (time_of_day == midnight):
+            return False
+    return True
+
+
+@Date.register_transformer(DateTime, pd.Series)
+def datetime_to_date(series: pd.Series, state: dict) -> pd.Series:
+    """Reduce each datetime to its date component via ``series.dt.date``."""
+    return series.dt.date
+
+
+@Date.contains_op.register
+@series_handle_nulls
+@series_not_empty
+def date_contains(series: pd.Series, state: dict) -> bool:
+    """Return the ``class_name_attrs`` verdict for ``date`` with year, month and day."""
+    required_attrs = ["year", "month", "day"]
+    return class_name_attrs(series, date, required_attrs)
diff --git a/build/lib/visions/backends/pandas/types/date_time.py b/build/lib/visions/backends/pandas/types/date_time.py
new file mode 100644
index 000000000..a57e48e0e
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/date_time.py
@@ -0,0 +1,56 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas import test_utils
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.types import DateTime, String
+
+
+def pandas_infer_datetime(series: pd.Series, state: dict) -> pd.Series:
+    """Parse the series with ``pd.to_datetime``, retrying with ``format="mixed"``.
+
+    Any exception from the first attempt is discarded; an exception raised by the
+    retry propagates to the caller.
+    """
+    parsed = None
+    try:
+        parsed = pd.to_datetime(series)
+    except Exception:
+        pass  # deliberately broad: fall through to the mixed-format retry
+    if parsed is None:
+        parsed = pd.to_datetime(series, format="mixed")
+    return parsed
+
+
+@DateTime.register_relationship(String, pd.Series)
+@series_handle_nulls
+def string_is_datetime(series: pd.Series, state: dict) -> bool:
+    """Return True when the strings parse as datetimes and not every result is null."""
+    def infer(values: pd.Series) -> pd.Series:
+        return pandas_infer_datetime(values, state)
+
+    extra_errors = [OverflowError, TypeError]
+    evaluate = test_utils.option_coercion_evaluator(infer, extra_errors)
+    coerced = evaluate(series)
+    if coerced is None:
+        return False
+    return not coerced.dropna().empty
+
+
+@DateTime.register_transformer(String, pd.Series)
+def string_to_datetime(series: pd.Series, state: dict) -> pd.Series:
+    """Convert a string series with ``pandas_infer_datetime``."""
+    return pandas_infer_datetime(series, state)
+
+
+@DateTime.contains_op.register
+@series_not_sparse
+@series_handle_nulls
+@series_not_empty
+def datetime_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has any datetime64 dtype."""
+    return pdt.is_datetime64_any_dtype(series)
diff --git a/build/lib/visions/backends/pandas/types/email_address.py b/build/lib/visions/backends/pandas/types/email_address.py
new file mode 100644
index 000000000..5947ab464
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/email_address.py
@@ -0,0 +1,41 @@
+import pandas as pd
+
+from visions.backends.pandas import test_utils
+from visions.backends.pandas.series_utils import (
+    isinstance_attrs,
+    series_handle_nulls,
+    series_not_empty,
+)
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.email_address import FQDA, EmailAddress, _to_email
+from visions.types.string import String
+
+
+@EmailAddress.register_relationship(String, pd.Series)
+def string_is_email(series: pd.Series, state: dict) -> bool:
+    """Coercion test for the String -> EmailAddress relation.
+
+    The candidate coercion parses each value with ``_to_email`` and reduces it to
+    ``x.local and x.fqdn``; ``test_utils.coercion_true_test`` delivers the verdict.
+    """
+
+    def test_email(s):
+        parsed = pandas_apply(s, _to_email)
+        return pandas_apply(parsed, lambda x: x.local and x.fqdn)
+
+    return test_utils.coercion_true_test(test_email)(series)
+
+
+@EmailAddress.register_transformer(String, pd.Series)
+def string_to_email(series: pd.Series, state: dict) -> pd.Series:
+    """Convert the series by running ``_to_email`` through ``pandas_apply``."""
+    return pandas_apply(series, _to_email)
+
+
+@EmailAddress.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def email_address_contains(series: pd.Series, state: dict) -> bool:
+    """Return the ``isinstance_attrs`` verdict for ``FQDA`` with ``local`` and ``fqdn``."""
+    required_attrs = ["local", "fqdn"]
+    return isinstance_attrs(series, FQDA, required_attrs)
diff --git a/build/lib/visions/backends/pandas/types/file.py b/build/lib/visions/backends/pandas/types/file.py
new file mode 100644
index 000000000..b6d7cb97c
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/file.py
@@ -0,0 +1,17 @@
+import pathlib
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import series_handle_nulls, series_not_empty
+from visions.types.file import File
+
+
+@File.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def file_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when every value is a ``pathlib.Path`` whose ``exists()`` is true."""
+    for candidate in series:
+        if not isinstance(candidate, pathlib.Path) or not candidate.exists():
+            return False
+    return True
diff --git a/build/lib/visions/backends/pandas/types/float.py b/build/lib/visions/backends/pandas/types/float.py
new file mode 100644
index 000000000..34a3290dc
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/float.py
@@ -0,0 +1,77 @@
+import numpy as np
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas import test_utils
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.types.complex import Complex
+from visions.types.float import Float
+from visions.types.string import String
+from visions.utils.warning_handling import suppress_warnings
+
+
+def test_string_leading_zeros(series: pd.Series, coerced_series: pd.Series):
+    """Return False if a string starting with "0" was coerced to a value above 1.
+
+    Rows whose coerced value is missing are left out of the check; if that
+    filter leaves nothing behind the answer is False.
+    """
+    if coerced_series.hasnans:
+        present = coerced_series.notna()
+        coerced_series = coerced_series[present]
+        if coerced_series.empty:
+            return False
+        series = series[present]
+
+    above_one = series[coerced_series > 1]
+    return not any(text[0] == "0" for text in above_one)
+
+
+@Float.register_relationship(String, pd.Series)
+@series_handle_nulls
+def string_is_float(series: pd.Series, state: dict) -> bool:
+    """Return True when the strings cast to float and pass the leading-zero check."""
+    to_float = test_utils.option_coercion_evaluator(lambda s: s.astype(float))
+    coerced_series = to_float(series)
+    if coerced_series is None:
+        return False
+    if not float_contains(coerced_series, state):
+        return False
+    return test_string_leading_zeros(series, coerced_series)
+
+
+@Float.register_transformer(String, pd.Series)
+def string_to_float(series: pd.Series, state: dict) -> pd.Series:
+    """Cast the string series to ``float``."""
+    # Slightly faster to check for the character if it's not present than to
+    # attempt the replacement
+    # if any("," in x for x in series):
+    #     series = series.str.replace(",", "")
+    return series.astype(float)
+
+
+@Float.register_relationship(Complex, pd.Series)
+def complex_is_float(series: pd.Series, state: dict) -> bool:
+    """Return True when every value has a zero imaginary part."""
+    imaginary_parts = np.imag(series.values)
+    return all(imaginary_parts == 0)
+
+
+@Float.register_transformer(Complex, pd.Series)
+def complex_to_float(series: pd.Series, state: dict) -> pd.Series:
+    """Cast the complex series to ``float`` inside ``suppress_warnings``."""
+    cast = suppress_warnings(lambda s: s.astype(float))
+    return cast(series)
+
+
+@Float.contains_op.register
+@series_not_sparse
+@series_handle_nulls
+@series_not_empty
+def float_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has a float dtype."""
+    return pdt.is_float_dtype(series)
diff --git a/build/lib/visions/backends/pandas/types/geometry.py b/build/lib/visions/backends/pandas/types/geometry.py
new file mode 100644
index 000000000..5d6da3639
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/geometry.py
@@ -0,0 +1,52 @@
+import contextlib
+import os
+import sys
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import series_handle_nulls, series_not_empty
+from visions.types.geometry import Geometry
+from visions.types.string import String
+
+
+# TODO: Evaluate https://jorisvandenbossche.github.io/blog/2019/08/13/geopandas-extension-array-refactor/
+@Geometry.register_relationship(String, pd.Series)
+def string_is_geometry(sequence: pd.Series, state: dict) -> bool:
+    """Return True when every value parses as WKT into a truthy geometry.
+
+    Shapely logs failures at a silly severity, so ``sys.stderr`` is pointed at the
+    null device while parsing to suppress that output.
+    """
+    from shapely import wkt
+    from shapely.errors import WKTReadingError
+
+    # only way to get rid of sys output when wkt.loads hits a bad value
+    # TODO: use coercion wrapper for this
+    # The context managers close the devnull handle and reinstate whichever stream
+    # was active before, rather than leaking the file and forcing sys.__stderr__.
+    with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
+        try:
+            result = all(wkt.loads(value) for value in sequence)
+        except (WKTReadingError, AttributeError, UnicodeEncodeError, TypeError):
+            result = False
+    return result
+
+
+@Geometry.register_transformer(String, pd.Series)
+def string_to_geometry(series: pd.Series, state: dict) -> pd.Series:
+    """Parse every value as WKT, keeping the index and name of the input series."""
+    from shapely import wkt
+
+    geometries = [wkt.loads(value) for value in series]
+    # Without the original index the result would not align with its source rows.
+    return pd.Series(geometries, index=series.index, name=series.name)
+
+
+@Geometry.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def geometry_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when every value is an instance of shapely's ``BaseGeometry``."""
+    from shapely.geometry.base import BaseGeometry
+
+    return all(isinstance(x, BaseGeometry) for x in series)
diff --git a/build/lib/visions/backends/pandas/types/image.py b/build/lib/visions/backends/pandas/types/image.py
new file mode 100644
index 000000000..5130a2876
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/image.py
@@ -0,0 +1,22 @@
+import imghdr
+from pathlib import Path
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import series_handle_nulls, series_not_empty
+from visions.types.image import Image
+
+
+@Image.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def image_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when every value is an existing ``Path`` that ``imghdr.what`` identifies."""
+    # NOTE(review): ``imghdr`` is deprecated since Python 3.11 and removed in 3.13 (PEP 594).
+    for candidate in series:
+        is_image = (
+            isinstance(candidate, Path) and candidate.exists() and imghdr.what(candidate)
+        )
+        if not is_image:
+            return False
+    return True
diff --git a/build/lib/visions/backends/pandas/types/integer.py b/build/lib/visions/backends/pandas/types/integer.py
new file mode 100644
index 000000000..b9f84089f
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/integer.py
@@ -0,0 +1,39 @@
+import numpy as np
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.types.float import Float
+from visions.types.integer import Integer
+
+
+@Integer.register_relationship(Float, pd.Series)
+@series_handle_nulls
+def float_is_integer(series: pd.Series, state: dict) -> bool:
+    """Return True when all values are finite and unchanged by a cast to ``int``."""
+    try:
+        if not np.isfinite(series).all():
+            return False
+        truncated = series.astype(int)
+        return series.eq(truncated).all()
+    except (ValueError, TypeError, AttributeError):
+        return False
+
+
+@Integer.register_transformer(Float, pd.Series)
+def float_to_integer(series: pd.Series, state: dict) -> pd.Series:
+    """Cast to ``np.int64``, or to ``"Int64"`` when missing values are present."""
+    target_dtype = "Int64" if series.hasnans else np.int64
+    return series.astype(target_dtype)
+
+
+@Integer.contains_op.register
+@series_not_sparse
+@series_not_empty
+def integer_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has an integer dtype."""
+    return pdt.is_integer_dtype(series)
diff --git a/build/lib/visions/backends/pandas/types/ip_address.py b/build/lib/visions/backends/pandas/types/ip_address.py
new file mode 100644
index 000000000..41f1b293f
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/ip_address.py
@@ -0,0 +1,36 @@
+from ipaddress import _BaseAddress, ip_address
+
+import pandas as pd
+
+from visions.backends.pandas import test_utils
+from visions.backends.pandas.series_utils import series_handle_nulls, series_not_empty
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.ip_address import IPAddress
+from visions.types.string import String
+
+
+@IPAddress.register_relationship(String, pd.Series)
+def string_is_ip_address(series: pd.Series, state: dict) -> bool:
+    """Return the ``test_utils.coercion_test`` verdict for parsing with ``ip_address``."""
+    def parse_all(values: pd.Series) -> pd.Series:
+        return pandas_apply(values, ip_address)
+
+    return test_utils.coercion_test(parse_all)(series)
+
+
+@IPAddress.register_transformer(String, pd.Series)
+def string_to_ip_address(series: pd.Series, state: dict) -> pd.Series:
+    """Convert the series by running ``ip_address`` through ``pandas_apply``."""
+    return pandas_apply(series, ip_address)
+
+
+@IPAddress.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def ip_address_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when every value is an instance of ``ipaddress._BaseAddress``."""
+    # NOTE(review): ``_BaseAddress`` is a private name of the ipaddress module.
+    for value in series:
+        if not isinstance(value, _BaseAddress):
+            return False
+    return True
diff --git a/build/lib/visions/backends/pandas/types/numeric.py b/build/lib/visions/backends/pandas/types/numeric.py
new file mode 100644
index 000000000..dff865a7b
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/numeric.py
@@ -0,0 +1,14 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
+from visions.types.numeric import Numeric
+
+
+@Numeric.contains_op.register
+@series_not_sparse
+@series_not_empty
+def numeric_contains_op(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has a numeric dtype."""
+    is_numeric = pdt.is_numeric_dtype(series)
+    return is_numeric
diff --git a/build/lib/visions/backends/pandas/types/object.py b/build/lib/visions/backends/pandas/types/object.py
new file mode 100644
index 000000000..49352ff3d
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/object.py
@@ -0,0 +1,26 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.types.object import Object
+
+pandas_has_string_dtype_flag = hasattr(pdt, "is_string_dtype")
+
+
+@Object.contains_op.register
+@series_not_sparse
+@series_handle_nulls
+@series_not_empty
+def object_contains(series: pd.Series, state: dict) -> bool:
+    """Return True for object dtype, or for a string dtype that is not categorical."""
+    if pdt.is_object_dtype(series):
+        return True
+    if not pandas_has_string_dtype_flag:
+        return False
+    # isinstance on the dtype replaces the deprecated ``pdt.is_categorical_dtype``.
+    is_categorical = isinstance(series.dtype, pd.CategoricalDtype)
+    return pdt.is_string_dtype(series) and not is_categorical
diff --git a/build/lib/visions/backends/pandas/types/ordinal.py b/build/lib/visions/backends/pandas/types/ordinal.py
new file mode 100644
index 000000000..4f253fd33
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/ordinal.py
@@ -0,0 +1,19 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import series_not_empty
+from visions.types.ordinal import Ordinal
+
+# @Ordinal.register_transformer(Categorical, pd.Series)
+# def categorical_to_ordinal(series: pd.Series) -> pd.Categorical:
+#     return pd.Categorical(
+#         series, categories=sorted(series.dropna().unique()), ordered=True
+#     )
+
+
+@Ordinal.contains_op.register
+@series_not_empty
+def ordinal_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series is categorical and its categories are ordered."""
+    # isinstance on the dtype replaces the deprecated ``pdt.is_categorical_dtype``.
+    return isinstance(series.dtype, pd.CategoricalDtype) and series.cat.ordered
diff --git a/build/lib/visions/backends/pandas/types/path.py b/build/lib/visions/backends/pandas/types/path.py
new file mode 100644
index 000000000..c373d1854
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/path.py
@@ -0,0 +1,41 @@
+import pathlib
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import series_handle_nulls, series_not_empty
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.path import Path
+from visions.types.string import String
+
+
+@Path.register_relationship(String, pd.Series)
+def string_is_path(series: pd.Series, state: dict) -> bool:
+    """Return True when all strings convert to absolute paths; a TypeError means False."""
+    try:
+        paths = string_to_path(series.copy(), state)
+        return pandas_apply(paths, lambda x: x.is_absolute()).all()
+    except TypeError:
+        return False
+
+
+@Path.register_transformer(String, pd.Series)
+def string_to_path(series: pd.Series, state: dict) -> pd.Series:
+    """Convert strings to pure paths.
+
+    Windows flavour is tried first and kept when every path is absolute;
+    otherwise the series is converted with the POSIX flavour.
+    """
+    windows_paths = pandas_apply(series, pathlib.PureWindowsPath)
+    if pandas_apply(windows_paths, lambda x: x.is_absolute()).all():
+        return windows_paths
+    return pandas_apply(series, pathlib.PurePosixPath)
+
+
+@Path.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def path_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when every value is an absolute ``pathlib.PurePath``."""
+    for value in series:
+        if not isinstance(value, pathlib.PurePath) or not value.is_absolute():
+            return False
+    return True
diff --git a/build/lib/visions/backends/pandas/types/sparse.py b/build/lib/visions/backends/pandas/types/sparse.py
new file mode 100644
index 000000000..331f5e179
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/sparse.py
@@ -0,0 +1,11 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.types.sparse import Sparse
+
+
+@Sparse.contains_op.register
+def sparse_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series is backed by a sparse dtype."""
+    # ``pdt.is_sparse`` is deprecated in pandas 2.x; checking the dtype is the replacement.
+    return isinstance(series.dtype, pd.SparseDtype)
diff --git a/build/lib/visions/backends/pandas/types/string.py b/build/lib/visions/backends/pandas/types/string.py
new file mode 100644
index 000000000..fabc9c44e
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/string.py
@@ -0,0 +1,40 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import (
+    series_handle_nulls,
+    series_not_empty,
+    series_not_sparse,
+)
+from visions.types.string import String
+
+pandas_has_string_dtype_flag = hasattr(pdt, "is_string_dtype")
+
+
+@series_handle_nulls
+def _is_string(series: pd.Series, state: dict):
+    """Check that the values are strings.
+
+    The first five values are tested with ``isinstance`` as a cheap early exit;
+    then the whole series must compare equal to its own ``astype(str)`` cast.
+    """
+    leading_values = series.values[0:5]
+    if any(not isinstance(v, str) for v in leading_values):
+        return False
+    try:
+        return (series.astype(str).values == series.values).all()
+    except (TypeError, ValueError):
+        return False
+
+
+@String.contains_op.register
+@series_not_sparse
+@series_not_empty
+def string_contains(series: pd.Series, state: dict) -> bool:
+    """Return True for string data: never categorical, a string dtype, or str-valued objects."""
+    # isinstance on the dtype replaces the deprecated ``pdt.is_categorical_dtype``.
+    if isinstance(series.dtype, pd.CategoricalDtype):
+        return False
+    if not pdt.is_object_dtype(series):
+        return pandas_has_string_dtype_flag and pdt.is_string_dtype(series)
+
+    return _is_string(series, state)
diff --git a/build/lib/visions/backends/pandas/types/time.py b/build/lib/visions/backends/pandas/types/time.py
new file mode 100644
index 000000000..8776212aa
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/time.py
@@ -0,0 +1,31 @@
+from datetime import time
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import (
+    class_name_attrs,
+    series_handle_nulls,
+    series_not_empty,
+)
+from visions.types.time import Time
+
+# @Time.register_relationship(DateTime, pd.Series)
+# @series_handle_nulls
+# def datetime_is_time(series: pd.Series) -> bool:
+#     dtseries = series.dt.date
+#     value = date(1, 1, 1)
+#     return all(v == value for v in dtseries)
+#
+#
+# @Time.register_transformer(DateTime, pd.Series)
+# def datetime_to_time(series: pd.Series, state: dict) -> pd.Series:
+#     return series.dt.time
+
+
+@Time.contains_op.register
+@series_handle_nulls
+@series_not_empty
+def time_contains(series: pd.Series, state: dict) -> bool:
+    """Return the ``class_name_attrs`` verdict for ``time`` with microsecond and hour."""
+    required_attrs = ["microsecond", "hour"]
+    return class_name_attrs(series, time, required_attrs)
diff --git a/build/lib/visions/backends/pandas/types/time_delta.py b/build/lib/visions/backends/pandas/types/time_delta.py
new file mode 100644
index 000000000..cc6d0cc7a
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/time_delta.py
@@ -0,0 +1,20 @@
+import pandas as pd
+from pandas.api import types as pdt
+
+from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
+from visions.types.time_delta import TimeDelta
+
+
+@TimeDelta.contains_op.register
+@series_not_sparse
+@series_not_empty
+def time_delta_contains(series: pd.Series, state: dict) -> bool:
+    """Return True when the series has a timedelta64 dtype.
+
+    Example:
+        >>> x = pd.Series([pd.Timedelta(days=i) for i in range(3)])
+        >>> x in visions.TimeDelta
+        True
+    """
+    is_timedelta = pdt.is_timedelta64_dtype(series)
+    return is_timedelta
diff --git a/build/lib/visions/backends/pandas/types/url.py b/build/lib/visions/backends/pandas/types/url.py
new file mode 100644
index 000000000..9ecd5a901
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/url.py
@@ -0,0 +1,35 @@
+from urllib.parse import ParseResult, urlparse
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import (
+ isinstance_attrs,
+ series_handle_nulls,
+ series_not_empty,
+)
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.string import String
+from visions.types.url import URL
+
+
+@URL.register_relationship(String, pd.Series)
+@series_handle_nulls
+def string_is_url(series: pd.Series, state: dict) -> bool:
+    """Check whether every string in ``series`` parses to a URL with a scheme and a netloc.
+
+    Args:
+        series: The candidate strings.
+        state: Traversal state, forwarded unchanged to ``string_to_url``.
+
+    Returns:
+        True when every parsed value has a non-empty ``netloc`` and ``scheme``;
+        False otherwise, or when parsing raises ``AttributeError``.
+    """
+    try:
+        parsed = string_to_url(series, state)
+        # ``x.netloc and x.scheme`` evaluates to a string; coerce each result
+        # to a real boolean so ``.all()`` reduces booleans, not objects.
+        has_parts = pandas_apply(parsed, lambda x: bool(x.netloc and x.scheme))
+        return bool(has_parts.all())
+    except AttributeError:
+        return False
+
+
+@URL.register_transformer(String, pd.Series)
+def string_to_url(series: pd.Series, state: dict) -> pd.Series:
+    """Apply ``urllib.parse.urlparse`` to each element of ``series`` via ``pandas_apply``."""
+    parsed = pandas_apply(series, urlparse)
+    return parsed
+
+
+@URL.contains_op.register
+@series_handle_nulls
+@series_not_empty
+def url_contains(series: pd.Series, state: dict) -> bool:
+    """Check membership of ``series`` in the URL type.
+
+    Delegates to ``isinstance_attrs`` with ``ParseResult`` and the attribute
+    names "netloc" and "scheme".
+    """
+    probe_attrs = ["netloc", "scheme"]
+    return isinstance_attrs(series, ParseResult, probe_attrs)
diff --git a/build/lib/visions/backends/pandas/types/uuid.py b/build/lib/visions/backends/pandas/types/uuid.py
new file mode 100644
index 000000000..b6e0e29c0
--- /dev/null
+++ b/build/lib/visions/backends/pandas/types/uuid.py
@@ -0,0 +1,33 @@
+import uuid
+
+import pandas as pd
+
+from visions.backends.pandas.series_utils import (
+ isinstance_attrs,
+ series_handle_nulls,
+ series_not_empty,
+)
+from visions.backends.pandas.test_utils import coercion_true_test
+from visions.backends.shared.parallelization_engines import pandas_apply
+from visions.types.string import String
+from visions.types.uuid import UUID
+
+
+@UUID.register_relationship(String, pd.Series)
+def uuid_is_string(series: pd.Series, state: dict) -> bool:
+    """Test whether the strings in ``series`` can be coerced to ``uuid.UUID``.
+
+    The element-wise conversion is wrapped with ``coercion_true_test`` and the
+    wrapped callable is invoked on ``series``.
+    """
+
+    def parse_all(values):
+        return pandas_apply(values, uuid.UUID)
+
+    checker = coercion_true_test(parse_all)
+    return checker(series)
+
+
+@UUID.register_transformer(String, pd.Series)
+def uuid_to_string(series: pd.Series, state: dict) -> pd.Series:
+    """Construct a ``uuid.UUID`` from each element of ``series`` via ``pandas_apply``."""
+    converted = pandas_apply(series, uuid.UUID)
+    return converted
+
+
+@UUID.contains_op.register
+@series_not_empty
+@series_handle_nulls
+def uuid_contains(series: pd.Series, state: dict) -> bool:
+    """Check membership of ``series`` in the UUID type.
+
+    Delegates to ``isinstance_attrs`` with ``uuid.UUID`` and the attribute
+    names "time_low" and "hex".
+    """
+    probe_attrs = ["time_low", "hex"]
+    return isinstance_attrs(series, uuid.UUID, probe_attrs)
diff --git a/build/lib/visions/backends/python/__init__.py b/build/lib/visions/backends/python/__init__.py
new file mode 100644
index 000000000..daf7b9291
--- /dev/null
+++ b/build/lib/visions/backends/python/__init__.py
@@ -0,0 +1 @@
+import visions.backends.python.types
diff --git a/build/lib/visions/backends/python/sequences.py b/build/lib/visions/backends/python/sequences.py
new file mode 100644
index 000000000..71eab8f1a
--- /dev/null
+++ b/build/lib/visions/backends/python/sequences.py
@@ -0,0 +1,186 @@
+import datetime
+import os
+import uuid
+from ipaddress import IPv4Address, IPv6Address
+from pathlib import Path, PurePosixPath, PureWindowsPath
+from typing import Dict, Sequence, cast
+from urllib.parse import urlparse
+
+from visions.types.email_address import FQDA
+
+
+def get_sequences() -> Dict[str, Sequence]:
+    """Build the named example sequences used with the python backend.
+
+    File-based entries are resolved relative to the directory three levels
+    above this module.
+
+    Returns:
+        A mapping from a descriptive name to a plain Python sequence of values.
+    """
+    base_path = Path(__file__).parent.parent.parent.absolute()
+    # base_path is already absolute, so joining onto it yields absolute paths.
+    test_dir = base_path / "test"
+    data_dir = base_path / "test/data"
+
+    series_py = test_dir / "series.py"
+    init_py = test_dir / "__init__.py"
+    utils_py = test_dir / "utils.py"
+    img_png = data_dir / "img.png"
+    img_jpeg = data_dir / "img.jpeg"
+    img_jpg = data_dir / "img.jpg"
+
+    sequences = {
+        "int_series": [1, 2, 3],
+        "int_range": range(10),
+        "int_series_boolean": [1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0],
+        "float_series": [1.0, 2.1, 3.0],
+        "float_series2": [1.0, 2.0, 3.0, 4.0],
+        "string_series": ["Patty", "Valentine"],
+        "string_unicode_series": ["mack", "the", "finger"],
+        "string_num": ["1.0", "2.0", "3.0"],
+        "string_flt": ["1.0", "45.67", "3.5"],
+        "string_bool_nan": ["True", "False", None],
+        "str_url": [
+            "http://www.cwi.nl:80/%7Eguido/Python.html",
+            "https://github.com/dylan-profiling/hurricane",
+        ],
+        "path_series_windows_str": [
+            r"C:\\home\\user\\file.txt",
+            r"C:\\home\\user\\test2.txt",
+        ],
+        "path_series_linux_str": [r"/home/user/file.txt", r"/home/user/test2.txt"],
+        "str_int_leading_zeros": ["0011", "12"],
+        "str_float_non_leading_zeros": ["0.0", "0.04", "0"],
+        "str_int_zeros": ["0.0", "0.000", "0", "2"],
+        "bool_series": [True, False],
+        "bool_nan_series": [True, False, None],
+        "str_complex": ["(1+1j)", "(2+2j)", "(10+100j)"],
+        "str_complex_nan": ["(1+1j)", "(2+2j)", "(10+100j)", "NaN"],
+        "complex_series_py": [complex(0, 0), complex(1, 2), complex(3, -1)],
+        "complex_series_py_float": [complex(0, 0), complex(1, 0), complex(3, 0)],
+        "string_date": ["1937-05-06", "20/4/2014"],
+        "timestamp_string_series": ["1941-05-24", "13/10/2016"],
+        "py_datetime_str": ["1941-05-24 00:05:00", "2016-10-13 00:10:00"],
+        "date": [
+            datetime.date(2011, 1, 1),
+            datetime.date(2012, 1, 2),
+            datetime.date(2013, 1, 1),
+        ],
+        "time": [
+            datetime.time(8, 43, 12),
+            datetime.time(9, 43, 12),
+            datetime.time(10, 43, 12),
+        ],
+        "path_series_linux": [
+            PurePosixPath("/home/user/file.txt"),
+            PurePosixPath("/home/user/test2.txt"),
+        ],
+        "path_series_linux_missing": [
+            PurePosixPath("/home/user/file.txt"),
+            PurePosixPath("/home/user/test2.txt"),
+            None,
+        ],
+        "path_series_windows": [
+            PureWindowsPath("C:\\home\\user\\file.txt"),
+            PureWindowsPath("C:\\home\\user\\test2.txt"),
+        ],
+        "url_series": [
+            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
+            urlparse("https://github.com/dylan-profiling/hurricane"),
+        ],
+        "url_none_series": [
+            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
+            urlparse("https://github.com/dylan-profiling/hurricane"),
+            None,
+        ],
+        "uuid_series": [
+            uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
+            uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
+            uuid.UUID("00000000-0000-0000-0000-000000000000"),
+        ],
+        "uuid_series_missing": [
+            uuid.UUID("0b8a22ca-80ad-4df5-85ac-fa49c44b7ede"),
+            uuid.UUID("aaa381d6-8442-4f63-88c8-7c900e9a23c6"),
+            uuid.UUID("00000000-0000-0000-0000-000000000000"),
+            None,
+        ],
+        "uuid_series_str": [
+            "0b8a22ca-80ad-4df5-85ac-fa49c44b7ede",
+            "aaa381d6-8442-4f63-88c8-7c900e9a23c6",
+            "00000000-0000-0000-0000-000000000000",
+        ],
+        "mixed_list[str,int]": [[1, ""], [2, "Rubin"], [3, "Carter"]],
+        "mixed_dict": [
+            {"why": "did you"},
+            {"bring him": "in for he"},
+            {"aint": "the guy"},
+        ],
+        "callable": [os.getcwd, os.stat, os.kill],
+        "module": [os, uuid],
+        "textual_float": ["1.1", "2"],
+        "textual_float_nan": ["1.1", "2", "NAN"],
+        "mixed_integer": ["a", 1],
+        "mixed_list": [[True], [False], [False]],
+        "ip_str": ["127.0.0.1", "127.0.0.1"],
+        "empty": [],
+        "ip": [IPv4Address("127.0.0.1"), IPv4Address("127.0.0.1")],
+        "ip_missing": [IPv4Address("127.0.0.1"), None, IPv4Address("127.0.0.1")],
+        "ip_mixed_v4andv6": [IPv6Address("0:0:0:0:0:0:0:1"), IPv4Address("127.0.0.1")],
+        "file_test_py": [series_py, init_py, utils_py],
+        "file_mixed_ext": [
+            base_path / "py.typed",
+            data_dir / "file.html",
+            series_py,
+        ],
+        "file_test_py_missing": [series_py, None, init_py, None, utils_py],
+        "image_png": [img_png, img_jpeg, img_jpg],
+        "image_png_missing": [img_png, img_jpeg, None, img_jpg, None],
+        "email_address": [FQDA("test", "example.com"), FQDA("info", "example.eu")],
+        "email_address_missing": [
+            FQDA("test", "example.com"),
+            FQDA("info", "example.eu"),
+            None,
+        ],
+        "email_address_str": ["test@example.com", "info@example.eu"],
+    }
+    # Sanity check: every example must be a real Sequence.
+    for values in sequences.values():
+        assert isinstance(values, Sequence)
+    return cast(Dict[str, Sequence], sequences)
diff --git a/build/lib/visions/backends/python/series_utils.py b/build/lib/visions/backends/python/series_utils.py
new file mode 100644
index 000000000..c83643b13
--- /dev/null
+++ b/build/lib/visions/backends/python/series_utils.py
@@ -0,0 +1,26 @@
+import functools
+from typing import Callable, Sequence
+
+
+def sequence_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator that makes ``fn`` return False for an empty sequence.
+
+    For a sequence with at least one element, ``fn`` is called with the
+    original arguments and its result is returned.
+    """
+
+    @functools.wraps(fn)
+    def inner(sequence: Sequence, *args, **kwargs) -> bool:
+        # Reaching the loop body proves there is at least one element.
+        for _ in sequence:
+            return fn(sequence, *args, **kwargs)
+        return False
+
+    return inner
+
+
+def sequence_handle_none(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator that strips ``None`` entries before calling ``fn``.
+
+    Args:
+        fn: A predicate taking a sequence as its first argument.
+
+    Returns:
+        A wrapper that passes ``fn`` a tuple of the non-``None`` elements,
+        followed by the remaining arguments unchanged.
+    """
+
+    @functools.wraps(fn)
+    def inner(sequence: Sequence, *args, **kwargs) -> bool:
+        # Compare by identity: filter(None, ...) would also discard falsy
+        # values such as 0, False and "", which are valid data.
+        sequence = tuple(value for value in sequence if value is not None)
+        return fn(sequence, *args, **kwargs)
+
+    return inner
diff --git a/build/lib/visions/backends/python/types/__init__.py b/build/lib/visions/backends/python/types/__init__.py
new file mode 100644
index 000000000..0b9192ccc
--- /dev/null
+++ b/build/lib/visions/backends/python/types/__init__.py
@@ -0,0 +1,22 @@
+import visions.backends.python.types.boolean
+import visions.backends.python.types.categorical
+import visions.backends.python.types.complex
+import visions.backends.python.types.count
+import visions.backends.python.types.date
+import visions.backends.python.types.date_time
+import visions.backends.python.types.email_address
+import visions.backends.python.types.file
+import visions.backends.python.types.float
+import visions.backends.python.types.geometry
+import visions.backends.python.types.image
+import visions.backends.python.types.integer
+import visions.backends.python.types.ip_address
+import visions.backends.python.types.numeric
+import visions.backends.python.types.object
+import visions.backends.python.types.ordinal
+import visions.backends.python.types.path
+import visions.backends.python.types.string
+import visions.backends.python.types.time
+import visions.backends.python.types.time_delta
+import visions.backends.python.types.url
+import visions.backends.python.types.uuid
diff --git a/build/lib/visions/backends/python/types/boolean.py b/build/lib/visions/backends/python/types/boolean.py
new file mode 100644
index 000000000..8ece6320c
--- /dev/null
+++ b/build/lib/visions/backends/python/types/boolean.py
@@ -0,0 +1,60 @@
+from typing import Dict, List, Sequence
+
+from visions.backends.python.series_utils import (
+ sequence_handle_none,
+ sequence_not_empty,
+)
+from visions.types import Boolean, Object, String
+
+
+def get_boolean_coercions(id: str) -> List[Dict]:
+    """Look up the string-to-boolean coercion tables for a language code.
+
+    Args:
+        id: One of "default", "en" or "nl".
+
+    Returns:
+        A list of dicts, each mapping a lowercase string to a boolean.
+
+    Raises:
+        KeyError: If ``id`` is not one of the known codes.
+    """
+    default_tables = [{"true": True, "false": False}]
+    english_tables = [
+        {"true": True, "false": False},
+        {"y": True, "n": False},
+        {"yes": True, "no": False},
+    ]
+    dutch_tables = [
+        {"true": True, "false": False},
+        {"ja": True, "nee": False},
+        {"j": True, "n": False},
+    ]
+    tables_by_code = {
+        "default": default_tables,
+        "en": english_tables,
+        "nl": dutch_tables,
+    }
+    return tables_by_code[id]
+
+
+@sequence_not_empty
+@sequence_handle_none
+def is_bool(sequence: Sequence, state: dict):
+    """Return True when every element of ``sequence`` is a ``bool`` instance."""
+    for item in sequence:
+        if not isinstance(item, bool):
+            return False
+    return True
+
+
+def to_bool(sequence: Sequence, state: dict):
+    """Convert every element of ``sequence`` with ``bool``.
+
+    Args:
+        sequence: The values to convert.
+        state: Traversal state (unused here).
+
+    Returns:
+        A tuple of booleans, one per input element.
+    """
+    # Materialise the result: a lazy ``map`` object is not a Sequence and
+    # can only be iterated once.
+    return tuple(map(bool, sequence))
+
+
+@Boolean.register_relationship(Object, Sequence)
+def object_is_bool(sequence: Sequence, state: dict) -> bool:
+    """Relationship Object -> Boolean: defer to ``is_bool``."""
+    verdict = is_bool(sequence, state)
+    return verdict
+
+
+@Boolean.register_transformer(Object, Sequence)
+def object_to_bool(sequence: Sequence, state: dict) -> Sequence:
+    """Transformer Object -> Boolean: defer to ``to_bool``."""
+    converted = to_bool(sequence, state)
+    return converted
+
+
+@Boolean.register_relationship(String, Sequence)
+@sequence_handle_none
+def string_is_bool(sequence: Sequence, state: dict):
+    """Return True when every value, lowercased, is "true" or "false"."""
+    accepted = {"true", "false"}
+    for text in sequence:
+        if text.lower() not in accepted:
+            return False
+    return True
+
+
+@Boolean.register_transformer(String, Sequence)
+def string_to_bool(sequence: Sequence, state: dict):
+    """Convert boolean-like strings to booleans.
+
+    Strings become True when their lowercase form is "true" and False
+    otherwise; non-string values (for example None) are passed through.
+
+    Returns:
+        A tuple with one converted entry per input element.
+    """
+    # Materialise the result: a lazy ``map`` object is not a Sequence and
+    # can only be iterated once.
+    return tuple(v.lower() == "true" if isinstance(v, str) else v for v in sequence)
+
+
+@Boolean.contains_op.register
+def boolean_contains(sequence: Sequence, state: dict) -> bool:
+    """Membership test for Boolean: defer to ``is_bool``."""
+    verdict = is_bool(sequence, state)
+    return verdict
diff --git a/build/lib/visions/backends/python/types/categorical.py b/build/lib/visions/backends/python/types/categorical.py
new file mode 100644
index 000000000..8299fc356
--- /dev/null
+++ b/build/lib/visions/backends/python/types/categorical.py
@@ -0,0 +1,8 @@
+from typing import Sequence
+
+from visions.types.categorical import Categorical
+
+
+@Categorical.contains_op.register
+def categorical_contains(sequence: Sequence, state: dict) -> bool:
+    """Membership test for Categorical on plain sequences: always False."""
+    is_member = False
+    return is_member
diff --git a/build/lib/visions/backends/python/types/complex.py b/build/lib/visions/backends/python/types/complex.py
new file mode 100644
index 000000000..71febe9fb
--- /dev/null
+++ b/build/lib/visions/backends/python/types/complex.py
@@ -0,0 +1,26 @@
+from typing import Sequence
+
+from visions.backends.python.series_utils import sequence_not_empty
+from visions.backends.python.types.float import no_leading_zeros
+from visions.types.complex import Complex
+from visions.types.string import String
+
+
+@Complex.register_relationship(String, Sequence)
+def string_is_complex(sequence: Sequence, state: dict) -> bool:
+    """Check whether the strings in ``sequence`` can be read as complex numbers.
+
+    Returns False when conversion raises ValueError, TypeError or
+    AttributeError; otherwise returns the result of ``no_leading_zeros``
+    applied to the inputs and the real parts of the converted values.
+    """
+    try:
+        converted = list(string_to_complex(sequence, state))
+        real_parts = [number.real for number in converted]
+        return no_leading_zeros(sequence, real_parts)
+    except (ValueError, TypeError, AttributeError):
+        return False
+
+
+@Complex.register_transformer(String, Sequence)
+def string_to_complex(sequence: Sequence, state: dict) -> Sequence:
+    """Convert each element of ``sequence`` with ``complex`` and return a list."""
+    return [complex(item) for item in sequence]
+
+
+@Complex.contains_op.register
+@sequence_not_empty
+def complex_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``complex`` instance."""
+    for item in sequence:
+        if not isinstance(item, complex):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/count.py b/build/lib/visions/backends/python/types/count.py
new file mode 100644
index 000000000..22078aff9
--- /dev/null
+++ b/build/lib/visions/backends/python/types/count.py
@@ -0,0 +1,8 @@
+from typing import Sequence
+
+from visions.types.count import Count
+
+
+@Count.contains_op.register
+def count_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is a non-negative integer.
+
+    Booleans are rejected even though ``bool`` subclasses ``int``, matching
+    the integer and numeric membership tests of this backend.
+    """
+    return all(
+        isinstance(value, int) and not isinstance(value, bool) and value >= 0
+        for value in sequence
+    )
diff --git a/build/lib/visions/backends/python/types/date.py b/build/lib/visions/backends/python/types/date.py
new file mode 100644
index 000000000..da2828c97
--- /dev/null
+++ b/build/lib/visions/backends/python/types/date.py
@@ -0,0 +1,21 @@
+from datetime import date, time
+from typing import Sequence
+
+from visions.types.date import Date
+from visions.types.date_time import DateTime
+
+
+@Date.register_relationship(DateTime, Sequence)
+def datetime_is_date(sequence: Sequence, state: dict) -> bool:
+    """Check whether every datetime in ``sequence`` falls exactly on midnight.
+
+    Returns:
+        True when the time-of-day component of every value equals 00:00.
+    """
+    midnight = time(0, 0)
+    # Compare the time component: a datetime never equals a bare ``time``.
+    return all(v.time() == midnight for v in sequence)
+
+
+@Date.register_transformer(DateTime, Sequence)
+def datetime_to_date(sequence: Sequence, state: dict) -> Sequence:
+    """Return a tuple holding the ``.date()`` of every element of ``sequence``."""
+    return tuple(moment.date() for moment in sequence)
+
+
+@Date.contains_op.register
+def date_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is a ``date`` but not a ``datetime``.
+
+    ``datetime`` subclasses ``date``, so it is excluded explicitly; otherwise
+    a sequence of datetimes would also count as a Date sequence.
+    """
+    from datetime import datetime
+
+    return all(
+        isinstance(value, date) and not isinstance(value, datetime)
+        for value in sequence
+    )
diff --git a/build/lib/visions/backends/python/types/date_time.py b/build/lib/visions/backends/python/types/date_time.py
new file mode 100644
index 000000000..4a1a418e7
--- /dev/null
+++ b/build/lib/visions/backends/python/types/date_time.py
@@ -0,0 +1,30 @@
+from datetime import datetime
+from typing import Sequence
+
+from visions.backends.python.series_utils import sequence_not_empty
+from visions.types.date_time import DateTime
+from visions.types.string import String
+
+
+@DateTime.register_relationship(String, Sequence)
+def string_is_datetime(sequence: Sequence, state: dict) -> bool:
+    """Check whether ``string_to_datetime`` succeeds on ``sequence``.
+
+    Returns False when conversion raises OverflowError, TypeError or
+    ValueError, True otherwise.
+    """
+    try:
+        list(string_to_datetime(sequence, state))
+    except (OverflowError, TypeError, ValueError):
+        return False
+    return True
+
+
+@DateTime.register_transformer(String, Sequence)
+def string_to_datetime(sequence: Sequence, state: dict) -> Sequence:
+    """Parse each string as "%Y-%m-%d %H:%M:%S" and return a tuple of datetimes.
+
+    On Python 3.7+ an alternative would be:
+        return map(datetime.fromisoformat, sequence)
+    """
+    layout = "%Y-%m-%d %H:%M:%S"
+    return tuple(datetime.strptime(text, layout) for text in sequence)
+
+
+@DateTime.contains_op.register
+@sequence_not_empty
+def datetime_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``datetime`` instance."""
+    for item in sequence:
+        if not isinstance(item, datetime):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/email_address.py b/build/lib/visions/backends/python/types/email_address.py
new file mode 100644
index 000000000..bb156fec6
--- /dev/null
+++ b/build/lib/visions/backends/python/types/email_address.py
@@ -0,0 +1,24 @@
+from typing import Sequence
+
+from visions.types.email_address import FQDA, EmailAddress, _to_email
+from visions.types.string import String
+
+
+@EmailAddress.register_relationship(String, Sequence)
+def string_is_email(sequence: Sequence, state: dict) -> bool:
+    """Check whether every string converts to an address with ``local`` and ``fqdn`` set.
+
+    Returns False when conversion raises ValueError, TypeError or
+    AttributeError.
+    """
+    try:
+        for address in string_to_email(sequence, state):
+            if not (address.local and address.fqdn):
+                return False
+        return True
+    except (ValueError, TypeError, AttributeError):
+        return False
+
+
+@EmailAddress.register_transformer(String, Sequence)
+def string_to_email(sequence: Sequence, state: dict) -> Sequence:
+    """Apply ``_to_email`` to each element of ``sequence`` and return a tuple."""
+    return tuple(_to_email(text) for text in sequence)
+
+
+@EmailAddress.contains_op.register
+def email_address_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is an ``FQDA`` instance."""
+    for item in sequence:
+        if not isinstance(item, FQDA):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/file.py b/build/lib/visions/backends/python/types/file.py
new file mode 100644
index 000000000..2886a3bce
--- /dev/null
+++ b/build/lib/visions/backends/python/types/file.py
@@ -0,0 +1,9 @@
+import pathlib
+from typing import Sequence
+
+from visions.types.file import File
+
+
+@File.contains_op.register
+def file_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is a ``pathlib.Path`` that exists on disk."""
+    for candidate in sequence:
+        if not isinstance(candidate, pathlib.Path):
+            return False
+        if not candidate.exists():
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/float.py b/build/lib/visions/backends/python/types/float.py
new file mode 100644
index 000000000..4559b9883
--- /dev/null
+++ b/build/lib/visions/backends/python/types/float.py
@@ -0,0 +1,43 @@
+from typing import Sequence
+
+from visions.backends.python.series_utils import sequence_not_empty
+from visions.types.complex import Complex
+from visions.types.float import Float
+from visions.types.string import String
+
+
+def no_leading_zeros(sequence, coerced_sequence) -> bool:
+    """Return False if any text starts with "0" while its coerced value exceeds 1.
+
+    ``sequence`` holds the original strings and ``coerced_sequence`` the
+    matching numeric values; the two are walked in lockstep.
+    """
+    for text, number in zip(sequence, coerced_sequence):
+        if text[0] == "0" and number > 1:
+            return False
+    return True
+
+
+@Float.register_relationship(String, Sequence)
+def string_is_float(sequence: Sequence, state: dict) -> bool:
+    """Check whether the strings in ``sequence`` can be read as floats.
+
+    Returns:
+        False when conversion fails; otherwise the result of
+        ``no_leading_zeros`` on the inputs and their converted values.
+    """
+    try:
+        coerced = list(string_to_float(sequence, state))
+        return no_leading_zeros(sequence, coerced)
+    except (ValueError, TypeError):
+        # TypeError covers non-string entries such as None, for which
+        # float() does not raise ValueError.
+        return False
+
+
+@Float.register_transformer(String, Sequence)
+def string_to_float(sequence: Sequence, state: dict) -> Sequence:
+    """Convert each element of ``sequence`` with ``float`` and return a tuple."""
+    return tuple(float(text) for text in sequence)
+
+
+@Float.register_relationship(Complex, Sequence)
+def complex_is_float(sequence: Sequence, state: dict) -> bool:
+    """Return True when every value has a zero imaginary part.
+
+    A ValueError raised during the check yields False.
+    """
+    try:
+        for number in sequence:
+            if number.imag != 0:
+                return False
+        return True
+    except ValueError:
+        return False
+
+
+@Float.register_transformer(Complex, Sequence)
+def complex_to_float(sequence: Sequence, state: dict) -> Sequence:
+    """Return a list with the real part of every element of ``sequence``."""
+    return [number.real for number in sequence]
+
+
+@Float.contains_op.register
+@sequence_not_empty
+def float_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``float`` instance."""
+    for item in sequence:
+        if not isinstance(item, float):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/geometry.py b/build/lib/visions/backends/python/types/geometry.py
new file mode 100644
index 000000000..5dd6cd17c
--- /dev/null
+++ b/build/lib/visions/backends/python/types/geometry.py
@@ -0,0 +1,38 @@
+import os
+import sys
+from typing import Sequence
+
+from visions.types.geometry import Geometry
+from visions.types.string import String
+
+
+@Geometry.register_relationship(String, Sequence)
+def string_is_geometry(sequence: Sequence, state: dict) -> bool:
+    """Check whether every string in ``sequence`` parses as WKT geometry.
+
+    Shapely logs parse failures at a high severity, so ``sys.stderr`` is
+    redirected to the null device while parsing.
+
+    Returns:
+        True when ``wkt.loads`` gives a truthy result for every value; False
+        when parsing raises one of the handled errors.
+    """
+    import contextlib
+
+    from shapely import wkt
+    from shapely.errors import WKTReadingError
+
+    # TODO: use coercion wrapper for this
+    # The context managers close the devnull handle and put back whatever
+    # stream was installed as sys.stderr before the call.
+    with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
+        try:
+            return all(wkt.loads(value) for value in sequence)
+        except (WKTReadingError, AttributeError, UnicodeEncodeError, TypeError):
+            return False
+
+
+@Geometry.register_transformer(String, Sequence)
+def string_to_geometry(sequence: Sequence, state: dict) -> Sequence:
+    """Parse each element of ``sequence`` with ``shapely.wkt.loads`` and return a tuple."""
+    from shapely import wkt
+
+    return tuple(wkt.loads(text) for text in sequence)
+
+
+@Geometry.contains_op.register
+def geometry_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when the type of every element subclasses shapely's ``BaseGeometry``."""
+    from shapely.geometry.base import BaseGeometry
+
+    for shape in sequence:
+        if not issubclass(type(shape), BaseGeometry):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/image.py b/build/lib/visions/backends/python/types/image.py
new file mode 100644
index 000000000..f5ff00296
--- /dev/null
+++ b/build/lib/visions/backends/python/types/image.py
@@ -0,0 +1,12 @@
+import imghdr
+import pathlib
+from typing import Sequence
+
+from visions.types.image import Image
+
+
+@Image.contains_op.register
+def image_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is an existing ``pathlib.Path`` that ``imghdr.what`` recognises."""
+    for candidate in sequence:
+        if not isinstance(candidate, pathlib.Path):
+            return False
+        if not candidate.exists():
+            return False
+        if not imghdr.what(candidate):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/integer.py b/build/lib/visions/backends/python/types/integer.py
new file mode 100644
index 000000000..79d697e58
--- /dev/null
+++ b/build/lib/visions/backends/python/types/integer.py
@@ -0,0 +1,26 @@
+from typing import Sequence
+
+from visions.backends.python.series_utils import sequence_not_empty
+from visions.types.float import Float
+from visions.types.integer import Integer
+
+
+@Integer.register_relationship(Float, Sequence)
+def float_is_int(sequence: Sequence, state: dict) -> bool:
+    """Return True when every value equals its own ``int`` conversion.
+
+    ValueError, TypeError or OverflowError during the check yields False.
+    """
+    try:
+        for number in sequence:
+            if int(number) != number:
+                return False
+        return True
+    except (ValueError, TypeError, OverflowError):
+        return False
+
+
+@Integer.register_transformer(Float, Sequence)
+def float_to_int(sequence: Sequence, state: dict) -> Sequence:
+    """Convert each element of ``sequence`` with ``int`` and return a tuple."""
+    return tuple(int(number) for number in sequence)
+
+
+@Integer.contains_op.register
+@sequence_not_empty
+def integer_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is an ``int`` that is not a ``bool``."""
+    for item in sequence:
+        if isinstance(item, bool) or not isinstance(item, int):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/ip_address.py b/build/lib/visions/backends/python/types/ip_address.py
new file mode 100644
index 000000000..cba9dbd5c
--- /dev/null
+++ b/build/lib/visions/backends/python/types/ip_address.py
@@ -0,0 +1,24 @@
+from ipaddress import _BaseAddress, ip_address
+from typing import Sequence
+
+from visions.types.ip_address import IPAddress
+from visions.types.string import String
+
+
+@IPAddress.register_relationship(String, Sequence)
+def string_is_ip_address(sequence: Sequence, state: dict) -> bool:
+    """Check whether ``string_to_ip_address`` succeeds on ``sequence``.
+
+    Returns False when conversion raises ValueError, TypeError or
+    AttributeError, True otherwise.
+    """
+    try:
+        list(string_to_ip_address(sequence, state))
+    except (ValueError, TypeError, AttributeError):
+        return False
+    return True
+
+
+@IPAddress.register_transformer(String, Sequence)
+def string_to_ip_address(sequence: Sequence, state: dict) -> Sequence:
+    """Convert each element with ``ipaddress.ip_address`` and return a tuple."""
+    return tuple(ip_address(text) for text in sequence)
+
+
+@IPAddress.contains_op.register
+def ip_address_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is an ``ipaddress._BaseAddress`` instance."""
+    for item in sequence:
+        if not isinstance(item, _BaseAddress):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/numeric.py b/build/lib/visions/backends/python/types/numeric.py
new file mode 100644
index 000000000..12e3e4aac
--- /dev/null
+++ b/build/lib/visions/backends/python/types/numeric.py
@@ -0,0 +1,12 @@
+import numbers
+from typing import Sequence
+
+from visions.types.numeric import Numeric
+
+
+@Numeric.contains_op.register
+def numeric_contains_op(sequence: Sequence, state: dict):
+    """Return True when every element is a ``numbers.Number`` that is not a ``bool``."""
+    for item in sequence:
+        if isinstance(item, bool) or not isinstance(item, numbers.Number):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/object.py b/build/lib/visions/backends/python/types/object.py
new file mode 100644
index 000000000..5d59bad82
--- /dev/null
+++ b/build/lib/visions/backends/python/types/object.py
@@ -0,0 +1,14 @@
+from typing import Sequence
+
+from visions.backends.python.series_utils import (
+ sequence_handle_none,
+ sequence_not_empty,
+)
+from visions.types.object import Object
+
+
+@Object.contains_op.register
+@sequence_not_empty
+@sequence_handle_none
+def object_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when at least one element is not a float, bool, int or complex."""
+    primitive_types = (float, bool, int, complex)
+    for item in sequence:
+        if not isinstance(item, primitive_types):
+            return True
+    return False
diff --git a/build/lib/visions/backends/python/types/ordinal.py b/build/lib/visions/backends/python/types/ordinal.py
new file mode 100644
index 000000000..dfd500652
--- /dev/null
+++ b/build/lib/visions/backends/python/types/ordinal.py
@@ -0,0 +1,8 @@
+from typing import Sequence
+
+from visions.types.ordinal import Ordinal
+
+
+@Ordinal.contains_op.register
+def ordinal_contains(sequence: Sequence, state: dict) -> bool:
+    """Membership test for Ordinal on plain sequences: always False."""
+    is_member = False
+    return is_member
diff --git a/build/lib/visions/backends/python/types/path.py b/build/lib/visions/backends/python/types/path.py
new file mode 100644
index 000000000..9cada4e1c
--- /dev/null
+++ b/build/lib/visions/backends/python/types/path.py
@@ -0,0 +1,28 @@
+import pathlib
+from typing import Sequence
+
+from visions.types.path import Path
+from visions.types.string import String
+
+
+@Path.register_relationship(String, Sequence)
+def string_is_path(series, state: dict) -> bool:
+    """Check whether every string converts to an absolute path.
+
+    Args:
+        series: The candidate strings.
+        state: Traversal state, forwarded to ``string_to_path``.
+
+    Returns:
+        True when every converted value is absolute; False otherwise, or
+        when conversion raises ``TypeError``.
+    """
+    try:
+        # No defensive copy: string_to_path only reads its input, and
+        # Sequences such as tuples have no ``.copy`` method.
+        s = string_to_path(series, state)
+        return all(value.is_absolute() for value in s)
+    except TypeError:
+        return False
+
+
+@Path.register_transformer(String, Sequence)
+def string_to_path(sequence: Sequence, state: dict) -> Sequence:
+    """Convert strings to pure paths, trying the Windows flavour first.
+
+    If every value is absolute as a ``PureWindowsPath`` those are returned;
+    otherwise the values are converted to ``PurePosixPath`` instead.
+    """
+    windows_paths = tuple(pathlib.PureWindowsPath(text) for text in sequence)
+    if all(candidate.is_absolute() for candidate in windows_paths):
+        return windows_paths
+    return tuple(pathlib.PurePosixPath(text) for text in sequence)
+
+
+@Path.contains_op.register
+def path_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element is an absolute ``pathlib.PurePath``."""
+    for candidate in sequence:
+        if not isinstance(candidate, pathlib.PurePath):
+            return False
+        if not candidate.is_absolute():
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/string.py b/build/lib/visions/backends/python/types/string.py
new file mode 100644
index 000000000..29bc952bb
--- /dev/null
+++ b/build/lib/visions/backends/python/types/string.py
@@ -0,0 +1,14 @@
+from typing import Sequence
+
+from visions.backends.python.series_utils import (
+ sequence_handle_none,
+ sequence_not_empty,
+)
+from visions.types.string import String
+
+
+@String.contains_op.register
+@sequence_not_empty
+@sequence_handle_none
+def string_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``str`` instance."""
+    for item in sequence:
+        if not isinstance(item, str):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/time.py b/build/lib/visions/backends/python/types/time.py
new file mode 100644
index 000000000..407b482a1
--- /dev/null
+++ b/build/lib/visions/backends/python/types/time.py
@@ -0,0 +1,20 @@
+from datetime import time
+from typing import Sequence
+
+# from visions.types.date_time import DateTime
+from visions.types.time import Time
+
+# @Time.register_relationship(DateTime, Sequence)
+# def datetime_is_time(sequence: Sequence, state: dict) -> bool:
+# value = date(1, 1, 1)
+# return all(v == value for v in sequence)
+#
+#
+# @Time.register_transformer(DateTime, Sequence)
+# def datetime_to_time(sequence: Sequence, state: dict) -> Sequence:
+# return map(lambda v: v.time(), sequence)
+
+
+@Time.contains_op.register
+def time_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``datetime.time`` instance."""
+    for item in sequence:
+        if not isinstance(item, time):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/time_delta.py b/build/lib/visions/backends/python/types/time_delta.py
new file mode 100644
index 000000000..e35c0e916
--- /dev/null
+++ b/build/lib/visions/backends/python/types/time_delta.py
@@ -0,0 +1,11 @@
+from datetime import timedelta
+from typing import Sequence
+
+from visions.backends.python.series_utils import sequence_not_empty
+from visions.types.time_delta import TimeDelta
+
+
+@TimeDelta.contains_op.register
+@sequence_not_empty
+def time_delta_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``timedelta`` instance."""
+    for item in sequence:
+        if not isinstance(item, timedelta):
+            return False
+    return True
diff --git a/build/lib/visions/backends/python/types/url.py b/build/lib/visions/backends/python/types/url.py
new file mode 100644
index 000000000..aa5c880d8
--- /dev/null
+++ b/build/lib/visions/backends/python/types/url.py
@@ -0,0 +1,23 @@
+from typing import Sequence
+from urllib.parse import ParseResult, urlparse
+
+from visions.types.string import String
+from visions.types.url import URL
+
+
+@URL.contains_op.register
+def url_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``ParseResult`` instance."""
+    for item in sequence:
+        if not isinstance(item, ParseResult):
+            return False
+    return True
+
+
+@URL.register_transformer(String, Sequence)
+def string_to_url(sequence: Sequence, state: dict) -> Sequence:
+    """Parse each element of ``sequence`` with ``urlparse`` and return a tuple."""
+    return tuple(urlparse(text) for text in sequence)
+
+
+@URL.register_relationship(String, Sequence)
+def string_is_url(sequence: Sequence, state: dict) -> bool:
+    """Check whether every string parses to a URL with both ``netloc`` and ``scheme``.
+
+    Returns False when parsing raises ValueError, TypeError or AttributeError.
+    """
+    try:
+        for parsed in string_to_url(sequence, {}):
+            if not (parsed.netloc and parsed.scheme):
+                return False
+        return True
+    except (ValueError, TypeError, AttributeError):
+        return False
diff --git a/build/lib/visions/backends/python/types/uuid.py b/build/lib/visions/backends/python/types/uuid.py
new file mode 100644
index 000000000..f7672fe29
--- /dev/null
+++ b/build/lib/visions/backends/python/types/uuid.py
@@ -0,0 +1,24 @@
+import uuid
+from typing import Sequence
+
+from visions.types.string import String
+from visions.types.uuid import UUID
+
+
+@UUID.contains_op.register
+def uuid_contains(sequence: Sequence, state: dict) -> bool:
+    """Return True when every element of ``sequence`` is a ``uuid.UUID`` instance."""
+    for item in sequence:
+        if not isinstance(item, uuid.UUID):
+            return False
+    return True
+
+
+@UUID.register_transformer(String, Sequence)
+def string_to_uuid(sequence: Sequence, state: dict) -> Sequence:
+    """Construct a ``uuid.UUID`` from each element of ``sequence`` and return a list."""
+    return list(map(uuid.UUID, sequence))
+
+
+@UUID.register_relationship(String, Sequence)
+def string_is_uuid(sequence: Sequence, state: dict) -> bool:
+    """Check whether every string in ``sequence`` can be parsed as a UUID.
+
+    Args:
+        sequence: The candidate strings.
+        state: Traversal state, forwarded to ``string_to_uuid``.
+
+    Returns:
+        True when conversion succeeds for every value; False when it raises
+        ValueError, TypeError or AttributeError.
+    """
+    try:
+        # ``state`` must be forwarded: omitting it raised a TypeError that
+        # the handler below swallowed, so the check always returned False.
+        string_to_uuid(sequence, state)
+        return True
+    except (ValueError, TypeError, AttributeError):
+        return False
diff --git a/build/lib/visions/backends/shared/__init__.py b/build/lib/visions/backends/shared/__init__.py
new file mode 100644
index 000000000..a66517c65
--- /dev/null
+++ b/build/lib/visions/backends/shared/__init__.py
@@ -0,0 +1 @@
+from . import nan_handling, parallelization_engines, utilities
diff --git a/build/lib/visions/backends/shared/nan_handling.py b/build/lib/visions/backends/shared/nan_handling.py
new file mode 100644
index 000000000..cc2e6dcea
--- /dev/null
+++ b/build/lib/visions/backends/shared/nan_handling.py
@@ -0,0 +1,60 @@
+import math
+from datetime import datetime, timedelta
+
+import numpy as np
+import pandas as pd
+
+from .utilities import has_import
+
+has_numba = has_import("numba")
+
+if has_numba:
+ import numba as nb
+
+
+def nan_mask(array: np.ndarray) -> np.ndarray:
+    """Return the element-wise negation of the missing-value test on ``array``.
+
+    ``np.isnan`` is tried first; if it raises ``TypeError`` the check falls
+    back to ``pd.isna``.
+    """
+    # TODO: Fails for values like None, pandas resolves this but it's complicated some links:
+    # https://github.com/pandas-dev/pandas/blob/3391a348f3f7cd07a96c8e6a4b05e3e9f60c8567/pandas/core/series.py#L192
+    # https://github.com/pandas-dev/pandas/blob/65319af6e563ccbb02fb5152949957b6aef570ef/pandas/core/base.py#L816
+    # https://github.com/pandas-dev/pandas/blob/65319af6e563ccbb02fb5152949957b6aef570ef/pandas/core/dtypes/missing.py#L133
+    # https://github.com/pandas-dev/pandas/blob/65319af6e563ccbb02fb5152949957b6aef570ef/pandas/core/dtypes/missing.py#L202
+    try:
+        missing = np.isnan(array)
+    except TypeError:
+        missing = pd.isna(array)
+    return ~missing
+
+
+# TODO: There are optimizations here, just have to define precisely the desired missing ruleset in the
+# generated jit
+if has_numba:
+
+    def is_missing(x):
+        """
+        Return True if the value is missing, False otherwise.
+
+        Floats are tested with ``np.isnan`` and ``None`` is always missing;
+        any type not handled below is reported as not missing.
+        """
+        if isinstance(x, float):
+            return np.isnan(x)
+        elif isinstance(x, (datetime, timedelta)):
+            # NOTE(review): this calls the value itself with "NaT"; plain
+            # datetime/timedelta instances are not callable -- confirm intent.
+            missing = x("NaT")
+            return x == missing
+        elif x is None:
+            return True
+        else:
+            return False
+
+    # Registers a numba overload for ``is_missing`` whose implementation is
+    # ``is_missing`` itself; presumably so the jitted ``hasna`` below can call
+    # it in nopython mode -- TODO confirm.
+    nb.extending.overload(is_missing)(lambda x: is_missing)
+
+    @nb.jit(nopython=True)
+    def hasna(x: np.ndarray) -> bool:
+        """Return True as soon as ``is_missing`` is true for an item of ``x``."""
+        for item in x:
+            if is_missing(item):
+                return True
+        return False
+
+else:
+
+    # NOTE(review): this branch defines ``anynan`` while the numba branch
+    # defines ``is_missing``/``hasna`` -- confirm callers expect both names.
+    def anynan(array: np.ndarray) -> bool:
+        """Return True if ``math.isnan`` is true for any element of ``array``."""
+        return any(math.isnan(v) for v in array)
diff --git a/build/lib/visions/backends/shared/parallelization_engines.py b/build/lib/visions/backends/shared/parallelization_engines.py
new file mode 100644
index 000000000..f08970501
--- /dev/null
+++ b/build/lib/visions/backends/shared/parallelization_engines.py
@@ -0,0 +1,105 @@
+from typing import Callable, List, Type
+
+import attr
+import pandas as pd
+
+from visions.backends.shared.utilities import has_import
+
+
+@attr.s
+class Engine:
+    """Generic apply engine; both hooks raise NotImplementedError here."""
+
+    # Engine identifier, declared as an attrs attribute.
+    name = attr.ib()
+
+    @classmethod
+    def setup(cls, *args, **kwargs) -> None:
+        """Prepare the engine for use; the generic engine has no setup."""
+        raise NotImplementedError("No setup defined for generic engine")
+
+    @staticmethod
+    def apply(series: pd.Series) -> Callable[[Callable], pd.Series]:
+        """Return a callable that applies a function to ``series``."""
+        raise NotImplementedError("No apply defined for generic engine")
+
+
+class PandasEngine(Engine):
+    """Engine backed by the bound ``Series.apply`` method."""
+
+    name = "pandas"
+    # Already marked as set up: ``setup`` below does nothing.
+    _is_setup = True
+
+    @classmethod
+    def setup(cls, *args, **kwargs) -> None:
+        """No-op; all arguments are ignored."""
+        pass
+
+    @staticmethod
+    def apply(series: pd.Series) -> Callable[[Callable], pd.Series]:
+        """Return the bound ``series.apply`` method."""
+        return series.apply
+
+
+class SwifterEngine(Engine):
+    """Engine backed by the ``swifter`` accessor of a Series."""
+
+    name = "swifter"
+    _is_setup = False
+
+    @classmethod
+    def setup(cls, *args, **kwargs) -> None:
+        """Import ``swifter`` on the first call; later calls return immediately."""
+        if not cls._is_setup:
+            # Presumably imported for its side effect of making
+            # ``Series.swifter`` available -- the name itself is unused.
+            import swifter
+
+            cls._is_setup = True
+
+    @staticmethod
+    def apply(series: pd.Series) -> Callable[[Callable], pd.Series]:
+        """Return the bound ``series.swifter.apply`` method."""
+        accessor = series.swifter
+        return accessor.apply
+
+
+_PANDAS_ENGINES = [PandasEngine, SwifterEngine]
+
+
+class EngineCollection:
+    """Registry of engine classes keyed by their ``name`` attribute."""
+
+    def __init__(self, engines: List[Type[Engine]]):
+        """Index ``engines`` by name; a later duplicate name replaces an earlier one."""
+        registry = {}
+        for engine_cls in engines:
+            registry[engine_cls.name] = engine_cls
+        self.engines = registry
+
+    def is_engine(self, name: str) -> bool:
+        """Return True when an engine is registered under ``name``."""
+        return name in self.engines
+
+    def get(self, name: str) -> Type[Engine]:
+        """Return the engine class registered under ``name`` (KeyError if absent)."""
+        return self.engines[name]
+
+
+class PandasApply:
+    """Hold the selected apply engine class and expose its ``apply`` function."""
+
+    # Engines from ``_PANDAS_ENGINES`` that expose an ``apply`` attribute.
+    supported_engines = EngineCollection(
+        [engine for engine in _PANDAS_ENGINES if hasattr(engine, "apply")]
+    )
+    # Class-level default; the setter below stores an override on the instance.
+    _engine: Type[Engine] = PandasEngine
+
+    @property
+    def engine(self) -> Type[Engine]:
+        """The currently selected engine class."""
+        return self._engine
+
+    @engine.setter
+    def engine(self, value: str, *args, **kwargs) -> None:
+        """Select the engine registered under ``value`` and run its setup.
+
+        Raises:
+            ValueError: if ``value`` is not a supported engine name.
+
+        Note that attribute assignment only ever passes ``value``, so
+        ``args`` and ``kwargs`` are empty when set via ``obj.engine = ...``.
+        """
+        if not self.supported_engines.is_engine(value):
+            raise ValueError(f"{value} is not a supported pandas apply engine")
+        candidate = self.supported_engines.get(value)
+        # Run setup before committing, so a failing setup (e.g. an import
+        # error) leaves the previously selected engine in place.
+        candidate.setup(*args, **kwargs)
+        self._engine = candidate
+
+    @property
+    def apply(self) -> Callable[[pd.Series], Callable[[Callable], pd.Series]]:
+        """The ``apply`` function of the currently selected engine."""
+        return self.engine.apply
+
+
+class PandasHandler:
+    """Own a ``PandasApply`` and pick its default engine at construction time."""
+
+    def __init__(self):
+        """Record whether ``swifter`` is importable, then choose the default engine."""
+        self.has_swifter = has_import("swifter")
+
+        self.applier = PandasApply()
+        self._set_default_apply_engine()
+
+    def _set_default_apply_engine(self) -> None:
+        """Switch the applier to the swifter engine when swifter was found."""
+        if not self.has_swifter:
+            return
+        self.applier.engine = "swifter"
+
+
+_pandas_handler = PandasHandler()
+
+
+def pandas_apply(series: pd.Series, func: Callable) -> pd.Series:
+    """Apply ``func`` to ``series`` through the module-level handler's current engine."""
+    apply_with = _pandas_handler.applier.apply(series)
+    return apply_with(func)
diff --git a/build/lib/visions/backends/shared/utilities.py b/build/lib/visions/backends/shared/utilities.py
new file mode 100644
index 000000000..abb84708b
--- /dev/null
+++ b/build/lib/visions/backends/shared/utilities.py
@@ -0,0 +1,6 @@
+from importlib import util as import_util
+
+
+def has_import(module: str) -> bool:
+    """Return True when ``find_spec`` can locate ``module``, False otherwise."""
+    return import_util.find_spec(module) is not None
diff --git a/build/lib/visions/backends/spark/__init__.py b/build/lib/visions/backends/spark/__init__.py
new file mode 100644
index 000000000..37d88a021
--- /dev/null
+++ b/build/lib/visions/backends/spark/__init__.py
@@ -0,0 +1,2 @@
+import visions.backends.spark.traversal
+import visions.backends.spark.types
diff --git a/build/lib/visions/backends/spark/traversal.py b/build/lib/visions/backends/spark/traversal.py
new file mode 100644
index 000000000..429a38ffe
--- /dev/null
+++ b/build/lib/visions/backends/spark/traversal.py
@@ -0,0 +1,33 @@
+from typing import Dict, List, Tuple, Type
+
+import networkx as nx
+import pandas as pd
+from pyspark.sql.dataframe import DataFrame
+
+from visions.types.type import VisionsBaseType
+from visions.typesets.typeset import traverse_graph, traverse_graph_with_series
+
+T = Type[VisionsBaseType]
+
+
+@traverse_graph.register(DataFrame)
+def _traverse_graph_spark_dataframe(
+    df: DataFrame, root_node: T, graph: nx.DiGraph
+) -> Tuple[DataFrame, Dict[str, List[T]], Dict[str, dict]]:
+    """Traverse the type graph once per column of ``df``.
+
+    Each column is selected on its own and handed to
+    ``traverse_graph_with_series``. The per-column paths and states are
+    returned; the input ``df`` is returned unchanged.
+    """
+    # Run every traversal first, then unpack the results.
+    results = {}
+    for col in df.columns:
+        results[col] = traverse_graph_with_series(root_node, df.select(col), graph)
+
+    inferred_series = {}
+    inferred_paths: Dict[str, List[T]] = {}
+    inferred_states: Dict[str, dict] = {}
+    for col, result in results.items():
+        col_series, col_path, col_state = result
+        assert isinstance(col_path, list)  # Placate the MyPy Gods
+
+        inferred_series[col] = col_series
+        inferred_paths[col] = col_path
+        inferred_states[col] = col_state
+
+    # note inference disabled, return df
+    return df, inferred_paths, inferred_states
diff --git a/build/lib/visions/backends/spark/types/__init__.py b/build/lib/visions/backends/spark/types/__init__.py
new file mode 100644
index 000000000..82aae7a3a
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/__init__.py
@@ -0,0 +1,8 @@
+import visions.backends.spark.types.boolean
+import visions.backends.spark.types.categorical
+import visions.backends.spark.types.date
+import visions.backends.spark.types.float
+import visions.backends.spark.types.integer
+import visions.backends.spark.types.numeric
+import visions.backends.spark.types.object
+import visions.backends.spark.types.string
diff --git a/build/lib/visions/backends/spark/types/boolean.py b/build/lib/visions/backends/spark/types/boolean.py
new file mode 100644
index 000000000..4d4e08be9
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/boolean.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import BooleanType
+
+from visions.types.boolean import Boolean
+
+
+@Boolean.contains_op.register
+def boolean_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field and it is ``BooleanType``."""
+    schema = sequence.schema
+    return len(schema) == 1 and isinstance(schema[0].dataType, BooleanType)
diff --git a/build/lib/visions/backends/spark/types/categorical.py b/build/lib/visions/backends/spark/types/categorical.py
new file mode 100644
index 000000000..fa7f13117
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/categorical.py
@@ -0,0 +1,8 @@
+from pyspark.sql.dataframe import DataFrame
+
+from visions.types.categorical import Categorical
+
+
+@Categorical.contains_op.register
+def categorical_contains(sequence: DataFrame, state: dict) -> bool:
+    """Always return False: no Spark DataFrame is reported as Categorical here."""
+    return False
diff --git a/build/lib/visions/backends/spark/types/date.py b/build/lib/visions/backends/spark/types/date.py
new file mode 100644
index 000000000..fdb5b231f
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/date.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import DateType
+
+from visions.types.date import Date
+
+
+@Date.contains_op.register
+def date_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field and it is ``DateType``."""
+    schema = sequence.schema
+    return len(schema) == 1 and isinstance(schema[0].dataType, DateType)
diff --git a/build/lib/visions/backends/spark/types/datetime.py b/build/lib/visions/backends/spark/types/datetime.py
new file mode 100644
index 000000000..42e24da3b
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/datetime.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import TimestampType
+
+from visions.types.date_time import DateTime
+
+
+@DateTime.contains_op.register
+def datetime_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field and it is ``TimestampType``."""
+    schema = sequence.schema
+    return len(schema) == 1 and isinstance(schema[0].dataType, TimestampType)
diff --git a/build/lib/visions/backends/spark/types/float.py b/build/lib/visions/backends/spark/types/float.py
new file mode 100644
index 000000000..8f8be72ef
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/float.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import DecimalType, DoubleType, FloatType
+
+from visions.types.float import Float
+
+
+@Float.contains_op.register
+def float_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field of float, double or decimal type."""
+    schema = sequence.schema
+    if len(schema) != 1:
+        return False
+    floating_types = (FloatType, DoubleType, DecimalType)
+    return isinstance(schema[0].dataType, floating_types)
diff --git a/build/lib/visions/backends/spark/types/integer.py b/build/lib/visions/backends/spark/types/integer.py
new file mode 100644
index 000000000..2271b7d13
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/integer.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import ByteType, IntegerType, LongType, ShortType
+
+from visions.types.integer import Integer
+
+
+@Integer.contains_op.register
+def integer_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field of byte, short, integer or long type."""
+    schema = sequence.schema
+    if len(schema) != 1:
+        return False
+    integral_types = (ByteType, ShortType, IntegerType, LongType)
+    return isinstance(schema[0].dataType, integral_types)
diff --git a/build/lib/visions/backends/spark/types/numeric.py b/build/lib/visions/backends/spark/types/numeric.py
new file mode 100644
index 000000000..939a1a5a3
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/numeric.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import NumericType
+
+from visions.types.numeric import Numeric
+
+
+@Numeric.contains_op.register
+def numeric_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field and it is a ``NumericType``."""
+    schema = sequence.schema
+    return len(schema) == 1 and isinstance(schema[0].dataType, NumericType)
diff --git a/build/lib/visions/backends/spark/types/object.py b/build/lib/visions/backends/spark/types/object.py
new file mode 100644
index 000000000..00b71fd3c
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/object.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import ArrayType, DateType, MapType, StringType, StructType
+
+from visions.types.object import Object
+
+
+@Object.contains_op.register
+def object_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field of string, date, array, map or struct type."""
+    schema = sequence.schema
+    if len(schema) != 1:
+        return False
+    object_types = (StringType, DateType, ArrayType, MapType, StructType)
+    return isinstance(schema[0].dataType, object_types)
diff --git a/build/lib/visions/backends/spark/types/string.py b/build/lib/visions/backends/spark/types/string.py
new file mode 100644
index 000000000..98fd3536f
--- /dev/null
+++ b/build/lib/visions/backends/spark/types/string.py
@@ -0,0 +1,13 @@
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import StringType
+
+from visions.types.string import String
+
+
+@String.contains_op.register
+def string_contains(sequence: DataFrame, state: dict) -> bool:
+    """Return True when the schema has exactly one field and it is ``StringType``."""
+    schema = sequence.schema
+    return len(schema) == 1 and isinstance(schema[0].dataType, StringType)
diff --git a/build/lib/visions/contrib/README.md b/build/lib/visions/contrib/README.md
new file mode 100644
index 000000000..76b5066f8
--- /dev/null
+++ b/build/lib/visions/contrib/README.md
@@ -0,0 +1,10 @@
+# Contribution Guidelines
+
+Contributions here will go through a standard review process
+to be promoted to standard types. Contributions made
+to the contrib folder will also receive help / guidance whenever
+requested.
+
+
+TODO:
+* Add reference in the docs
\ No newline at end of file
diff --git a/build/lib/visions/contrib/__init__.py b/build/lib/visions/contrib/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/contrib/relations/__init__.py b/build/lib/visions/contrib/relations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/contrib/relations/categorical_to_ordinal.py b/build/lib/visions/contrib/relations/categorical_to_ordinal.py
new file mode 100644
index 000000000..5284f2243
--- /dev/null
+++ b/build/lib/visions/contrib/relations/categorical_to_ordinal.py
@@ -0,0 +1,27 @@
+# import pandas as pd
+#
+# from visions.relations.relations import InferenceRelation
+# from visions.relations.relations_utils import values_are_consecutive
+# from visions.types.ordinal import to_ordinal
+# from visions.utils import func_nullable_series_contains
+#
+#
+# @func_nullable_series_contains
+# def is_ordinal_cat(series: pd.Series, state: dict) -> bool:
+# initial_element = "a"
+# s = series.astype(str)
+# if s.str.len().max() == 1:
+# distinct_values = list(s.str.lower().unique())
+# return initial_element in distinct_values and values_are_consecutive(
+# list(map(ord, distinct_values))
+# )
+# else:
+# return False
+#
+#
+# def categorical_to_ordinal() -> InferenceRelation:
+# from visions.types import Categorical
+#
+# return InferenceRelation(
+# Categorical, relationship=is_ordinal_cat, transformer=to_ordinal
+# )
diff --git a/build/lib/visions/contrib/relations/integer_to_count.py b/build/lib/visions/contrib/relations/integer_to_count.py
new file mode 100644
index 000000000..4c64cf7ae
--- /dev/null
+++ b/build/lib/visions/contrib/relations/integer_to_count.py
@@ -0,0 +1,22 @@
+import numpy as np
+import pandas as pd
+
+from visions.relations.relations import InferenceRelation
+from visions.types.integer import Integer
+
+
+def is_unsigned_int(series: pd.Series, state: dict) -> bool:
+    """Return whether every value in ``series`` is greater than or equal to zero."""
+    # TODO: add coercion, ensure that > uint.MAX raises error
+    non_negative = series >= 0
+    return non_negative.all()
+
+
+def to_unsigned_int(series: pd.Series, state: dict) -> pd.Series:
+    """Return ``series`` cast to the unsigned 64-bit integer dtype."""
+    return series.astype("uint64")
+
+
+def integer_to_count() -> InferenceRelation:
+    """Build the inference relation from ``Integer`` using the unsigned-int check and cast."""
+    relation_params = {
+        "relationship": is_unsigned_int,
+        "transformer": to_unsigned_int,
+        "related_type": Integer,
+    }
+    return InferenceRelation(**relation_params)
diff --git a/build/lib/visions/contrib/relations/integer_to_datetime.py b/build/lib/visions/contrib/relations/integer_to_datetime.py
new file mode 100644
index 000000000..441a1afb4
--- /dev/null
+++ b/build/lib/visions/contrib/relations/integer_to_datetime.py
@@ -0,0 +1,27 @@
+# import pandas as pd
+#
+# from visions.backends.pandas_be import test_utils
+# from visions.relations import InferenceRelation
+# from visions.relations.string_to_datetime import to_datetime_year_month_day
+# from visions.types import Integer
+#
+#
+# def to_datetime(series: pd.Series) -> pd.Series:
+# return pd.to_datetime(series)
+#
+#
+# def _to_datetime(func) -> InferenceRelation:
+# return InferenceRelation(
+# relationship=test_utils.coercion_test(lambda s: func(s.astype(str))),
+# transformer=to_datetime,
+# related_type=Integer,
+# )
+#
+#
+# # TODO: do only convert obvious dates (20191003000000)
+# def integer_to_datetime(cls):
+# return _to_datetime(cls, to_datetime)
+#
+#
+# def integer_to_datetime_year_month_day(cls) -> InferenceRelation:
+# return _to_datetime(cls, to_datetime_year_month_day)
diff --git a/build/lib/visions/contrib/relations/integer_to_ordinal.py b/build/lib/visions/contrib/relations/integer_to_ordinal.py
new file mode 100644
index 000000000..e97f3af70
--- /dev/null
+++ b/build/lib/visions/contrib/relations/integer_to_ordinal.py
@@ -0,0 +1,23 @@
+# import pandas as pd
+#
+# from visions.relations import InferenceRelation
+# from visions.relations.relations_utils import values_are_consecutive
+# from visions.types.ordinal import to_ordinal
+#
+#
+# def is_ordinal_int(s: pd.Series, state: dict) -> bool:
+# initial_element = 1
+# distinct_values = list(s.unique())
+# return (
+# initial_element in distinct_values
+# and values_are_consecutive(distinct_values)
+# and 2 < len(distinct_values) < 10
+# )
+#
+#
+# def integer_to_ordinal() -> InferenceRelation:
+# from visions.types import Integer
+#
+# return InferenceRelation(
+# Integer, relationship=is_ordinal_int, transformer=to_ordinal
+# )
diff --git a/build/lib/visions/contrib/relations/relations_utils.py b/build/lib/visions/contrib/relations/relations_utils.py
new file mode 100644
index 000000000..82f4b5d6b
--- /dev/null
+++ b/build/lib/visions/contrib/relations/relations_utils.py
@@ -0,0 +1,5 @@
+from typing import Sequence
+
+
+def values_are_consecutive(sequence: Sequence) -> bool:
+    """Return True when the sorted values equal ``range(min, max + 1)`` exactly."""
+    ordered = sorted(sequence)
+    expected = list(range(min(sequence), max(sequence) + 1))
+    return ordered == expected
diff --git a/build/lib/visions/contrib/relations/string_to_categorical.py b/build/lib/visions/contrib/relations/string_to_categorical.py
new file mode 100644
index 000000000..fc4251292
--- /dev/null
+++ b/build/lib/visions/contrib/relations/string_to_categorical.py
@@ -0,0 +1,16 @@
+# from visions import String
+# from visions.relations.relations import InferenceRelation
+#
+#
+# def string_to_categorical_distinct_count() -> InferenceRelation:
+# """Convert string to categorical when it has fewer than 50% unique values.
+#
+# Returns:
+# relation
+# """
+# # TODO: only when not any other string relation (either exclude others or have ordering and evaluate last)
+# return InferenceRelation(
+# relationship=lambda s, state: s.nunique() / len(s) < 0.5,
+# transformer=lambda s: s.astype("category"),
+# related_type=String,
+# )
diff --git a/build/lib/visions/contrib/relations/string_to_datetime.py b/build/lib/visions/contrib/relations/string_to_datetime.py
new file mode 100644
index 000000000..5fb6c80fb
--- /dev/null
+++ b/build/lib/visions/contrib/relations/string_to_datetime.py
@@ -0,0 +1,64 @@
+# import pandas as pd
+#
+# from visions import String
+# from visions.backends.pandas_be import test_utils
+# from visions.relations import InferenceRelation
+#
+#
+# def to_datetime_year_week(series: pd.Series) -> pd.Series:
+# """Convert a series of the format YYYY/UU (year, week) to datetime.
+# A '0' is added as day dummy value, as pandas requires a day value to parse.
+#
+# Args:
+# series: the Series to parse
+#
+# Returns:
+# A datetime series
+#
+# Examples:
+# >>> series = pd.Series(['2018/47', '2018/12', '2018/03'])
+# >>> parsed_series = to_datetime_year_week(series)
+# >>> print(parsed_series.dt.week)
+# 0 47
+# 1 12
+# 2 3
+# dtype: int64
+# """
+# return pd.to_datetime(series + "0", format="%Y/%U%w")
+#
+#
+# def to_datetime_year_month_day(series: pd.Series) -> pd.Series:
+# """Convert a series of the format YYYYMMDD (year, month, day) to datetime.
+#
+# Args:
+# series: the Series to parse
+#
+# Returns:
+# A datetime series
+#
+# Examples:
+# >>> series = pd.Series(['20181201', '20181202', '20181203'])
+# >>> parsed_series = to_datetime_year_month_day(series)
+# >>> print(parsed_series.dt.day)
+# 0 1
+# 1 2
+# 2 3
+# dtype: int64
+# """
+# return pd.to_datetime(series, format="%Y%m%d")
+#
+#
+# def _to_datetime(func) -> InferenceRelation:
+# return InferenceRelation(
+# relationship=test_utils.coercion_test(func),
+# transformer=func,
+# related_type=String,
+# )
+#
+#
+# def string_to_datetime_year_week() -> InferenceRelation:
+# return _to_datetime(to_datetime_year_week)
+#
+#
+# def string_to_datetime_year_month_day() -> InferenceRelation:
+# return _to_datetime(to_datetime_year_month_day)
diff --git a/build/lib/visions/contrib/relations/string_to_ordinal.py b/build/lib/visions/contrib/relations/string_to_ordinal.py
new file mode 100644
index 000000000..2815cdd8e
--- /dev/null
+++ b/build/lib/visions/contrib/relations/string_to_ordinal.py
@@ -0,0 +1,25 @@
+# import pandas as pd
+#
+# from visions.relations.relations import InferenceRelation
+# from visions.relations.relations_utils import values_are_consecutive
+# from visions.types.ordinal import to_ordinal
+#
+#
+# def is_ordinal_str(s: pd.Series, state: dict) -> bool:
+# if s.str.len().max() == 1:
+# unique_values = list(s[s.notna()].str.lower().unique())
+# return "a" in unique_values and values_are_consecutive(
+# list(map(ord, unique_values))
+# )
+# else:
+# return False
+#
+#
+# def string_to_ordinal() -> InferenceRelation:
+# from visions.types import String
+#
+# return InferenceRelation(
+# related_type=String,
+# relationship=is_ordinal_str,
+# transformer=to_ordinal,
+# )
diff --git a/build/lib/visions/contrib/types/__init__.py b/build/lib/visions/contrib/types/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/contrib/typesets/__init__.py b/build/lib/visions/contrib/typesets/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/declarative.py b/build/lib/visions/declarative.py
new file mode 100644
index 000000000..6ccd11dca
--- /dev/null
+++ b/build/lib/visions/declarative.py
@@ -0,0 +1,52 @@
+from typing import Any, Callable, List, Optional, Sequence, Type, TypeVar, Union
+
+from visions.relations import IdentityRelation, InferenceRelation
+from visions.types.type import VisionsBaseType
+
+T = TypeVar("T")
+
+
+def process_relation(items: Union[dict, Type[VisionsBaseType]]) -> IdentityRelation:
+    """Build an ``IdentityRelation`` from a dict of params or a related type.
+
+    Args:
+        items: either keyword arguments for ``IdentityRelation`` as a dict, or
+            a ``VisionsBaseType`` subclass to use as ``related_type``.
+
+    Returns:
+        The constructed ``IdentityRelation``.
+
+    Raises:
+        TypeError: if ``items`` is neither a dict nor a ``VisionsBaseType`` subclass.
+    """
+    if isinstance(items, dict):
+        return IdentityRelation(**items)
+    # Check for a class first: issubclass() raises its own, less helpful
+    # TypeError when given a non-class (e.g. a str), bypassing the message below.
+    if isinstance(items, type) and issubclass(items, VisionsBaseType):
+        return IdentityRelation(related_type=items)
+    raise TypeError("identity should be a list, a dict of params or related_type.")
+
+
+def create_type(
+    name: str,
+    contains: Callable[[Any, dict], bool],
+    identity: Optional[
+        Union[Type[VisionsBaseType], List[Union[dict, Type[VisionsBaseType]]], dict]
+    ] = None,
+    inference: Optional[Union[List[dict], dict]] = None,
+):
+    """Create a new ``VisionsBaseType`` subclass from declarative pieces.
+
+    Args:
+        name: name of the class to create.
+        contains: callable taking ``(series, state)``; wrapped as ``contains_op``.
+        identity: a related type, a dict of ``IdentityRelation`` params, or a
+            sequence of either; None means no identity relations.
+        inference: a dict of ``InferenceRelation`` params or a list of such
+            dicts; None means no inference relations.
+
+    Returns:
+        The new class, with ``get_relations`` and ``contains_op`` static methods.
+    """
+
+    def get_relations():
+        # Identity relations come first, in the order given.
+        if identity is None:
+            relations = []
+        elif isinstance(identity, Sequence):
+            relations = [process_relation(item) for item in identity]
+        else:
+            relations = [process_relation(identity)]
+
+        if inference is None:
+            return relations
+
+        if isinstance(inference, dict):
+            inference_params = [inference]
+        elif isinstance(inference, list):
+            inference_params = inference
+        else:
+            raise TypeError("inference should be a list or a dict of params.")
+
+        relations.extend(InferenceRelation(**params) for params in inference_params)
+        return relations
+
+    def contains_op(series, state):
+        return contains(series, state)
+
+    namespace = {
+        "get_relations": staticmethod(get_relations),
+        "contains_op": staticmethod(contains_op),
+    }
+    return type(name, (VisionsBaseType,), namespace)
diff --git a/build/lib/visions/dtypes/__init__.py b/build/lib/visions/dtypes/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/dtypes/boolean.py b/build/lib/visions/dtypes/boolean.py
new file mode 100644
index 000000000..508b20659
--- /dev/null
+++ b/build/lib/visions/dtypes/boolean.py
@@ -0,0 +1,776 @@
+import numbers
+import warnings
+from typing import Type
+
+import numpy as np
+import pandas
+from pandas._libs import lib
+from pandas.compat import set_function_name
+from pandas.core import nanops, ops
+from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.cast import astype_nansafe
+from pandas.core.dtypes.common import (
+ is_bool_dtype,
+ is_float,
+ is_float_dtype,
+ is_integer,
+ is_integer_dtype,
+ is_list_like,
+ is_object_dtype,
+ is_scalar,
+)
+from pandas.core.dtypes.dtypes import register_extension_dtype
+
+# pandas renamed ABCIndexClass to ABCIndex in 1.3. Compare only the major and
+# minor components: later parts of pre-release or dev versions (for example
+# "1.3.0rc1") are not plain integers and would make int() raise ValueError.
+if tuple(int(part) for part in pandas.__version__.split(".")[:2]) < (1, 3):
+    from pandas.core.dtypes.generic import ABCIndexClass
+
+    dtg = ABCIndexClass
+else:
+    from pandas.core.dtypes.generic import ABCIndex
+
+    dtg = ABCIndex
+
+from pandas.core.dtypes.generic import ABCSeries
+from pandas.core.dtypes.missing import isna, notna
+from pandas.core.tools.numeric import to_numeric
+from pandas.util._decorators import cache_readonly
+
+
+class _BoolDtype(ExtensionDtype):
+    """
+    An ExtensionDtype for boolean data; its array type is ``BoolArray``.
+
+    ``name``, ``base``, ``type`` and ``na_value`` all default to None on this
+    class. Presumably concrete subclasses set ``name`` and ``type`` when they
+    are created -- TODO confirm against the subclass definitions.
+    """
+
+    name = None  # type: str
+    base = None
+    type = None  # type: Type
+    na_value = None
+
+    def __repr__(self):
+        """When the user calls `repr(series.dtype)`"""
+        return "BoolDtype()"
+
+    @property
+    def _is_boolean(self) -> bool:
+        """Results in `pandas.api.types.is_boolean_dtype` recognizing this type."""
+        return True
+
+    @cache_readonly
+    def numpy_dtype(self):
+        """Return the numpy dtype built from ``self.type``"""
+        return np.dtype(self.type)
+
+    @cache_readonly
+    def kind(self):
+        """Return the ``kind`` character of the underlying numpy dtype."""
+        return self.numpy_dtype.kind
+
+    @cache_readonly
+    def itemsize(self):
+        """Return the ``itemsize`` in bytes of the underlying numpy dtype"""
+        return self.numpy_dtype.itemsize
+
+    @classmethod
+    def construct_array_type(cls):
+        """Return the array type associated with this dtype
+
+        Returns
+        -------
+        type
+        """
+        return BoolArray
+
+
+def boolean_array(values, dtype=None, copy=False):
+    """
+    Coerce the values and wrap the result in a BoolArray.
+
+    Parameters
+    ----------
+    values : 1D list-like
+    dtype : dtype, optional
+        dtype to coerce
+    copy : boolean, default False
+
+    Returns
+    -------
+    BoolArray
+
+    Raises
+    ------
+    TypeError if incompatible types
+    """
+    data, mask = coerce_to_array(values, dtype=dtype, copy=copy)
+    result = BoolArray(data, mask)
+    return result
+
+
+def safe_cast(values, dtype, copy):
+    """
+    Cast ``values`` to ``dtype`` without changing any element's value.
+
+    A cast under numpy's "safe" rule is tried first. If that raises
+    TypeError, an unrestricted cast is accepted only when every element
+    still compares equal to the original; otherwise TypeError is raised.
+    """
+    try:
+        return values.astype(dtype, casting="safe", copy=copy)
+    except TypeError:
+        converted = values.astype(dtype, copy=copy)
+        if not (converted == values).all():
+            raise TypeError(
+                "cannot safely cast non-equivalent {} to {}".format(
+                    values.dtype, np.dtype(dtype)
+                )
+            )
+        return converted
+
+
+def coerce_to_array(values, dtype, mask=None, copy=False):
+    """
+    Coerce the input values array to numpy arrays with a mask
+
+    Parameters
+    ----------
+    values : 1D list-like
+    dtype : dtype or None
+        Anything that is not already a ``_BoolDtype`` instance is looked up
+        in ``_dtypes`` by its numpy dtype name.
+    mask : boolean 1D array, optional
+        Computed with ``isna(values)`` when omitted.
+    copy : boolean, default False
+        if True, copy the input
+
+    Returns
+    -------
+    tuple of (values, mask)
+
+    Raises
+    ------
+    ValueError
+        If ``dtype`` is given but its numpy name is not a key of ``_dtypes``.
+    TypeError
+        If the values are of an unsupported kind, if ``values`` or ``mask``
+        is not one-dimensional, or if ``safe_cast`` rejects the conversion.
+    """
+    # if values is an integer numpy array, preserve its dtype
+    if dtype is None and hasattr(values, "dtype"):
+        if is_integer_dtype(values.dtype):
+            dtype = values.dtype
+
+    if dtype is not None:
+        # if isinstance(dtype, str) and (
+        # dtype.startswith("Int") or dtype.startswith("UInt")
+        # ):
+        # # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
+        # # https://github.com/numpy/numpy/pull/7476
+        # dtype = dtype.lower()
+
+        if not issubclass(type(dtype), _BoolDtype):
+            try:
+                dtype = _dtypes[str(np.dtype(dtype))]
+            except KeyError:
+                raise ValueError(f"invalid dtype specified {dtype}")
+
+    # A BoolArray already carries data and mask: reuse them and return early.
+    if isinstance(values, BoolArray):
+        values, mask = values._data, values._mask
+        if dtype is not None:
+            values = values.astype(dtype.numpy_dtype, copy=False)
+
+        if copy:
+            values = values.copy()
+            mask = mask.copy()
+        return values, mask
+
+    values = np.array(values, copy=copy)
+    if is_object_dtype(values):
+        inferred_type = lib.infer_dtype(values, skipna=True)
+        if inferred_type == "empty":
+            # NOTE(review): ``_BoolDtype.na_value`` is None on the base class;
+            # confirm that filling a float array with it behaves as intended.
+            values = np.empty(len(values))
+            values.fill(_BoolDtype.na_value)
+        elif inferred_type not in [
+            "floating",
+            "integer",
+            "boolean",
+            "mixed-integer",
+            "mixed-integer-float",
+        ]:
+            raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
+
+    elif is_bool_dtype(values) and is_integer_dtype(dtype):
+        values = np.array(values, dtype=int, copy=copy)
+
+    elif not (
+        is_integer_dtype(values) or is_float_dtype(values) or is_bool_dtype(values)
+    ):
+        raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
+
+    if mask is None:
+        mask = isna(values)
+    else:
+        assert len(mask) == len(values)
+
+    if not values.ndim == 1:
+        raise TypeError("values must be a 1D list-like")
+    if not mask.ndim == 1:
+        raise TypeError("mask must be a 1D list-like")
+
+    # infer dtype if needed: default to numpy bool, else use the dtype's ``type``
+    if dtype is None:
+        dtype = np.dtype("bool")
+    else:
+        dtype = dtype.type
+
+    # if we are float, let's make sure that we can
+    # safely cast
+
+    # we copy as need to coerce here; masked positions are overwritten with 1
+    # before the cast (presumably a placeholder hidden by the mask -- confirm)
+    if mask.any():
+        values = values.copy()
+        values[mask] = 1
+        values = safe_cast(values, dtype, copy=False)
+    else:
+        values = safe_cast(values, dtype, copy=False)
+
+    return values, mask
+
+
+class BoolArray(ExtensionArray, ExtensionOpsMixin):
+ """
+ Array of integer (optional missing) values.
+
+ .. versionadded:: 0.24.0
+
+ .. warning::
+
+ BoolArray is currently experimental, and its API or internal
+ implementation may change without warning.
+
+ We represent an BoolArray with 2 numpy arrays:
+
+ - data: contains a numpy integer array of the appropriate dtype
+ - mask: a boolean array holding a mask on the data, True is missing
+
+ To construct an BoolArray from generic array-like input, use
+ :func:`pandas.array` with one of the integer dtypes (see examples).
+
+ See :ref:`integer_na` for more.
+
+ Parameters
+ ----------
+ values : numpy.ndarray
+ A 1-d integer-dtype array.
+ mask : numpy.ndarray
+ A 1-d boolean-dtype array indicating missing values.
+ copy : bool, default False
+ Whether to copy the `values` and `mask`.
+
+ Attributes
+ ----------
+ None
+
+ Methods
+ -------
+ None
+
+ Returns
+ -------
+ BoolArray
+
+ Examples
+ --------
+ Create an BoolArray with :func:`pandas.array`.
+
+ >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
+ >>> int_array
+
+ [1, NaN, 3]
+ Length: 3, dtype: Int32
+
+ String aliases for the dtypes are also available. They are capitalized.
+
+ >>> pd.array([1, None, 3], dtype='Int32')
+
+ [1, NaN, 3]
+ Length: 3, dtype: Int32
+
+ >>> pd.array([1, None, 3], dtype='UInt16')
+
+ [1, NaN, 3]
+ Length: 3, dtype: UInt16
+ """
+
+ @cache_readonly
+ def dtype(self):
+ return _dtypes[str(self._data.dtype)]
+
+ def __init__(self, values, mask, copy=False):
+ if not (
+ isinstance(values, np.ndarray)
+ and is_integer_dtype(values.dtype)
+ or is_bool_dtype(values.dtype)
+ ):
+ raise TypeError(
+ "values should be integer numpy array. Use "
+ "the 'integer_array' function instead"
+ )
+ if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
+ raise TypeError(
+ "mask should be boolean numpy array. Use "
+ "the 'integer_array' function instead"
+ )
+
+ if copy:
+ values = values.copy()
+ mask = mask.copy()
+
+ self._data = values
+ self._mask = mask
+
+ @classmethod
+ def _from_sequence(cls, scalars, dtype=None, copy=False):
+ return boolean_array(scalars, dtype=dtype, copy=copy)
+
+ @classmethod
+ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+ scalars = to_numeric(strings, errors="raise")
+ return cls._from_sequence(scalars, dtype, copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """Rebuild an array from factorized ``values`` using ``original``'s dtype."""
+        return boolean_array(values, dtype=original.dtype)
+
+    def _formatter(self, boxed=False):
+        """Return a callable rendering one element as text.
+
+        Missing elements (per ``isna``) are rendered as the string "None";
+        everything else goes through ``str``. ``boxed`` is not consulted.
+        """
+
+        def fmt(x):
+            return "None" if isna(x) else str(x)
+
+        return fmt
+
+    def __getitem__(self, item):
+        """Return a scalar for an integer ``item``, otherwise a new array.
+
+        For an integer position the dtype's ``na_value`` is returned when the
+        mask is set at that position; any other indexer is applied to both the
+        data and the mask and wrapped in a new instance of this class.
+        """
+        if not is_integer(item):
+            return type(self)(self._data[item], self._mask[item])
+        if self._mask[item]:
+            return self.dtype.na_value
+        return self._data[item]
+
+    def _coerce_to_ndarray(self):
+        """
+        Coerce to an ndarray of object dtype.
+
+        The data is cast to ``object`` (``astype`` returns a new array) and the
+        masked positions are overwritten with ``self._na_value``.
+        """
+
+        # TODO(jreback) make this better
+        data = self._data.astype(object)
+        data[self._mask] = self._na_value
+        return data
+
+    __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
+
+    def __array__(self, dtype=None):
+        """
+        The array interface: return the values as an object-dtype ndarray.
+
+        An object array is returned to preserve our scalar values. The
+        ``dtype`` argument is accepted but not used by this implementation.
+        """
+        return self._coerce_to_ndarray()
+
+    # Operand types (besides BoolArray itself) that __array_ufunc__ accepts.
+    _HANDLED_TYPES = (np.ndarray, numbers.Number)
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        """Apply a NumPy ufunc to the underlying data and re-apply the mask.
+
+        Returns ``NotImplemented`` when any input or ``out`` operand is not a
+        BoolArray or one of ``_HANDLED_TYPES``. Binary operations are first
+        offered to the dunder methods via
+        ``ops.maybe_dispatch_ufunc_to_dunder_op``; otherwise the ufunc runs on
+        the raw ``_data`` arrays and the union of all input masks is applied to
+        every output.
+
+        Raises
+        ------
+        NotImplementedError
+            For ``method == "reduce"``.
+        """
+        # For BoolArray inputs, we apply the ufunc to ._data
+        # and mask the result.
+        if method == "reduce":
+            # Not clear how to handle missing values in reductions. Raise.
+            raise NotImplementedError("The 'reduce' method is not supported.")
+        out = kwargs.get("out", ())
+
+        for x in inputs + out:
+            if not isinstance(x, self._HANDLED_TYPES + (BoolArray,)):
+                return NotImplemented
+
+        # for binary ops, use our custom dunder methods
+        result = ops.maybe_dispatch_ufunc_to_dunder_op(
+            self, ufunc, method, *inputs, **kwargs
+        )
+        if result is not NotImplemented:
+            return result
+
+        # Combined mask: an output element is missing if any input was.
+        mask = np.zeros(len(self), dtype=bool)
+        inputs2 = []
+        for x in inputs:
+            if isinstance(x, BoolArray):
+                mask |= x._mask
+                inputs2.append(x._data)
+            else:
+                inputs2.append(x)
+
+        def reconstruct(x):
+            # we don't worry about scalar `x` here, since we
+            # raise for reduce up above.
+
+            if is_integer_dtype(x.dtype):
+                m = mask.copy()
+                return BoolArray(x, m)
+            else:
+                x[mask] = _BoolDtype.na_value
+                return x
+
+        result = getattr(ufunc, method)(*inputs2, **kwargs)
+        if isinstance(result, tuple):
+            # Multi-output ufuncs (e.g. np.modf): the tuple used to be built
+            # and then dropped, so callers received None.
+            return tuple(reconstruct(x) for x in result)
+        return reconstruct(result)
+
+    def __iter__(self):
+        """Yield each element, substituting the dtype's ``na_value`` where masked."""
+        for value, missing in zip(self._data, self._mask):
+            yield self.dtype.na_value if missing else value
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        """Take elements at ``indexer`` from both the data and the mask.
+
+        Parameters
+        ----------
+        indexer : sequence of int
+            Positions to take; forwarded to ``pandas.api.extensions.take``.
+        allow_fill : bool, default False
+            Forwarded to ``pandas.api.extensions.take``.
+        fill_value : scalar, optional
+            Value used for filled positions. When it is missing (per ``isna``)
+            the data is filled with 1 internally and the mask marks the
+            position as missing.
+
+        Returns
+        -------
+        A new instance of this class built from the taken data and mask.
+        """
+        from pandas.api.extensions import take
+
+        # we always fill with 1 internally
+        # to avoid upcasting
+        data_fill_value = 1 if isna(fill_value) else fill_value
+        result = take(
+            self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill
+        )
+
+        # Filled positions are flagged as missing in the mask by default.
+        mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill)
+
+        # if we are filling
+        # we only fill where the indexer is null
+        # not existing missing values
+        # TODO(jreback) what if we have a non-na float as a fill value?
+        if allow_fill and notna(fill_value):
+            fill_mask = np.asarray(indexer) == -1
+            result[fill_mask] = fill_value
+            # XOR clears the mask exactly where a real fill value was written.
+            mask = mask ^ fill_mask
+
+        return type(self)(result, mask, copy=False)
+
+    def copy(self):
+        """Return a new instance backed by copies of the data and mask arrays."""
+        copied_data = self._data.copy()
+        copied_mask = self._mask.copy()
+        # The arrays were just copied, so the constructor must not copy again.
+        return type(self)(copied_data, copied_mask, copy=False)
+
+    def __setitem__(self, key, value):
+        """Assign ``value`` at ``key`` in both the data and the mask.
+
+        ``value`` is normalised through ``coerce_to_array`` with this array's
+        dtype; a scalar is wrapped in a list for that call and unwrapped again
+        afterwards.
+        """
+        _is_scalar = is_scalar(value)
+        if _is_scalar:
+            value = [value]
+        value, mask = coerce_to_array(value, dtype=self.dtype)
+
+        if _is_scalar:
+            # Undo the temporary list wrapping.
+            value = value[0]
+            mask = mask[0]
+
+        self._data[key] = value
+        self._mask[key] = mask
+
+    def __len__(self):
+        """Return the number of elements, i.e. the length of ``_data``."""
+        return len(self._data)
+
+    @property
+    def nbytes(self):
+        """Return the combined byte size of the data and mask arrays."""
+        return self._data.nbytes + self._mask.nbytes
+
+    def isna(self):
+        """Return the mask array itself (not a copy); True marks a missing value."""
+        return self._mask
+
+    @property
+    def _na_value(self):
+        """Return the missing-value marker defined on ``_BoolDtype``."""
+        return _BoolDtype.na_value
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        """Concatenate the data and mask arrays of ``to_concat`` into one array."""
+        joined_data = np.concatenate([part._data for part in to_concat])
+        joined_mask = np.concatenate([part._mask for part in to_concat])
+        return cls(joined_data, joined_mask)
+
+    def astype(self, dtype, copy=True):
+        """
+        Cast to a NumPy array or BoolArray with 'dtype'.
+
+        Parameters
+        ----------
+        dtype : str or dtype
+            Typecode or data-type to which the array is cast.
+        copy : bool, default True
+            Accepted for interface compatibility.
+            NOTE(review): this implementation never reads `copy`; it passes
+            ``copy=False`` / ``copy=None`` to the underlying calls -- confirm
+            that this is intended.
+
+        Returns
+        -------
+        array : ndarray or BoolArray
+            A BoolArray when `dtype` is a ``_BoolDtype`` instance, otherwise
+            the result of ``astype_nansafe`` on the object-dtype values.
+
+        Raises
+        ------
+        TypeError
+            if incompatible type with the target dtype, equivalent of
+            same_kind casting
+        """
+
+        # if we are astyping to an existing _BoolDtype we can fastpath
+        if isinstance(dtype, _BoolDtype):
+            result = self._data.astype(dtype.numpy_dtype, copy=False)
+            return type(self)(result, mask=self._mask, copy=False)
+
+        # coerce
+        data = self._coerce_to_ndarray()
+        return astype_nansafe(data, dtype, copy=None)
+
+    @property
+    def _ndarray_values(self) -> np.ndarray:
+        """Internal pandas method for lossy conversion to a NumPy ndarray.
+
+        This method is not part of the pandas interface.
+
+        The expectation is that this is cheap to compute, and is primarily
+        used for interacting with our indexers.
+
+        The raw ``_data`` array is returned as-is; the mask is not applied,
+        which is what makes the conversion lossy.
+        """
+        return self._data
+
+    def value_counts(self, dropna=True):
+        """
+        Return a Series containing counts of each distinct non-missing value.
+
+        Only values that actually occur among the unmasked data get an entry.
+
+        Parameters
+        ----------
+        dropna : boolean, default True
+            Don't include counts of NaN. When False, one extra entry indexed
+            by NaN holds the number of masked values.
+
+        Returns
+        -------
+        counts : Series
+            Indexed by an object-dtype Index of the counted values.
+
+        See Also
+        --------
+        Series.value_counts
+
+        """
+
+        from pandas import Index, Series
+
+        # compute counts on the data with no nans
+        data = self._data[~self._mask]
+        value_counts = Index(data).value_counts()
+        array = value_counts.values
+
+        # TODO(extension)
+        # if we have allow Index to hold an ExtensionArray
+        # this is easier
+        index = value_counts.index.astype(object)
+
+        # if we want nans, count the mask
+        if not dropna:
+            # TODO(extension)
+            # appending to an Index *always* infers
+            # w/o passing the dtype
+            array = np.append(array, [self._mask.sum()])
+            index = Index(
+                np.concatenate([index.values, np.array([np.nan], dtype=object)]),
+                dtype=object,
+            )
+
+        return Series(array, index=index)
+
+    def _values_for_argsort(self) -> np.ndarray:
+        """Return values for sorting.
+
+        A copy of the data is returned in which masked positions are
+        overwritten with ``data.min() - 1``.
+
+        Returns
+        -------
+        ndarray
+            The transformed values should maintain the ordering between values
+            within the array.
+
+        See Also
+        --------
+        ExtensionArray.argsort
+        """
+        data = self._data.copy()
+        # NOTE(review): `data.min()` raises on an empty array, and for
+        # boolean-dtype data "min - 1" cannot be stored as a value below the
+        # minimum -- confirm how masked entries are meant to sort in both cases.
+        data[self._mask] = data.min() - 1
+        return data
+
+    @classmethod
+    def _create_comparison_method(cls, op):
+        """Build the ``__<op>__`` comparison method for this class.
+
+        The generated method compares ``self._data`` with ``other`` using
+        ``op`` and then overwrites every masked position of the result: with
+        True for "ne" and with False for every other comparison.
+        """
+
+        def cmp_method(self, other):
+            op_name = op.__name__
+            mask = None
+
+            if isinstance(other, (ABCSeries, dtg)):
+                # Rely on pandas to unbox and dispatch to us.
+                return NotImplemented
+
+            if isinstance(other, BoolArray):
+                # Unwrap so the comparison runs on raw data; keep the other
+                # operand's mask to merge below.
+                other, mask = other._data, other._mask
+
+            elif is_list_like(other):
+                other = np.asarray(other)
+                if other.ndim > 0 and len(self) != len(other):
+                    raise ValueError("Lengths must match to compare")
+
+            other = lib.item_from_zerodim(other)
+
+            # numpy will show a DeprecationWarning on invalid elementwise
+            # comparisons, this will raise in the future
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
+                with np.errstate(all="ignore"):
+                    result = op(self._data, other)
+
+            # nans propagate
+            if mask is None:
+                mask = self._mask
+            else:
+                mask = self._mask | mask
+
+            # A missing value compares unequal to everything: only "ne" is True.
+            result[mask] = op_name == "ne"
+            return result
+
+        name = f"__{op.__name__}__"
+        return set_function_name(cmp_method, name, cls)
+
+    def _reduce(self, name, skipna=True, **kwargs):
+        """Reduce the array with the ``nanops`` function called ``"nan" + name``.
+
+        Parameters
+        ----------
+        name : str
+            Reduction name; ``nanops.nan<name>`` is looked up with ``getattr``.
+        skipna : bool, default True
+            Forwarded to the nanops function.
+        **kwargs
+            Accepted but not used by this implementation.
+
+        Returns
+        -------
+        The reduction result; for "sum", "min", "max" and "prod" a non-missing
+        result is converted to ``int`` when that conversion is lossless.
+        """
+        data = self._data
+        mask = self._mask
+
+        # coerce to a nan-aware float if needed
+        if mask.any():
+            data = self._data.astype("float64")
+            data[mask] = self._na_value
+
+        op = getattr(nanops, "nan" + name)
+        result = op(data, axis=0, skipna=skipna, mask=mask)
+
+        # if we have a boolean op, don't coerce
+        if name in ["any", "all"]:
+            pass
+
+        # if we have a preservable numeric op,
+        # provide coercion back to an integer type if possible
+        elif name in ["sum", "min", "max", "prod"] and notna(result):
+            int_result = int(result)
+            if int_result == result:
+                result = int_result
+
+        return result
+
+    def _maybe_mask_result(self, result, mask, other, op_name):
+        """
+        Apply ``mask`` to an arithmetic ``result`` and wrap it if possible.
+
+        Parameters
+        ----------
+        result : array-like
+        mask : array-like bool
+            Updated in place (``|=``) when `result` is float and contains
+            infinities.
+        other : scalar or array-like
+        op_name : str
+
+        Returns
+        -------
+        A plain float array with NaN at masked positions when `other` is float
+        or the operation is a true division; otherwise a new instance of this
+        class built from `result` and `mask`.
+        """
+
+        # may need to fill infs
+        # and mask wraparound
+        if is_float_dtype(result):
+            mask |= (result == np.inf) | (result == -np.inf)
+
+        # if we have a float operand we are by-definition
+        # a float result
+        # or our op is a divide
+        if (is_float_dtype(other) or is_float(other)) or (
+            op_name in ["rtruediv", "truediv"]
+        ):
+            result[mask] = np.nan
+            return result
+
+        return type(self)(result, mask, copy=False)
+
+    @classmethod
+    def _create_arithmetic_method(cls, op):
+        """Build the ``__<op>__`` arithmetic method for this class.
+
+        The generated method validates ``other``, merges the masks of both
+        operands, applies ``op`` to the raw data and hands the result to
+        ``_maybe_mask_result``.
+        """
+
+        def integer_arithmetic_method(self, other):
+
+            op_name = op.__name__
+            mask = None
+
+            if isinstance(other, (ABCSeries, dtg)):
+                # Rely on pandas to unbox and dispatch to us.
+                return NotImplemented
+
+            if getattr(other, "ndim", 0) > 1:
+                raise NotImplementedError("can only perform ops with 1-d structures")
+
+            if isinstance(other, BoolArray):
+                other, mask = other._data, other._mask
+
+            elif getattr(other, "ndim", None) == 0:
+                # zero-dimensional array-like: unwrap to a Python scalar
+                other = other.item()
+
+            elif is_list_like(other):
+                other = np.asarray(other)
+                if not other.ndim:
+                    other = other.item()
+                elif other.ndim == 1:
+                    if not (is_float_dtype(other) or is_integer_dtype(other)):
+                        raise TypeError("can only perform ops with numeric values")
+            else:
+                if not (is_float(other) or is_integer(other)):
+                    raise TypeError("can only perform ops with numeric values")
+
+            # nans propagate
+            if mask is None:
+                mask = self._mask
+            else:
+                mask = self._mask | mask
+
+            # 1 ** np.nan is 1. So we have to unmask those.
+            if op_name == "pow":
+                mask = np.where(self == 1, False, mask)
+
+            elif op_name == "rpow":
+                mask = np.where(other == 1, False, mask)
+
+            with np.errstate(all="ignore"):
+                result = op(self._data, other)
+
+            # divmod returns a tuple
+            if op_name == "divmod":
+                div, mod = result
+                return (
+                    self._maybe_mask_result(div, mask, other, "floordiv"),
+                    self._maybe_mask_result(mod, mask, other, "mod"),
+                )
+
+            return self._maybe_mask_result(result, mask, other, op_name)
+
+        name = f"__{op.__name__}__"
+        return set_function_name(integer_arithmetic_method, name, cls)
+
+
+# Attach the arithmetic and comparison dunder methods to BoolArray.
+BoolArray._add_arithmetic_ops()
+BoolArray._add_comparison_ops()
+
+# Docstring template for the dtype class created below.
+_dtype_docstring = """
+An ExtensionDtype for {dtype} data.
+
+Attributes
+----------
+None
+
+Methods
+-------
+None
+"""
+
+# create the Dtype
+# (a `_BoolDtype` subclass built with `type(...)` and registered with pandas)
+BoolDtype = register_extension_dtype(
+    type(
+        "BoolDtype",
+        (_BoolDtype,),
+        {
+            "type": np.bool_,
+            "name": "Bool",
+            "__doc__": _dtype_docstring.format(dtype="bool"),
+        },
+    )
+)
+
+# Lookup used by BoolArray.dtype, keyed by the numpy dtype name of the data.
+_dtypes = {"bool": BoolDtype()}
diff --git a/build/lib/visions/functional.py b/build/lib/visions/functional.py
new file mode 100644
index 000000000..005d5e9de
--- /dev/null
+++ b/build/lib/visions/functional.py
@@ -0,0 +1,140 @@
+from typing import Dict, List, Sequence, Tuple, Type, Union
+
+import pandas as pd
+
+from visions.types.type import VisionsBaseType
+from visions.typesets.typeset import VisionsTypeset
+
+T = Type[VisionsBaseType]
+
+
+def cast_to_detected(data: Sequence, typeset: VisionsTypeset) -> Sequence:
+    """Cast data into a typeset by first performing type detection against
+    the provided typeset.
+
+    Thin functional wrapper around ``typeset.cast_to_detected(data)``.
+
+    Args:
+        data: the data (e.g. a DataFrame) to cast
+        typeset: the Typeset in which we cast
+
+    Returns:
+        Whatever ``typeset.cast_to_detected`` returns for ``data``.
+        NOTE(review): an earlier docstring promised a tuple of the cast data
+        and the detected types, while the annotation says ``Sequence`` --
+        confirm against the typeset implementation.
+    """
+    return typeset.cast_to_detected(data)
+
+
+def cast_to_inferred(data: Sequence, typeset: VisionsTypeset) -> Sequence:
+    """Cast data into a typeset by first performing type inference against
+    the provided typeset.
+
+    Thin functional wrapper around ``typeset.cast_to_inferred(data)``.
+
+    Args:
+        data: the data (e.g. a DataFrame) to cast
+        typeset: the Typeset in which we cast
+
+    Returns:
+        Whatever ``typeset.cast_to_inferred`` returns for ``data``.
+        NOTE(review): an earlier docstring promised a tuple of the cast data
+        and the inferred types, while the annotation says ``Sequence`` --
+        confirm against the typeset implementation.
+    """
+    return typeset.cast_to_inferred(data)
+
+
+def infer_type(data: Sequence, typeset: VisionsTypeset) -> Union[Dict[str, T], T]:
+    """Infer the types of the data given the typeset.
+
+    Thin functional wrapper around ``typeset.infer_type(data)``.
+
+    Args:
+        data: the data (e.g. a DataFrame) to infer types on
+        typeset: the Typeset that provides the type context
+
+    Returns:
+        Either a dictionary mapping column name to type or a single type, as
+        the return annotation indicates.
+    """
+    return typeset.infer_type(data)
+
+
+def detect_type(data: Sequence, typeset: VisionsTypeset) -> Union[Dict[str, T], T]:
+    """Detect the type in the base graph.
+
+    Thin functional wrapper around ``typeset.detect_type(data)``.
+
+    Args:
+        data: the data (e.g. a DataFrame) to detect types on
+        typeset: the Typeset that provides the type context
+
+    Returns:
+        Either a dictionary mapping column name to type or a single type, as
+        the return annotation indicates.
+    """
+    return typeset.detect_type(data)
+
+
+def compare_detect_inference_frame(
+    data: Sequence, typeset: VisionsTypeset
+) -> List[Tuple[str, T, T]]:
+    """Compare the types given by detection on the base graph and by inference
+    on the relational graph.
+
+    Args:
+        data: the sequence to detect types on
+        typeset: the Typeset that provides the type context
+
+    Returns:
+        One ``(key, detected_type, inferred_type)`` tuple per key present in
+        both results, in the iteration order of the detected-types mapping.
+
+    Raises:
+        AssertionError: if detection or inference did not return a dict.
+
+    Examples:
+        >>> for column, type_before, type_after in compare_detect_inference_frame(data, typeset):
+        >>>    print(f"{column} was {type_before} is {type_after}")
+
+    See Also:
+        :func:`type_inference_report_frame`:
+            Formatted report of the output of this function
+    """
+    detected_types = detect_type(data, typeset)
+    inferred_types = infer_type(data, typeset)
+
+    assert isinstance(detected_types, dict) and isinstance(
+        inferred_types, dict
+    )  # Placate the MyPy Gods
+
+    # Walk the detected mapping instead of a set intersection of the keys:
+    # set iteration order is arbitrary, which made the row order of the
+    # result (and of any report built from it) vary between runs.
+    return [
+        (key, detected_type, inferred_types[key])
+        for key, detected_type in detected_types.items()
+        if key in inferred_types
+    ]
+
+
+# TODO: make independent of pandas
+def type_inference_report_frame(df: pd.DataFrame, typeset: VisionsTypeset) -> str:
+    """Return formatted report of the output of `compare_detect_inference_frame`.
+
+    Args:
+        df: the DataFrame to detect types on
+        typeset: the Typeset that provides the type context
+
+    Returns:
+        Text-based comparative type inference report: one padded line per
+        compared column followed by a summary line.
+
+    Examples:
+        >>> import pandas as pd
+        >>> from visions.functional import type_inference_report_frame
+        >>> from visions.typesets import StandardSet
+        >>>
+        >>> typeset = StandardSet()
+        >>> df = pd.read_csv('dataset.csv')
+        >>>
+        >>> report = type_inference_report_frame(df, typeset)
+        >>> print(report)
+    """
+    padding = 5
+    # `str()` tolerates non-string column labels (ints, tuples, ...) and
+    # `default=0` keeps a frame without columns from raising in `max`.
+    max_column_length = (
+        max((len(str(column)) for column in df.columns), default=0) + padding
+    )
+    max_type_length = 30
+
+    lines = []
+    change_count = 0
+    for column, type_before, type_after in compare_detect_inference_frame(df, typeset):
+        if type_before != type_after:
+            fill = "!="
+            change_count += 1
+        else:
+            fill = "=="
+        # `!s` converts to text before padding: an alignment spec is rejected
+        # by objects that only inherit the default `__format__` (plain classes
+        # among them).
+        lines.append(
+            f"{column!s: <{max_column_length}} {type_before!s: <{max_type_length}} "
+            f"{fill} "
+            f"{type_after!s: <{max_type_length}} \n"
+        )
+    lines.append(
+        "In total {change_count} out of {type_count} types were changed.\n".format(
+            change_count=change_count, type_count=len(df.columns)
+        )
+    )
+    return "".join(lines)
diff --git a/build/lib/visions/py.typed b/build/lib/visions/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/relations/__init__.py b/build/lib/visions/relations/__init__.py
new file mode 100644
index 000000000..b48c2456d
--- /dev/null
+++ b/build/lib/visions/relations/__init__.py
@@ -0,0 +1,13 @@
+"""This module contains (predefined) relations."""
+
+from visions.relations.relations import (
+ IdentityRelation,
+ InferenceRelation,
+ TypeRelation,
+)
+
+__all__ = [
+ "IdentityRelation",
+ "InferenceRelation",
+ "TypeRelation",
+]
diff --git a/build/lib/visions/relations/relations.py b/build/lib/visions/relations/relations.py
new file mode 100644
index 000000000..e74d5fe40
--- /dev/null
+++ b/build/lib/visions/relations/relations.py
@@ -0,0 +1,88 @@
+from typing import Any, Callable, Optional, TypeVar
+
+import attr
+from multimethod import multimethod
+
+T = TypeVar("T")
+
+
+def func_repr(func: Callable) -> str:
+    """Return the callable's ``__name__``, or "lambda" when it has none."""
+    return getattr(func, "__name__", "lambda")
+
+
+def identity_transform(series: Any, state: dict = dict()) -> Any:
+    """Return ``series`` unchanged; ``state`` is accepted but not used."""
+    # NOTE: the shared ``dict()`` default is harmless here only because the
+    # function never mutates ``state``.
+    return series
+
+
+def default_relation(series: Any, state: dict = dict()) -> bool:
+    """Placeholder relationship check that always raises.
+
+    Used below as the default ``relationship`` of :class:`TypeRelation` and
+    :class:`InferenceRelation`.
+
+    Raises:
+        NotImplementedError: always.
+    """
+    raise NotImplementedError
+
+
+@attr.s(frozen=True)
+class TypeRelation:
+    """Relationship encoder between implementations of :class:`visions.types.type.VisionsBaseType`
+
+    Defines a one to one relationship between two :class:`visions.types.type.VisionsBaseType` implementations,
+    A and B, with respect to an underlying data series. In order to define a relationship we need
+    two callables:
+
+    - **relationship** (called through :meth:`is_relation`), determines whether a series of type B
+      can be alternatively represented as type A.
+    - **transformer** (called through :meth:`transform`), provides a mechanism to convert the
+      series from B -> A.
+
+    For example, the series `pd.Series([1.0, 2.0, 3.0])` is encoded as a sequence of
+    floats but in reality they are all integers.
+
+    Examples:
+        Illustrative only -- the constructor also requires ``inferential`` and
+        ``transformer`` arguments, which this snippet omits.
+
+        >>> from visions.types import Integer, Float
+        >>> x = pd.Series([1.0, 2.0, 3.0])
+        >>> state = dict()
+        >>> relation = TypeRelation(Integer, Float)
+        >>> relation.is_relation(x, state)
+        True
+
+        >>> relation.transform(x, state)
+        pd.Series([1, 2, 3])
+    """
+
+    # The type on the "from" side of the relation (see __str__).
+    related_type = attr.ib()
+    # Required flag; the subclasses below default it to False / True.
+    inferential: bool = attr.ib()
+    # Both callables are wrapped in `multimethod` by the attrs converter.
+    transformer: Callable[[T, dict], T] = attr.ib(
+        converter=multimethod, repr=func_repr  # type: ignore
+    )
+    relationship: Callable[[Any, dict], bool] = attr.ib(
+        default=default_relation, converter=multimethod, repr=func_repr  # type: ignore
+    )
+    # The type on the "to" side of the relation; None unless supplied.
+    type = attr.ib(default=None)
+
+    def is_relation(self, series: Any, state: Optional[dict] = None) -> bool:
+        """Call ``relationship`` on ``series``; a fresh dict is used when ``state`` is None."""
+        if state is None:
+            state = {}
+        return self.relationship(series, state)
+
+    def transform(self, series: T, state: Optional[dict] = None) -> T:
+        """Call ``transformer`` on ``series``; a fresh dict is used when ``state`` is None."""
+        if state is None:
+            state = {}
+        return self.transformer(series, state)
+
+    def __str__(self):
+        return f"{self.related_type}->{self.type}"
+
+
+@attr.s(frozen=True)
+class IdentityRelation(TypeRelation):
+    """:class:`TypeRelation` defaulting to ``inferential=False`` and the identity transform."""
+
+    # NOTE(review): `relationship` defaults to None here, so `is_relation`
+    # would fail unless a callable is passed in or filled in elsewhere --
+    # confirm where it is populated.
+    relationship: Callable[[T, dict], bool] = attr.ib(repr=func_repr, default=None)
+    transformer: Callable[[T, dict], T] = attr.ib(
+        default=identity_transform, repr=func_repr
+    )
+    inferential: bool = attr.ib(default=False)
+
+
+@attr.s(frozen=True)
+class InferenceRelation(TypeRelation):
+    """:class:`TypeRelation` defaulting to ``inferential=True``.
+
+    ``relationship`` defaults to :func:`default_relation` (which raises
+    ``NotImplementedError``) and ``transformer`` to :func:`identity_transform`.
+    """
+
+    relationship: Callable[[T, dict], bool] = attr.ib(
+        repr=func_repr, default=default_relation
+    )
+    transformer: Callable[[T, dict], T] = attr.ib(
+        repr=func_repr, default=identity_transform
+    )
+    inferential: bool = attr.ib(default=True)
diff --git a/build/lib/visions/test/__init__.py b/build/lib/visions/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/test/data/__init__.py b/build/lib/visions/test/data/__init__.py
new file mode 100644
index 000000000..cc9602125
--- /dev/null
+++ b/build/lib/visions/test/data/__init__.py
@@ -0,0 +1 @@
+"""Small files used for test sequences"""
diff --git a/build/lib/visions/test/data/file.html b/build/lib/visions/test/data/file.html
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/test/data/img.jpeg b/build/lib/visions/test/data/img.jpeg
new file mode 100644
index 000000000..f835344f1
Binary files /dev/null and b/build/lib/visions/test/data/img.jpeg differ
diff --git a/build/lib/visions/test/data/img.jpg b/build/lib/visions/test/data/img.jpg
new file mode 100644
index 000000000..f835344f1
Binary files /dev/null and b/build/lib/visions/test/data/img.jpg differ
diff --git a/build/lib/visions/test/data/img.png b/build/lib/visions/test/data/img.png
new file mode 100644
index 000000000..c5916f289
Binary files /dev/null and b/build/lib/visions/test/data/img.png differ
diff --git a/build/lib/visions/test/series.py b/build/lib/visions/test/series.py
new file mode 100644
index 000000000..fe1557775
--- /dev/null
+++ b/build/lib/visions/test/series.py
@@ -0,0 +1,21 @@
+from typing import Dict
+
+import pandas as pd
+
+
+def get_series() -> Dict[str, pd.Series]:
+    """Collect the test sequences of every backend as pandas Series.
+
+    Builtin and numpy sequences are wrapped in ``pd.Series``; the pandas
+    sequences are merged in last, so they win when names collide.
+    """
+    from visions.backends.numpy.sequences import get_sequences as get_numpy_sequences
+    from visions.backends.pandas.sequences import get_sequences as get_pandas_sequences
+    from visions.backends.python.sequences import get_sequences as get_builtin_sequences
+
+    raw_sequences = get_builtin_sequences()
+    raw_sequences.update(get_numpy_sequences())
+
+    test_series = {name: pd.Series(values) for name, values in raw_sequences.items()}
+    test_series.update(get_pandas_sequences())
+
+    assert all(isinstance(v, pd.Series) for v in test_series.values())
+    return test_series
diff --git a/build/lib/visions/test/series_geometry.py b/build/lib/visions/test/series_geometry.py
new file mode 100644
index 000000000..6bb7c0418
--- /dev/null
+++ b/build/lib/visions/test/series_geometry.py
@@ -0,0 +1,29 @@
+from typing import Dict
+
+import pandas as pd
+
+
+def get_geometry_series() -> Dict[str, pd.Series]:
+    """Return the geometry test series: WKT strings, parsed points, and parsed
+    points followed by a missing value."""
+    from shapely import wkt
+
+    point_texts = ["POINT (-92 42)", "POINT (-92 42.1)", "POINT (-92 42.2)"]
+
+    return {
+        "geometry_string_series": pd.Series(list(point_texts)),
+        "geometry_series": pd.Series([wkt.loads(text) for text in point_texts]),
+        "geometry_series_missing": pd.Series(
+            [wkt.loads(text) for text in point_texts] + [None]
+        ),
+    }
diff --git a/build/lib/visions/test/series_sparse.py b/build/lib/visions/test/series_sparse.py
new file mode 100644
index 000000000..83ada9cc4
--- /dev/null
+++ b/build/lib/visions/test/series_sparse.py
@@ -0,0 +1,56 @@
+from typing import Dict
+
+import numpy as np
+import pandas as pd
+
+from visions.backends.pandas.test_utils import pandas_version
+
+# True unless the installed pandas is exactly version 1.0.5; consulted in
+# get_sparse_series before adding the StringDtype / nullable-bool sparse series.
+not_pandas_1_0_5 = not (
+    (pandas_version[0] == 1) and (pandas_version[1] == 0) and (pandas_version[2] == 5)
+)
+
+
+def get_sparse_series() -> Dict[str, pd.Series]:
+    """Return the sparse test series keyed by name.
+
+    The string and nullable-bool sparse series are only added for pandas
+    major version >= 1 other than 1.0.5.
+    """
+    test_series = {
+        "int_sparse": pd.Series([-1, 0, 1, 2, 3], dtype=pd.SparseDtype(np.int32, 0)),
+        "float_sparse": pd.Series(
+            [np.nan, 0.2, 1, 2, 3],
+            dtype=pd.SparseDtype(np.float64, np.nan),
+        ),
+        "complex_sparse": pd.Series(
+            [np.nan, complex(0, 1), complex(1, -1), complex(2, 4), complex(3, -12)],
+            dtype=pd.SparseDtype(np.complex128, np.nan),
+        ),
+        "bool_sparse": pd.Series(
+            [True, False, False],
+            dtype=pd.SparseDtype(np.bool_, False),
+        ),
+        "str_obj_sparse": pd.Series(
+            pd.arrays.SparseArray([None, None, "gold", "black", "silver"]),
+        ),
+        # Pending https://github.com/pandas-dev/pandas/issues/35762
+        # pd.Series([None, 0, 1, 2, 3, 4], name="datetime_sparse", dtype=pd.SparseDtype(np.datetime64)),
+        # Pandas dtypes
+        "pd_int64_sparse": pd.Series(
+            [0, 1, 2, 3, None],
+            dtype=pd.SparseDtype("int", np.nan),
+        ),
+        # Pending https://github.com/pandas-dev/pandas/issues/35793
+        # pd.Series(
+        #     ["a", "b", "c", None],
+        #     name="pd_categorical_sparse",
+        #     dtype=pd.SparseDtype(pd.CategoricalDtype(['a', 'b', 'c', 'd']))
+        # )
+    }
+
+    # Version-gated additions (see `not_pandas_1_0_5` above).
+    if pandas_version[0] >= 1 and not_pandas_1_0_5:
+        test_series["pd_string_sparse"] = pd.Series(
+            ["Patty", "Valentine", "Upper", "", "", ""],
+            dtype=pd.SparseDtype(pd.StringDtype(), ""),
+        )
+        test_series["pd_bool_sparse"] = pd.Series(
+            [True, False, False, None],
+            dtype=pd.SparseDtype("bool", pd.NA),
+        )
+
+    return test_series
diff --git a/build/lib/visions/test/utils.py b/build/lib/visions/test/utils.py
new file mode 100644
index 000000000..8d30a41a9
--- /dev/null
+++ b/build/lib/visions/test/utils.py
@@ -0,0 +1,246 @@
+from typing import Any, Dict, Iterable, Optional, Sequence, Set, Tuple, Type
+
+import networkx as nx
+import pandas as pd
+import pytest
+
+from visions import VisionsBaseType, VisionsTypeset
+
+T = Type[VisionsBaseType]
+
+
+def is_iter(v: Any) -> bool:
+    """Return True for iterables other than ``str`` and ``bytes``."""
+    if isinstance(v, (str, bytes)):
+        return False
+    return isinstance(v, Iterable)
+
+
+def sequences_equal(s1: Sequence, s2: Sequence) -> bool:
+    """Compare two sequences element by element, recursing into nested iterables.
+
+    Two elements match when both are missing (per ``pd.isna``) or when they
+    compare equal with ``==``.
+    """
+    # NOTE(review): `zip` stops at the shorter input, so a pure length
+    # mismatch (including an empty `s2`) is not reported as a difference.
+    for left, right in zip(s1, s2):
+        if is_iter(left) and is_iter(right):
+            if sequences_equal(left, right):
+                continue
+            return False
+        if pd.isna(left) and pd.isna(right):
+            continue
+        if not left == right:
+            return False
+
+    return True
+
+
+def all_series_included(
+    series_list: Dict[str, Sequence], series_map: Dict[T, Set[str]]
+):
+    """Check that all names are indeed used
+
+    Args:
+        series_list: mapping from series name to the series itself
+        series_map: mapping from type to the set of series names it refers to
+
+    Raises:
+        ValueError: if a provided series is not referenced by any type, or if
+            a referenced name has no provided series. Unreferenced series are
+            reported first.
+    """
+    used_names = {name for names in series_map.values() for name in names}
+    names = set(series_list.keys())
+
+    # TODO: warning?
+    unused = names - used_names
+    if unused:
+        raise ValueError(f"{len(unused)} series not included in tests {unused}")
+
+    not_provided = used_names - names
+    if not_provided:
+        # Message previously read "not not provided".
+        raise ValueError(
+            f"{len(not_provided)} series are included, but not provided {not_provided}"
+        )
+
+
+def get_contains_cases(
+    _test_suite: Dict[str, Sequence],
+    _series_map: Dict[T, Set[str]],
+    typeset: VisionsTypeset,
+):
+    """Parametrize contains tests
+
+    Note that ``_series_map`` is updated in place: every parent type's set is
+    extended with the names of its children.
+
+    Args:
+        _test_suite: mapping from sequence identifiers to sequences
+        _series_map: mapping from type to a set of sequence identifiers
+        typeset: A VisionsTypeset
+
+    Returns:
+        the args for the generated tests
+    """
+
+    # Include children's series in parent, walking the edges of the base graph
+    # in reverse topological order.
+    edge_order = list(nx.topological_sort(nx.line_graph(typeset.base_graph)))
+    for parent, child in reversed(edge_order):
+        _series_map[parent] |= _series_map[child]
+
+    all_series_included(_test_suite, _series_map)
+
+    argsvalues = []
+    for name, item in _test_suite.items():
+        for contains_type, series_names in _series_map.items():
+            argsvalues.append(
+                pytest.param(
+                    name,
+                    item,
+                    contains_type,
+                    name in series_names,
+                    id=f"{name} x {contains_type}",
+                )
+            )
+
+    return dict(
+        argnames=["name", "series", "contains_type", "member"],
+        argvalues=argsvalues,
+    )
+
+
+def contains(name: str, series: Sequence, type: T, member: bool) -> Tuple[bool, str]:
+    """Check whether ``series in type`` agrees with the expected ``member`` flag.
+
+    Returns:
+        A ``(passed, message)`` tuple; the message reports the expected and
+        the observed membership.
+    """
+    # Evaluate the membership test once: it used to run twice (once for the
+    # verdict, once for the message), doubling the cost and allowing the two
+    # to disagree.
+    is_member = series in type
+    return (
+        member == is_member,
+        f"{name} in {type}; expected {member}, got {is_member}",
+    )
+
+
+def get_inference_cases(
+    _test_suite: Dict[str, Sequence],
+    inferred_series_type_map: Dict[str, T],
+    typeset: VisionsTypeset,
+) -> Dict[str, Any]:
+    """Parametrize inference tests: one case per (sequence, type in typeset).
+
+    Raises:
+        ValueError: if a sequence has no entry in ``inferred_series_type_map``.
+    """
+    argsvalues = []
+    for name, series in _test_suite.items():
+        if name not in inferred_series_type_map:
+            raise ValueError(
+                f"{name} has no defined inference type, please add one to the test case mapping"
+            )
+        expected_type = inferred_series_type_map[name]
+
+        for test_type in typeset.types:
+            expected = test_type == expected_type
+            difference = test_type != expected_type
+            case_id = f"{name} x {test_type} expected {expected}"
+            argsvalues.append(
+                pytest.param(name, series, test_type, typeset, difference, id=case_id)
+            )
+
+    return dict(
+        argnames="name,series,inference_type,typeset,difference",
+        argvalues=argsvalues,
+    )
+
+
+def infers(
+    name: str,
+    series: Sequence,
+    expected_type: T,
+    typeset: VisionsTypeset,
+    difference: bool,
+) -> Tuple[bool, str]:
+    """Check the type inferred for ``series`` against ``expected_type``.
+
+    Returns:
+        A ``(passed, message)`` tuple; ``passed`` is True when "inferred type
+        equals expected type" differs from the ``difference`` flag.
+    """
+    from visions.typesets.typeset import get_type_from_path
+
+    _, paths, _ = typeset.infer(series)
+    matches_expected = get_type_from_path(paths) == expected_type
+
+    message = f"inference of {name} expected {expected_type} to be {not difference} (typeset={typeset}). Path: {paths}"
+    return matches_expected != difference, message
+
+
+def all_relations_tested(series_map, typeset):
+    """Verify that every inferential relation in ``typeset`` has test series.
+
+    ``series_map`` is an iterable of ``(to_type, from_type, items)`` triples.
+
+    Raises:
+        ValueError: if an inferential relation has no triple or an empty one.
+    """
+    # Convert data structure for mapping: to_type -> from_type -> items
+    lookup = {}
+    for map_to_type, map_from_type, items in series_map:
+        lookup.setdefault(map_to_type, {})[map_from_type] = items
+
+    missing_relations = set()
+    for node in typeset.types:
+        for relation in node.relations:
+            from_type, to_type = relation.related_type, relation.type
+            if not relation.inferential:
+                continue
+            if (
+                to_type in lookup
+                and from_type in lookup[to_type]
+                and len(lookup[to_type][from_type]) != 0
+            ):
+                continue
+            missing_relations.add(str(relation))
+
+    if missing_relations:
+        raise ValueError(
+            f"Not all inferential relations are tested {missing_relations}"
+        )
+
+
+def get_convert_cases(_test_suite, _series_map, typeset):
+    """Parametrize conversion tests.
+
+    A case is emitted for every sequence and every ``(source_type,
+    relation_type, series_list)`` triple whose ``relation_type`` contains the
+    sequence.
+
+    Raises:
+        ValueError: if a triple names a sequence missing from ``_test_suite``
+            (checked while iterating), or via ``all_relations_tested``.
+    """
+    all_relations_tested(_series_map, typeset)
+
+    known_names = _test_suite.keys()
+    argsvalues = []
+    for name, item in _test_suite.items():
+        for source_type, relation_type, series_list in _series_map:
+            for listed_name in series_list:
+                if listed_name not in known_names:
+                    raise ValueError(
+                        f"{listed_name} specified in convert_map, but not in provided sequences"
+                    )
+
+            if item not in relation_type:
+                continue
+
+            argsvalues.append(
+                pytest.param(
+                    name,
+                    source_type,
+                    relation_type,
+                    item,
+                    name in series_list,
+                    id=f"{name}: {relation_type} -> {source_type}",
+                )
+            )
+
+    return {
+        "argnames": ["name", "source_type", "relation_type", "series", "member"],
+        "argvalues": argsvalues,
+    }
+
+
+def convert(
+    name: str, source_type: T, relation_type: T, series: Sequence, member: bool
+) -> Tuple[bool, str]:
+    """Check whether the relation from ``relation_type`` on ``source_type``
+    accepts ``series`` as expected.
+
+    Args:
+        name: identifier of the series, used in messages
+        source_type: the type whose ``relations`` are consulted
+        relation_type: key looked up in ``source_type.relations``
+        series: the data to test
+        member: whether the relation is expected to hold
+
+    Returns:
+        A ``(passed, message)`` tuple.
+    """
+    relation = source_type.relations.get(relation_type, None)
+    is_relation = False if relation is None else relation.is_relation(series, {})
+
+    if not member:
+        return (
+            (not is_relation),
+            f"{source_type}, {relation}, {member}, {name}, {series}",
+        )
+
+    if relation is None:
+        # The relation was expected to hold but is not registered at all;
+        # report a failure instead of crashing on `None.transform`.
+        return (
+            False,
+            f"{source_type} has no relation from {relation_type}, "
+            f"but {name} was expected to convert",
+        )
+
+    # Note that the transformed series is not exactly the cast series
+    transformed_series = list(relation.transform(series, {}))
+
+    return (
+        is_relation,
+        f"Relationship {relation} for {series} tested false (but shouldn't have). "
+        f"Transform result would have been {transformed_series}",
+    )
+
+
+def get_cast_cases(_test_suite: Dict[str, Sequence], _results: Dict) -> Dict:
+    """Parametrize cast tests: one case per sequence with its expected result."""
+    # NOTE(review): sequences without an entry in `_results` get "" as their
+    # expected value, which is not the `None` that `cast` treats as "no
+    # casting expected" -- confirm this is intended.
+    argsvalues = [
+        pytest.param(
+            name, item, _results.get(name, ""), id=f"{name}: {name in _results}"
+        )
+        for name, item in _test_suite.items()
+    ]
+
+    return {
+        "argnames": ["name", "series", "expected"],
+        "argvalues": argsvalues,
+    }
+
+
+def cast(
+    name: str,
+    series: Sequence,
+    typeset: VisionsTypeset,
+    expected: Optional[pd.Series] = None,
+) -> Tuple[bool, str]:
+    """Check the result of ``typeset.cast_to_inferred(series)``.
+
+    With ``expected`` given, the cast result must equal it. Without it, the
+    result must equal the input and be the very same object.
+
+    Returns:
+        A ``(passed, message)`` tuple.
+    """
+    result = typeset.cast_to_inferred(series)
+    # TODO: if error also print Path
+    if expected is not None:
+        return (
+            sequences_equal(result, expected),
+            f"Series {name} cast expected {expected} got {result}",
+        )
+
+    if not sequences_equal(result, series):
+        return False, f"Series {name} cast expected {series} (no casting) got {result}"
+
+    # Values match; additionally require that no new object was created.
+    return (
+        series is result,
+        f"Series {name} memory addresses are not equal, while return value was",
+    )
diff --git a/build/lib/visions/types/__init__.py b/build/lib/visions/types/__init__.py
new file mode 100644
index 000000000..8ab058fa4
--- /dev/null
+++ b/build/lib/visions/types/__init__.py
@@ -0,0 +1,53 @@
+from visions.types.boolean import Boolean
+from visions.types.categorical import Categorical
+from visions.types.complex import Complex
+from visions.types.count import Count
+from visions.types.date import Date
+from visions.types.date_time import DateTime
+from visions.types.email_address import EmailAddress
+from visions.types.file import File
+from visions.types.float import Float
+from visions.types.generic import Generic
+from visions.types.geometry import Geometry
+from visions.types.image import Image
+from visions.types.integer import Integer
+from visions.types.ip_address import IPAddress
+from visions.types.numeric import Numeric
+from visions.types.object import Object
+from visions.types.ordinal import Ordinal
+from visions.types.path import Path
+from visions.types.sparse import Sparse
+from visions.types.string import String
+from visions.types.time import Time
+from visions.types.time_delta import TimeDelta
+from visions.types.type import VisionsBaseType
+from visions.types.url import URL
+from visions.types.uuid import UUID
+
+__all__ = [
+ "VisionsBaseType",
+ "Generic",
+ "String",
+ "Boolean",
+ "Categorical",
+ "Complex",
+ "Count",
+ "Date",
+ "DateTime",
+ "File",
+ "Float",
+ "Geometry",
+ "Image",
+ "Integer",
+ "IPAddress",
+ "Object",
+ "Ordinal",
+ "Path",
+ "TimeDelta",
+ "UUID",
+ "URL",
+ "Time",
+ "EmailAddress",
+ "Sparse",
+ "Numeric",
+]
diff --git a/build/lib/visions/types/boolean.py b/build/lib/visions/types/boolean.py
new file mode 100644
index 000000000..fc5a2dbb2
--- /dev/null
+++ b/build/lib/visions/types/boolean.py
@@ -0,0 +1,38 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class Boolean(VisionsBaseType):
+ """**Boolean** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import visions
+ >>> x = [True, False, False, True]
+ >>> x in visions.Boolean
+ True
+
+ >>> x = [True, False, None]
+ >>> x in visions.Boolean
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Generic),
+ InferenceRelation(String),
+ InferenceRelation(Object),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/categorical.py b/build/lib/visions/types/categorical.py
new file mode 100644
index 000000000..6826ef513
--- /dev/null
+++ b/build/lib/visions/types/categorical.py
@@ -0,0 +1,29 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class Categorical(VisionsBaseType):
+ """**Categorical** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import visions
+ >>> x = pd.Series([True, False, 1], dtype='category')
+ >>> x in visions.Categorical
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Generic)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/complex.py b/build/lib/visions/types/complex.py
new file mode 100644
index 000000000..1da4f77c9
--- /dev/null
+++ b/build/lib/visions/types/complex.py
@@ -0,0 +1,31 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class Complex(VisionsBaseType):
+ """**Complex** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> x = [complex(0, 0), complex(1, 2), complex(3, -1)]
+ >>> x in visions.Complex
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Generic),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/count.py b/build/lib/visions/types/count.py
new file mode 100644
index 000000000..33a9a79fe
--- /dev/null
+++ b/build/lib/visions/types/count.py
@@ -0,0 +1,27 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.integer import Integer
+from visions.types.type import VisionsBaseType
+
+
+class Count(VisionsBaseType):
+ """**Count** (positive integer) implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> x = [1, 4, 10, 20]
+ >>> x in visions.Count
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Integer)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/date.py b/build/lib/visions/types/date.py
new file mode 100644
index 000000000..00922bb50
--- /dev/null
+++ b/build/lib/visions/types/date.py
@@ -0,0 +1,34 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.date_time import DateTime
+from visions.types.object import Object
+from visions.types.type import VisionsBaseType
+
+
+class Date(VisionsBaseType):
+ """**Date** implementation of :class:`visions.types.type.VisionsBaseType`.
+    All values should be datetime.date or missing
+
+ Examples:
+ >>> import datetime
+ >>> import visions
+ >>> x = [datetime.date(2017, 3, 5), datetime.date(2019, 12, 4)]
+ >>> x in visions.Date
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(DateTime),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/date_time.py b/build/lib/visions/types/date_time.py
new file mode 100644
index 000000000..1698379a2
--- /dev/null
+++ b/build/lib/visions/types/date_time.py
@@ -0,0 +1,33 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class DateTime(VisionsBaseType):
+ """**Datetime** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import datetime
+ >>> import visions
+ >>> x = [datetime.datetime(2017, 3, 5), datetime.datetime(2019, 12, 4)]
+ >>> x in visions.DateTime
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Generic),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/email_address.py b/build/lib/visions/types/email_address.py
new file mode 100644
index 000000000..6720c7127
--- /dev/null
+++ b/build/lib/visions/types/email_address.py
@@ -0,0 +1,57 @@
+from typing import Any, Sequence
+
+import attr
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+@attr.s(slots=True)
+class FQDA:
+ local = attr.ib()
+ fqdn = attr.ib()
+
+ @staticmethod
+ def from_str(s):
+ return _to_email(s)
+
+
+def _to_email(s) -> FQDA:
+ if isinstance(s, FQDA):
+ return s
+ elif isinstance(s, str):
+ return FQDA(*s.split("@", maxsplit=1))
+ else:
+ raise TypeError("Only strings supported")
+
+
+class EmailAddress(VisionsBaseType):
+ """**EmailAddress** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Notes:
+ The email address should be a **fully qualified domain address** (FQDA)
+ FQDA = local part + @ + fully qualified domain name (FQDN)
+        This type expects values already parsed into ``FQDA`` objects (see the example below).
+
+ Examples:
+ >>> import visions
+ >>> x = [FQDA('example','gmail.com'), FQDA.from_str('example@protonmail.com')]
+ >>> x in visions.EmailAddress
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/file.py b/build/lib/visions/types/file.py
new file mode 100644
index 000000000..f8f8388c4
--- /dev/null
+++ b/build/lib/visions/types/file.py
@@ -0,0 +1,29 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.path import Path
+from visions.types.type import VisionsBaseType
+
+
+class File(VisionsBaseType):
+ """**File** implementation of :class:`visions.types.type.VisionsBaseType`.
+ (i.e. existing path)
+
+ Examples:
+ >>> import pathlib
+ >>> x = [pathlib.Path('/home/user/file.txt'), pathlib.Path('/home/user/test2.txt')]
+ >>> x in visions.File
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Path)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/float.py b/build/lib/visions/types/float.py
new file mode 100644
index 000000000..b52d3537f
--- /dev/null
+++ b/build/lib/visions/types/float.py
@@ -0,0 +1,34 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.complex import Complex
+from visions.types.generic import Generic
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class Float(VisionsBaseType):
+ """**Float** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import visions
+ >>> x = [1.0, 2.5, 5.0]
+ >>> x in visions.Float
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Generic),
+ InferenceRelation(String),
+ InferenceRelation(Complex),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/generic.py b/build/lib/visions/types/generic.py
new file mode 100644
index 000000000..37a0b75c0
--- /dev/null
+++ b/build/lib/visions/types/generic.py
@@ -0,0 +1,27 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import TypeRelation
+from visions.types.type import VisionsBaseType
+
+
+class Generic(VisionsBaseType):
+ """**Generic** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import numpy as np
+ >>> import visions
+ >>> x = ['a', 1, np.nan]
+ >>> x in visions.Generic
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ return []
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ return True
diff --git a/build/lib/visions/types/geometry.py b/build/lib/visions/types/geometry.py
new file mode 100644
index 000000000..45829df32
--- /dev/null
+++ b/build/lib/visions/types/geometry.py
@@ -0,0 +1,32 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class Geometry(VisionsBaseType):
+ """**Geometry** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> from shapely import wkt
+        >>> x = pd.Series([wkt.loads('POINT (-92 42)'), wkt.loads('POINT (-92 42.1)'), wkt.loads('POINT (-92 42.2)')])
+        >>> x in visions.Geometry
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/image.py b/build/lib/visions/types/image.py
new file mode 100644
index 000000000..2b9e1d41c
--- /dev/null
+++ b/build/lib/visions/types/image.py
@@ -0,0 +1,30 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.file import File
+from visions.types.type import VisionsBaseType
+
+
+class Image(VisionsBaseType):
+ """**Image** implementation of :class:`visions.types.type.VisionsBaseType`.
+ (i.e. series with all image files)
+
+ Examples:
+ >>> from pathlib import Path
+ >>> import visions
+ >>> x = [Path('/home/user/file.png'), Path('/home/user/test2.jpg')]
+ >>> x in visions.Image
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(File)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/integer.py b/build/lib/visions/types/integer.py
new file mode 100644
index 000000000..8e79ce44a
--- /dev/null
+++ b/build/lib/visions/types/integer.py
@@ -0,0 +1,31 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.float import Float
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class Integer(VisionsBaseType):
+ """**Integer** implementation of :class:`visions.types.type.VisionsBaseType`.
+ Examples:
+        >>> import visions
+ >>> x = [-1, 1, 2, 3]
+ >>> x in visions.Integer
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Generic),
+ InferenceRelation(Float),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/ip_address.py b/build/lib/visions/types/ip_address.py
new file mode 100644
index 000000000..5fea143dc
--- /dev/null
+++ b/build/lib/visions/types/ip_address.py
@@ -0,0 +1,33 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class IPAddress(VisionsBaseType):
+ """**IP Address** (v4 and v6) implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> from ipaddress import IPv4Address
+ >>> import visions
+ >>> x = [IPv4Address('127.0.0.1'), IPv4Address('128.0.1.2')]
+ >>> x in visions.IPAddress
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/numeric.py b/build/lib/visions/types/numeric.py
new file mode 100644
index 000000000..05e790a2b
--- /dev/null
+++ b/build/lib/visions/types/numeric.py
@@ -0,0 +1,30 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class Numeric(VisionsBaseType):
+ """**Numeric** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import visions
+ >>> from decimal import Decimal
+ >>>
+ >>> x = [Decimal(1), Decimal(2), Decimal(3)]
+ >>> x in visions.Numeric
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Generic)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/object.py b/build/lib/visions/types/object.py
new file mode 100644
index 000000000..54b5417f5
--- /dev/null
+++ b/build/lib/visions/types/object.py
@@ -0,0 +1,27 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class Object(VisionsBaseType):
+ """**Object** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> x = ['a', 1, np.nan]
+ >>> x in visions.Object
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Generic)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/ordinal.py b/build/lib/visions/types/ordinal.py
new file mode 100644
index 000000000..2de6c6137
--- /dev/null
+++ b/build/lib/visions/types/ordinal.py
@@ -0,0 +1,29 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.categorical import Categorical
+from visions.types.type import VisionsBaseType
+
+
+class Ordinal(VisionsBaseType):
+ """**Ordinal** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import visions
+ >>> x = pd.Series([1, 2, 3, 1, 1], dtype='category')
+ >>> x in visions.Ordinal
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Categorical)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/path.py b/build/lib/visions/types/path.py
new file mode 100644
index 000000000..db1cdf610
--- /dev/null
+++ b/build/lib/visions/types/path.py
@@ -0,0 +1,33 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class Path(VisionsBaseType):
+ """**Path** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import pathlib
+ >>> import visions
+ >>> x = [pathlib.Path('/home/user/file.txt'), pathlib.Path('/home/user/test2.txt')]
+ >>> x in visions.Path
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/sparse.py b/build/lib/visions/types/sparse.py
new file mode 100644
index 000000000..23b8835f5
--- /dev/null
+++ b/build/lib/visions/types/sparse.py
@@ -0,0 +1,29 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class Sparse(VisionsBaseType):
+ """**Sparse** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import pandas as pd
+ >>> import visions
+        >>> x = pd.Series(pd.arrays.SparseArray([complex(0, 0), complex(1, 2), complex(3, -1)]))
+ >>> x in visions.Sparse
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Generic)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/string.py b/build/lib/visions/types/string.py
new file mode 100644
index 000000000..6c802f851
--- /dev/null
+++ b/build/lib/visions/types/string.py
@@ -0,0 +1,27 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.type import VisionsBaseType
+
+
+class String(VisionsBaseType):
+ """**String** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> x = ['rubin', 'carter', 'champion']
+ >>> x in visions.String
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Object)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/time.py b/build/lib/visions/types/time.py
new file mode 100644
index 000000000..4d4f8161b
--- /dev/null
+++ b/build/lib/visions/types/time.py
@@ -0,0 +1,29 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.type import VisionsBaseType
+
+
+class Time(VisionsBaseType):
+ """**Time** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> import datetime
+ >>> import visions
+ >>> x = [datetime.time(10, 8, 4), datetime.time(21, 17, 0)]
+ >>> x in visions.Time
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Object)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/time_delta.py b/build/lib/visions/types/time_delta.py
new file mode 100644
index 000000000..ac99bede5
--- /dev/null
+++ b/build/lib/visions/types/time_delta.py
@@ -0,0 +1,28 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, TypeRelation
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
+
+class TimeDelta(VisionsBaseType):
+ """**TimeDelta** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> from datetime import timedelta
+ >>> x = [timedelta(hours=1), timedelta(hours=3)]
+        >>> x in visions.TimeDelta
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [IdentityRelation(Generic)]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/type.py b/build/lib/visions/types/type.py
new file mode 100644
index 000000000..702dcf3b5
--- /dev/null
+++ b/build/lib/visions/types/type.py
@@ -0,0 +1,135 @@
+from abc import ABCMeta, abstractmethod
+from typing import Any, Dict, Optional, Sequence, Type, Union, cast
+
+import attr
+from multimethod import multimethod
+
+from visions.relations import TypeRelation
+
+_DEFAULT = object()
+
+
+class RelationsIterManager:
+ """Class to enable to treat relations as dict"""
+
+ def __init__(self, relations: Sequence[TypeRelation]):
+ self._keys: Dict["Type[VisionsBaseType]", int] = {
+ item.related_type: i for i, item in enumerate(relations)
+ }
+ self.values = tuple(relations)
+
+ def __getitem__(self, index: Union["Type[VisionsBaseType]", int]) -> TypeRelation:
+ idx = index if isinstance(index, int) else self._keys[index]
+ return self.values[idx]
+
+ def get(
+ self, index: Union["Type[VisionsBaseType]", int], default: Any = _DEFAULT
+ ) -> Union[TypeRelation, Any]:
+ try:
+ return self[index]
+ except (IndexError, KeyError) as err:
+ if default is _DEFAULT:
+ raise err
+ else:
+ return default
+
+ def __iter__(self):
+ yield from self.values
+
+
+class VisionsBaseTypeMeta(ABCMeta):
+ _relations: Optional[RelationsIterManager] = None
+
+ def __contains__(cls, sequence: Sequence) -> bool:
+ return cls.contains_op(sequence, dict())
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ raise NotImplementedError
+
+ @staticmethod
+ def contains_op(item: Any, state: dict) -> bool:
+ raise NotImplementedError
+
+ @property
+ def relations(cls) -> RelationsIterManager:
+ from visions.relations.relations import IdentityRelation
+
+ if cls._relations is None:
+ cls._relations = RelationsIterManager(
+ [
+ (
+ attr.evolve(
+ r,
+ type=cls,
+ relationship=(
+ cls.contains_op
+ if r.relationship is None
+ else r.relationship
+ ),
+ )
+ if isinstance(r, IdentityRelation)
+ else attr.evolve(
+ r,
+ type=cls,
+ relationship=(
+ multimethod(r.relationship)
+ if r.relationship is not None
+ else None
+ ),
+ transformer=multimethod(r.transformer),
+ )
+ )
+ for r in cls.get_relations()
+ ]
+ )
+ return cls._relations
+
+ def __add__(cls, other):
+ from visions.types import Generic
+ from visions.typesets import VisionsTypeset
+
+ if not any(issubclass(x, Generic) for x in [cls, other]):
+ return VisionsTypeset({Generic, cls, other})
+ return VisionsTypeset({cls, other})
+
+ def __str__(cls) -> str:
+ return str(cls.__name__)
+
+ def __repr__(cls) -> str:
+ return str(cls)
+
+
+class VisionsBaseType(metaclass=VisionsBaseTypeMeta):
+ """Abstract implementation of a vision type.
+
+ Provides a common API for building custom visions data types.
+ """
+
+ def __init__(self):
+ pass
+
+ @staticmethod
+ @abstractmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ raise NotImplementedError
+
+ @classmethod
+ def register_transformer(
+ cls, relation: "Type[VisionsBaseType]", dispatch_type: Any
+ ):
+ relation_transformer = cls.relations[relation].transformer
+ return cast(Any, relation_transformer).register(dispatch_type, dict)
+
+ @classmethod
+ def register_relationship(
+ cls, relation: "Type[VisionsBaseType]", dispatch_type: Any
+ ):
+ relation_relationship = cls.relations[relation].relationship
+ return cast(Any, relation_relationship).register(dispatch_type, dict)
+
+ @staticmethod
+ @multimethod
+ @abstractmethod
+ def contains_op(sequence: Any, state: Any) -> bool:
+ raise NotImplementedError
diff --git a/build/lib/visions/types/url.py b/build/lib/visions/types/url.py
new file mode 100644
index 000000000..086ff170f
--- /dev/null
+++ b/build/lib/visions/types/url.py
@@ -0,0 +1,33 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class URL(VisionsBaseType):
+ """**Url** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ Examples:
+ >>> from urllib.parse import urlparse
+ >>> urls = ['http://www.cwi.nl:80/%7Eguido/Python.html', 'https://github.com/pandas-profiling/pandas-profiling']
+ >>> x = [urlparse(url) for url in urls]
+ >>> x in visions.URL
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/types/uuid.py b/build/lib/visions/types/uuid.py
new file mode 100644
index 000000000..22f949f47
--- /dev/null
+++ b/build/lib/visions/types/uuid.py
@@ -0,0 +1,41 @@
+from typing import Any, Sequence
+
+from multimethod import multimethod
+
+from visions.relations import IdentityRelation, InferenceRelation, TypeRelation
+from visions.types.object import Object
+from visions.types.string import String
+from visions.types.type import VisionsBaseType
+
+
+class UUID(VisionsBaseType):
+ """**UUID** implementation of :class:`visions.types.type.VisionsBaseType`.
+
+ References:
+ UUID specification in RFC4122:
+ https://tools.ietf.org/html/rfc4122#section-3
+
+ Python standard library:
+ https://docs.python.org/3/library/uuid.html
+
+ Examples:
+ >>> import uuid
+ >>> import visions
+ >>> uuids = ['0b8a22ca-80ad-4df5-85ac-fa49c44b7ede', 'aaa381d6-8442-4f63-88c8-7c900e9a23c6']
+ >>> x = [uuid.UUID(uuid_str) for uuid_str in uuids]
+ >>> x in visions.UUID
+ True
+ """
+
+ @staticmethod
+ def get_relations() -> Sequence[TypeRelation]:
+ relations = [
+ IdentityRelation(Object),
+ InferenceRelation(String),
+ ]
+ return relations
+
+ @staticmethod
+ @multimethod
+ def contains_op(item: Any, state: dict) -> bool:
+ pass
diff --git a/build/lib/visions/typesets/__init__.py b/build/lib/visions/typesets/__init__.py
new file mode 100644
index 000000000..803df1a50
--- /dev/null
+++ b/build/lib/visions/typesets/__init__.py
@@ -0,0 +1,6 @@
+from visions.typesets.complete_set import CompleteSet
+from visions.typesets.geometry_set import GeometrySet
+from visions.typesets.standard_set import StandardSet
+from visions.typesets.typeset import VisionsTypeset
+
+__all__ = ["VisionsTypeset", "CompleteSet", "StandardSet", "GeometrySet"]
diff --git a/build/lib/visions/typesets/complete_set.py b/build/lib/visions/typesets/complete_set.py
new file mode 100644
index 000000000..74ba8c56a
--- /dev/null
+++ b/build/lib/visions/typesets/complete_set.py
@@ -0,0 +1,93 @@
+from visions.types import (
+ URL,
+ UUID,
+ Boolean,
+ Categorical,
+ Complex,
+ Count,
+ Date,
+ DateTime,
+ EmailAddress,
+ File,
+ Float,
+ Generic,
+ Geometry,
+ Image,
+ Integer,
+ IPAddress,
+ Object,
+ Ordinal,
+ Path,
+ String,
+ Time,
+ TimeDelta,
+)
+from visions.typesets.typeset import VisionsTypeset
+
+
+class CompleteSet(VisionsTypeset):
+ """Complete visions typeset with all supported types
+
+ Includes support for the following types:
+
+ - Float
+ - Integer
+ - Boolean
+ - Object
+ - String
+ - Complex
+ - Categorical
+ - Ordinal
+ - Count
+ - DateTime
+ - Date
+ - Time
+ - TimeDelta
+ - Geometry
+ - Path
+ - File
+ - Image
+ - URL
+ - IPAddress
+ - EmailAddress
+ - UUID
+
+ """
+
+ def __init__(self) -> None:
+ types = {
+ Generic,
+ Boolean,
+ Float,
+ Object,
+ Complex,
+ Categorical,
+ Ordinal,
+ DateTime,
+ TimeDelta,
+ Integer,
+ Count,
+ String,
+ Geometry,
+ URL,
+ Path,
+ Date,
+ Time,
+ File,
+ Image,
+ IPAddress,
+ EmailAddress,
+ UUID,
+ }
+ super().__init__(types)
+
+ try:
+ import imagehash
+ import PIL
+ import shapely
+ except ImportError as e:
+ raise ImportError(
+ f"This typeset requires dependencies that are currently not installed ({e}). "
+ "You can follow the installation instructions to resolve this issue: "
+ "https://dylan-profiler.github.io/visions/visions/getting_started/installation.html"
+ )
diff --git a/build/lib/visions/typesets/geometry_set.py b/build/lib/visions/typesets/geometry_set.py
new file mode 100644
index 000000000..079331528
--- /dev/null
+++ b/build/lib/visions/typesets/geometry_set.py
@@ -0,0 +1,58 @@
+from visions.types import (
+ Boolean,
+ Categorical,
+ Complex,
+ DateTime,
+ Float,
+ Generic,
+ Geometry,
+ Integer,
+ Object,
+ String,
+ TimeDelta,
+)
+from visions.typesets.typeset import VisionsTypeset
+
+
+class GeometrySet(VisionsTypeset):
+ """Standard visions typeset with shapely geometry support
+
+ Includes support for the following types:
+
+ - Float
+ - Integer
+ - Boolean
+ - Object
+ - String
+ - Complex
+ - Categorical
+ - DateTime
+ - TimeDelta
+ - Geometry
+
+ """
+
+ def __init__(self) -> None:
+ types = {
+ Generic,
+ Boolean,
+ Float,
+ Object,
+ Complex,
+ Categorical,
+ DateTime,
+ TimeDelta,
+ Integer,
+ String,
+ Geometry,
+ }
+ super().__init__(types)
+
+ try:
+ import shapely
+ except ImportError as e:
+ raise ImportError(
+ f"This typeset requires dependencies that are currently not installed ({e}). "
+ "You can follow the installation instructions to resolve this issue: "
+ "https://dylan-profiler.github.io/visions/visions/getting_started/installation.html"
+ )
diff --git a/build/lib/visions/typesets/standard_set.py b/build/lib/visions/typesets/standard_set.py
new file mode 100644
index 000000000..8bea57e70
--- /dev/null
+++ b/build/lib/visions/typesets/standard_set.py
@@ -0,0 +1,46 @@
+from visions.types import (
+ Boolean,
+ Categorical,
+ Complex,
+ DateTime,
+ Float,
+ Generic,
+ Integer,
+ Object,
+ String,
+ TimeDelta,
+)
+from visions.typesets.typeset import VisionsTypeset
+
+
+class StandardSet(VisionsTypeset):
+ """The standard visions typesets
+
+ Includes support for the following types:
+
+ - Float
+ - Integer
+ - Boolean
+ - Object
+ - String
+ - Complex
+ - Categorical
+ - DateTime
+ - TimeDelta
+
+ """
+
+ def __init__(self) -> None:
+ types = {
+ Generic,
+ Boolean,
+ Float,
+ Object,
+ Complex,
+ Categorical,
+ DateTime,
+ TimeDelta,
+ Integer,
+ String,
+ }
+ super().__init__(types)
diff --git a/build/lib/visions/typesets/typeset.py b/build/lib/visions/typesets/typeset.py
new file mode 100644
index 000000000..3f4289052
--- /dev/null
+++ b/build/lib/visions/typesets/typeset.py
@@ -0,0 +1,483 @@
+import warnings
+from functools import singledispatch
+from pathlib import Path
+from typing import (
+ Any,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+)
+
+import networkx as nx
+import pandas as pd
+
+from visions.types.generic import Generic
+from visions.types.type import VisionsBaseType
+
# Constrained to either a single visions type class or a whole typeset.
TypeOrTypeset = TypeVar("TypeOrTypeset", Type[VisionsBaseType], "VisionsTypeset")
# Constrained to either a visions type class or a mapping from str keys to visions type classes.
pathTypes = TypeVar(
    "pathTypes", Type[VisionsBaseType], Dict[str, Type[VisionsBaseType]]
)
# Constrained to either a pandas Series or a pandas DataFrame.
pdT = TypeVar("pdT", pd.Series, pd.DataFrame)
# Shorthand alias (not a TypeVar) for "a visions type class".
T = Type[VisionsBaseType]
+
+
def build_graph(nodes: Set[Type[VisionsBaseType]]) -> Tuple[nx.DiGraph, nx.DiGraph]:
    """Constructs a traversable relation graph between visions types

    Every provided type becomes a node. For every relation declared on a node an
    edge is added from ``relation.related_type`` to ``relation.type``, provided
    the related type is itself one of the nodes; otherwise a warning is emitted
    and the relation is skipped. Inferential edges are styled "dashed", all
    others "solid".

    Args:
        nodes: A set of :class:`visions.types.type.VisionsBaseType`

    Returns:
        A tuple ``(relation_graph, base_graph)``: the full directed relation
        graph and the sub-graph induced by its non-inferential edges.
    """
    edge_styles = {True: "dashed", False: "solid"}

    relation_graph = nx.DiGraph()
    relation_graph.add_nodes_from(nodes)

    identity_edges = []
    for node in nodes:
        for relation in node.relations:
            if relation.related_type not in nodes:
                warnings.warn(
                    f"Provided relations included mapping from {relation.related_type} to {relation.type} "
                    f"but {relation.related_type} was not included in the provided list of nodes"
                )
                continue

            edge = (relation.related_type, relation.type)
            relation_graph.add_edge(
                *edge,
                relationship=relation,
                style=edge_styles[relation.inferential],
            )
            if not relation.inferential:
                identity_edges.append(edge)

    # Validation may remove isolated nodes, so derive the base graph afterwards.
    check_graph_constraints(relation_graph)

    return relation_graph, relation_graph.edge_subgraph(identity_edges)
+
+
def check_graph_constraints(relation_graph: nx.DiGraph) -> None:
    """Run every structural validation on a relation graph.

    The isolate check runs first, followed by the cycle check.

    Args:
        relation_graph: A directed graph representing the set of relations between type nodes.

    """
    for check in (check_isolates, check_cycles):
        check(relation_graph)
+
+
def check_isolates(graph: nx.DiGraph) -> None:
    """Remove orphaned (edge-less) nodes from the graph and warn about them.

    The first node in topological order is treated as the root and is kept
    even when it has no edges. The graph is modified in place.

    Args:
        graph: the graph to check

    """
    nodes_before = set(graph.nodes)
    root_node = next(nx.topological_sort(graph))

    # root can be isolate
    removable = list(set(nx.isolates(graph)).difference([root_node]))
    graph.remove_nodes_from(removable)

    orphaned_nodes = nodes_before.difference(graph.nodes)
    if not orphaned_nodes:
        return

    warnings.warn(
        f"{orphaned_nodes} were isolates in the type relation map and consequently orphaned. "
        "Please add some mapping to the orphaned nodes."
    )
+
+
def check_cycles(graph: nx.DiGraph) -> None:
    """Warn when the graph contains at least one simple cycle.

    Args:
        graph: the graph to check

    """
    cycles = list(nx.simple_cycles(graph))
    if cycles:
        warnings.warn(f"Cyclical relations between types {cycles} detected")
+
+
def traverse_graph_with_series(
    base_type: T,
    series: Sequence,
    graph: nx.DiGraph,
    path: List[T] = None,
    state: Optional[dict] = None,
) -> Tuple[Sequence, List[T], dict]:
    """Walk the graph from ``base_type``, following the first matching relation at every step.

    At each node the successors are tried in graph order; the first one whose
    relation accepts the series is taken, the series is transformed through that
    relation, and the walk continues from there. The walk stops at the first
    node none of whose successors accept the series.

    Args:
        base_type: Entry-point for graph to start traversal
        series: the Series to check
        graph: the Graph to traverse
        path: the path so far; visited types are appended to this list in place
        state: traversal state handed to every relation call

    Returns:
        The (possibly transformed) series, the list of visited types and the state.
    """
    state = dict() if state is None else state
    path = [] if path is None else path

    current = base_type
    while True:
        path.append(current)

        for candidate in graph.successors(current):
            relation = graph[current][candidate]["relationship"]
            if relation.is_relation(series, state):
                series = relation.transform(series, state)
                current = candidate
                break
        else:
            # No successor accepted the series: `current` is the final type.
            return series, path, state
+
+
def traverse_graph_with_sampled_series(
    base_type: T,
    series: pd.Series,
    graph: nx.DiGraph,
    sample_size: int = 10,
    state: Optional[dict] = None,
) -> Tuple[Sequence, List[T], dict]:
    """Depth First Search traversal with sampling. There should be at most one successor that contains the series.

    Series with fewer than 1000 items (or fewer items than ``sample_size``) are
    traversed directly. Otherwise a random sample is traversed first, and the
    path found for the sample is then replayed on the full series, stopping at
    the first relation the full series does not satisfy.

    Args:
        base_type: Entry-point for graph to start traversal
        series: the Series to check
        graph: the Graph to traverse
        sample_size: number of items used in heuristic traversal
        state: traversal state; a fresh dict is created for each call when omitted

    Returns:
        The (possibly transformed) series, the list of types the full series
        actually reached, and the state.
    """
    # A `dict()` default would be created once and shared by every call.
    if state is None:
        state = dict()

    if (series.shape[0] < 1000) or (sample_size > series.shape[0]):
        return traverse_graph_with_series(base_type, series, graph, state=state)

    series_sample = series.sample(sample_size)
    _, sampled_path, _ = traverse_graph_with_series(
        base_type, series_sample, graph, state=state
    )
    if len(sampled_path) == 1:
        return series, sampled_path, state

    # Cast the full series along the sampled path. Only types the full series
    # satisfies are recorded, so the last entry always matches the returned data.
    path = [sampled_path[0]]
    for to_type in sampled_path[1:]:
        relation = graph[path[-1]][to_type]["relationship"]
        if not relation.is_relation(series, state):
            break
        series = relation.transform(series, state)
        path.append(to_type)

    return series, path, state
+
+
@singledispatch
def traverse_graph(
    data: Sequence, root_node: T, graph: nx.DiGraph
) -> Tuple[Sequence, Union[List[T], Dict[str, List[T]]], Dict[str, dict]]:
    """Default dispatch target: traverse the graph with ``data`` starting at ``root_node``.

    Implementations for other data containers can be added through
    ``traverse_graph.register``.
    """
    return traverse_graph_with_series(base_type=root_node, series=data, graph=graph)
+
+
@singledispatch
def get_type_from_path(
    path_data: Union[Sequence[T], Dict[str, Sequence[T]]]
) -> Union[T, Dict[str, T]]:
    """Fallback for path containers without a registered implementation.

    Raises:
        TypeError: always; only registered container types are supported.
    """
    unsupported = type(path_data)
    raise TypeError(f"Can't get types from path object of type {unsupported}")
+
+
@get_type_from_path.register(list)
@get_type_from_path.register(tuple)
def _get_type_from_path_builtin(path_list: Sequence[T]) -> T:
    """Return the last type of a list or tuple path."""
    final_type = path_list[-1]
    return final_type
+
+
@get_type_from_path.register(dict)
def _get_type_from_path_dict(path_dict: Dict[str, Sequence[T]]) -> Dict[str, T]:
    """Return, for every key, the last type of the path stored under it."""
    final_types = {}
    for name, path in path_dict.items():
        final_types[name] = path[-1]
    return final_types
+
+
class VisionsTypeset:
    """
    A collection of :class:`visions.types.type.VisionsBaseType` with associated relationship map between them.

    Attributes:
        types: The collection of Visions Types derived from :class:`visions.types.type.VisionsBaseType`
        base_graph: The graph of relations composed exclusively of :class:`visions.relations.relations.IdentityRelation`
        relation_graph: The full relation graph including both :class:`visions.relations.relations.IdentityRelation`
                        and :class:`visions.relations.relations.InferenceRelation`
    """

    def __init__(self, types: Set[Type[VisionsBaseType]]) -> None:
        """
        Args:
            types: a set of types

        Raises:
            ValueError: if `types` is not iterable, or if the root node of the
                resulting relation graph is not a subclass of Generic.
        """
        self._root_node: Optional[T] = None

        if not isinstance(types, Iterable):
            raise ValueError("types should be Sequence")

        self.relation_graph, self.base_graph = build_graph(set(types))

        if not issubclass(self.root_node, Generic):
            raise ValueError("`root_node` should be a subclass of Generic")

        # Taken from the graph rather than the argument: building the graph may drop orphaned types.
        self.types = set(self.relation_graph.nodes)

    @property
    def root_node(self) -> T:
        """Returns a cached copy of the relation_graphs root node

        The root is the first node of the relation graph in topological order;
        it is computed on first access and cached afterwards.

        Returns:
            A cached copy of the relation_graphs root node.
        """
        if self._root_node is None:
            self._root_node = next(nx.topological_sort(self.relation_graph))
        return self._root_node

    def detect(self, data: Any) -> Tuple[Sequence, Any, dict]:
        """The results found after only considering IdentityRelations.

        Notes:
            This is an advanced feature, consider using `detect_type` in case the type is what is needed.

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            A tuple of the coerced sequence, visited nodes and state
        """
        return traverse_graph(data, self.root_node, self.base_graph)

    def detect_type(self, data: Sequence) -> Union[T, Dict[str, T]]:
        """The inferred type found only considering IdentityRelations.

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            A dictionary of {name: type} pairs in the case of DataFrame input or a type
        """
        _, paths, _ = self.detect(data)
        return get_type_from_path(paths)

    def infer(self, data: Sequence) -> Tuple[Sequence, Any, dict]:
        """The results found after considering all relations.

        Notes:
            This is an advanced feature, consider using `infer_type` in case the type is what is needed.

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            A tuple of the coerced sequence, visited nodes and state
        """
        return traverse_graph(data, self.root_node, self.relation_graph)

    def infer_type(self, data: Sequence) -> Union[T, Dict[str, T]]:
        """The inferred type found using all type relations.

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            A dictionary of {name: type} pairs in the case of DataFrame input or a type
        """
        _, paths, _ = self.infer(data)
        return get_type_from_path(paths)

    def cast_to_detected(self, data: Sequence) -> Sequence:
        """Transforms input data into a canonical representation using only IdentityRelations

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            new_data: The transformed DataFrame or Series.
        """
        data, _, _ = self.detect(data)
        return data

    def cast_to_inferred(self, data: Sequence) -> Sequence:
        """Transforms input data into the representation found using all relations.

        Args:
            data: a DataFrame or Series to determine types over

        Returns:
            new_data: The transformed DataFrame or Series.
        """
        data, _, _ = self.infer(data)
        return data

    def output_graph(
        self,
        file_name: Union[str, Path],
        base_only: bool = False,
        dpi: Optional[int] = None,
    ) -> None:
        """Write the type graph to a file.

        Args:
            file_name: the file to save the output to
            base_only: if True, plot the graph without relation mapping edges
            dpi: set the dpi of the output image
        """
        from visions.utils.graph import output_graph

        # Work on a copy so the styling attributes never leak into the typeset's own graphs.
        if base_only:
            graph = self.base_graph.copy()
        else:
            graph = self.relation_graph.copy()

        graph.graph["node"] = {"shape": "box", "color": "red"}
        if dpi is not None:
            graph.graph["graph"] = {"dpi": dpi}

        output_graph(graph, file_name)

    def plot_graph(
        self,
        dpi: int = 800,
        base_only: bool = False,
        figsize: Optional[Tuple[int, int]] = None,
    ):
        """Render the type graph with matplotlib.

        The graph is written to a temporary PNG file, loaded back as an image
        and drawn on a new matplotlib figure. The temporary file is removed
        even when rendering fails.

        Args:
            dpi: dpi of the matplotlib figure.
            figsize: figure size
            base_only: Only display the typesets base_graph
        Returns:
            Displays the image
        """
        import os
        import tempfile

        from matplotlib import image as mpimg
        from matplotlib import pyplot as plt

        # Only reserve a unique name here; the handle is closed before the
        # file is written and deleted by name.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
            temp_name = temp_file.name

        try:
            self.output_graph(temp_name, dpi=dpi, base_only=base_only)
            img = mpimg.imread(temp_name)
        finally:
            os.unlink(temp_name)

        plt.figure(dpi=dpi, figsize=figsize)
        plt.axis("off")
        plt.imshow(img)

    def _get_other_type(self, other: TypeOrTypeset) -> Set[T]:
        """Converts input into a set of :class:`visions.types.type.VisionsBaseType`

        Args:
            other: A :class:`visions.types.type.VisionsBaseType` or :class:`visions.typesets.typeset.VisionsTypeset`

        Raises:
            NotImplementedError: if `other` is neither a typeset nor a visions type class.

        Returns:
            Set[Type[VisionsBaseType]]:
        """
        if isinstance(other, VisionsTypeset):
            return set(other.types)
        # `issubclass` raises TypeError for non-class arguments, so check for a class first.
        if isinstance(other, type) and issubclass(other, VisionsBaseType):
            return {other}
        raise NotImplementedError(
            f"Typeset operation not implemented for type {type(other)}"
        )

    def replace(self, old: T, new: T) -> "VisionsTypeset":
        """Create a new typeset having replace one type with another.

        Args:
            old: Visions type to replace.
            new: Replacement visions type.

        Raises:
            KeyError: if `old` is not part of this typeset.

        Returns
            A VisionsTypeset
        """
        types = self.types.copy()
        # Remove before adding so that replacing a type with itself keeps it.
        types.remove(old)
        types.add(new)
        return VisionsTypeset(types)

    def __add__(self, other: TypeOrTypeset) -> "VisionsTypeset":
        """Adds a type or typeset into the current typeset.

        Args:
            other: Type or typeset to be added

        Returns
            A VisionsTypeset
        """
        other_types = self._get_other_type(other)
        return VisionsTypeset(self.types | other_types)

    def __iadd__(self, other: TypeOrTypeset) -> "VisionsTypeset":
        """Adds a type or typeset into the current typeset.

        A new typeset is returned; the current one is not modified in place.

        Args:
            other: Type or typeset to be added

        Returns
            A VisionsTypeset
        """
        return self.__add__(other)

    def __sub__(self, other: TypeOrTypeset) -> "VisionsTypeset":
        """Subtracts a type or typeset from the current typeset.

        Args:
            other: Type or typeset to be removed

        Returns
            A VisionsTypeset
        """
        other_types = self._get_other_type(other)
        return VisionsTypeset(self.types - other_types)

    def __isub__(self, other: TypeOrTypeset) -> "VisionsTypeset":
        """Subtracts a type or typeset from the current typeset.

        A new typeset is returned; the current one is not modified in place.

        Args:
            other: Type or typeset to be removed

        Returns
            A VisionsTypeset
        """
        return self.__sub__(other)

    def __repr__(self) -> str:
        """Pretty representation of the typeset.

        Returns
            The name of the typeset's class.
        """
        return self.__class__.__name__
diff --git a/build/lib/visions/utils/__init__.py b/build/lib/visions/utils/__init__.py
new file mode 100644
index 000000000..ffdbaabd1
--- /dev/null
+++ b/build/lib/visions/utils/__init__.py
@@ -0,0 +1,11 @@
+""" Utilities suite for visions """
+
+# from visions.utils.images import image_utils
+from visions.utils.monkeypatches import imghdr_patch, pathlib_patch
+from visions.utils.profiling import profile_type
+from visions.utils.warning_handling import suppress_warnings
+
+__all__ = [
+ "profile_type",
+ "suppress_warnings",
+]
diff --git a/build/lib/visions/utils/cache.py b/build/lib/visions/utils/cache.py
new file mode 100644
index 000000000..77d491dc2
--- /dev/null
+++ b/build/lib/visions/utils/cache.py
@@ -0,0 +1,60 @@
+import functools
+from collections import OrderedDict
+
+import pandas as pd
+
+
class LRUCacher:
    """Least-recently-used cache around ``value_func`` with a caller-supplied key function.

    Args:
        hash_func: called with the same positional arguments as ``value_func``;
            its result is used as the cache key.
        max_length: maximum number of entries kept; the least recently used
            entry is evicted once this is exceeded.
        value_func: function computing the value on a cache miss.
    """

    def __init__(self, hash_func, max_length, value_func):
        self.hash_func = hash_func
        self.max_length = max_length
        self.value_func = value_func
        # Insertion order doubles as recency order: the oldest entry comes first.
        self.cache = OrderedDict()

    def __getitem__(self, key):
        """Return the cached value for ``key`` and mark it most recently used.

        Raises:
            KeyError: if ``key`` is not cached.
        """
        value = self.cache[key]
        self.cache.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` as most recently used, evicting the oldest entry on overflow."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.max_length:
            self.cache.popitem(last=False)

    def get_key(self, *args):
        """Compute the cache key for a call with ``args``."""
        return self.hash_func(*args)

    def get(self, *args):
        """Return the value for ``args``, computing and caching it on a miss."""
        id_key = self.get_key(*args)
        try:
            return self[id_key]
        except KeyError:
            value = self.value_func(*args)
            self[id_key] = value
            # Return the computed value directly: with max_length <= 0 the
            # entry has already been evicted and cannot be read back.
            return value
+
+
def lru_cache(hash_func, max_length):
    """Decorator factory memoising a function through an :class:`LRUCacher`.

    Args:
        hash_func: maps the call's positional arguments to a cache key.
        max_length: maximum number of cached results.

    Returns:
        A decorator; the decorated function accepts positional arguments only.
    """

    def func_inner(func):
        cacher = LRUCacher(hash_func, max_length, func)

        @functools.wraps(func)
        def inner(*args):
            return cacher.get(*args)

        return inner

    return func_inner
+
+
def mutable_pseudo_hash(data, node, graph):
    """Compute a content-based cache key for ``(data, node, graph)``.

    DataFrames are hashed column by column and other inputs through their
    ``.values``. When the content cannot be hashed (unhashable items, or no
    ``.values`` attribute) the identity of ``data`` is used instead.

    Args:
        data: a pandas DataFrame, or an object exposing ``.values``
        node: hashable object combined into the key
        graph: hashable object combined into the key

    Returns:
        An integer hash.
    """
    try:
        if isinstance(data, pd.DataFrame):
            # Materialise the per-column hashes into a tuple: hashing the
            # generator itself would hash its identity, not the data.
            data_hash = hash(tuple(hash(tuple(data[col])) for col in data.columns))
        else:
            data_hash = hash(tuple(data.values))
    except (ValueError, TypeError, AttributeError):
        # Fall back to the identity of the data object itself (the id of a
        # temporary tuple would be meaningless).
        data_hash = id(data)

    return hash((data_hash, node, graph))
diff --git a/build/lib/visions/utils/graph.py b/build/lib/visions/utils/graph.py
new file mode 100644
index 000000000..b9e74f042
--- /dev/null
+++ b/build/lib/visions/utils/graph.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+from typing import Union
+
+import networkx as nx
+
+
def output_graph(
    G: nx.DiGraph, file_name: Union[Path, str], sort: bool = True, file_format=None
) -> None:
    """Output a graph to a file, either as image or as dot file.

    Args:
        G: the DiGraph to write or plot
        file_name: the file name to write to.
        sort: create a copy of the graph with sorted keys
        file_format: graphviz output format, if None, the file_name extension is used as format
                    https://graphviz.org/doc/info/output.html

    Returns:
        Nothing

    Raises:
        ValueError: when pydot rejects the requested output format.
    """

    if sort:
        # Create ordered graph for deterministic image outputs
        G_sorted = nx.DiGraph()
        # Keep the caller's graph-level attributes (e.g. a "graph" dpi entry);
        # the node style is then set to the fixed box/red style.
        G_sorted.graph.update(G.graph)
        G_sorted.graph["node"] = {"shape": "box", "color": "red"}
        G_sorted.add_nodes_from(sorted(G.nodes, key=lambda x: str(x)))

        style = nx.get_edge_attributes(G, "style")
        for edge in sorted(G.edges, key=lambda x: (str(x[0]), str(x[1]))):
            G_sorted.add_edge(*edge, style=style.get(edge))
        G = G_sorted

    p = nx.drawing.nx_pydot.to_pydot(G)
    if not isinstance(file_name, Path):
        file_name = Path(file_name)

    if file_format is None:
        # Extension without the leading dot, e.g. "out.PNG" -> "png".
        file_format = file_name.suffix[1:].lower()

    try:
        p.write(file_name, format=file_format)
    except AssertionError as err:
        raise ValueError(
            "Could not write file. Please make sure that the format is accepted by pydot."
        ) from err
diff --git a/build/lib/visions/utils/images/__init__.py b/build/lib/visions/utils/images/__init__.py
new file mode 100644
index 000000000..6770ee051
--- /dev/null
+++ b/build/lib/visions/utils/images/__init__.py
@@ -0,0 +1 @@
+from visions.utils.images import image_utils
diff --git a/build/lib/visions/utils/images/image_utils.py b/build/lib/visions/utils/images/image_utils.py
new file mode 100644
index 000000000..4cef526e7
--- /dev/null
+++ b/build/lib/visions/utils/images/image_utils.py
@@ -0,0 +1,114 @@
+import imghdr
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import imagehash
+from PIL import ExifTags, Image
+
+from visions.utils.monkeypatches.imghdr_patch import *
+
+
def open_image(path: Path) -> Optional[Image.Image]:
    """Open the image at ``path``.

    Args:
        path: location of the image file

    Returns:
        The opened image, or None when opening raises OSError or AttributeError.
    """
    try:
        opened = Image.open(path)
    except (OSError, AttributeError):
        opened = None
    return opened
+
+
def is_image_truncated(image: Image) -> bool:
    """Report whether loading the image data fails.

    Args:
        image: the image to load

    Returns:
        True if ``image.load()`` raises OSError or AttributeError, False otherwise
    """
    try:
        image.load()
    except (OSError, AttributeError):
        return True
    return False
+
+
def get_image_shape(image: Image) -> Optional[Tuple[int, int]]:
    """Read the ``size`` attribute of an image.

    Args:
        image: the image to inspect

    Returns:
        ``image.size``, or None when reading it raises OSError or AttributeError.
    """
    try:
        shape = image.size
    except (OSError, AttributeError):
        shape = None
    return shape
+
+
def hash_image(image: Image) -> Optional[str]:
    """Compute the ``imagehash.phash`` of an image as a string.

    Args:
        image: the image to hash

    Returns:
        The string form of the hash, or None when hashing raises OSError or AttributeError.
    """
    try:
        digest = str(imagehash.phash(image))
    except (OSError, AttributeError):
        return None
    return digest
+
+
def decode_byte_exif(exif_val: Union[str, bytes]) -> str:
    """Decode byte encodings

    Args:
        exif_val: an EXIF value, either already a string or raw bytes

    Returns:
        ``exif_val`` unchanged when it is a string, otherwise its UTF-8
        decoding. Bytes that are not valid UTF-8 are replaced with U+FFFD
        instead of raising UnicodeDecodeError.
    """
    if isinstance(exif_val, str):
        return exif_val
    return exif_val.decode(errors="replace")
+
+
def extract_exif(image: Image) -> dict:
    """Collect the EXIF entries of an image keyed by their tag names.

    Args:
        image: the image to read EXIF data from

    Returns:
        A dict mapping ``ExifTags.TAGS`` names to decoded values; tags missing
        from ``ExifTags.TAGS`` are skipped. Empty when the image has no EXIF
        data or when reading it raises AttributeError or OSError.
    """
    try:
        raw_exif = image._getexif()
        if raw_exif is None:
            return {}
        # NOTE(review): decode_byte_exif calls `.decode()` on anything that is
        # not a str, so a single non-str/bytes value raises AttributeError and
        # the except below returns an empty dict for the whole image - confirm
        # this is intended.
        return {
            ExifTags.TAGS[tag]: decode_byte_exif(value)
            for tag, value in raw_exif.items()
            if tag in ExifTags.TAGS
        }
    except (AttributeError, OSError):
        # Not all file types (e.g. .gif) have exif information.
        return {}
+
+
def path_is_image(p: Path) -> bool:
    """Return True when ``imghdr.what`` recognises an image format for ``p``."""
    detected_format = imghdr.what(p)
    return detected_format is not None
diff --git a/build/lib/visions/utils/monkeypatches/__init__.py b/build/lib/visions/utils/monkeypatches/__init__.py
new file mode 100644
index 000000000..d65f05b9d
--- /dev/null
+++ b/build/lib/visions/utils/monkeypatches/__init__.py
@@ -0,0 +1,6 @@
+from visions.utils.monkeypatches import imghdr_patch, pathlib_patch
+
+__all__ = [
+ "imghdr_patch",
+ "pathlib_patch",
+]
diff --git a/build/lib/visions/utils/monkeypatches/imghdr_patch.py b/build/lib/visions/utils/monkeypatches/imghdr_patch.py
new file mode 100644
index 000000000..f9e7098b6
--- /dev/null
+++ b/build/lib/visions/utils/monkeypatches/imghdr_patch.py
@@ -0,0 +1,31 @@
+# Monkeypatch bug in imagehdr
+from imghdr import tests
+
+
def test_jpeg1(h, f):
    """JPEG data in JFIF format"""
    # Match when the marker appears anywhere within the first 23 header bytes.
    return "jpeg" if b"JFIF" in h[:23] else None
+
+
# 32-byte header prefix recognised by `test_jpeg2`.
JPEG_MARK = (
    b"\xff\xd8\xff\xdb\x00C\x00\x08\x06\x06"
    b"\x07\x06\x05\x08\x07\x07\x07\t\t\x08\n\x0c\x14\r\x0c\x0b\x0b\x0c\x19\x12\x13\x0f"
)


def test_jpeg2(h, f):
    """JPEG with small header"""
    if len(h) < 32:
        return None
    # Cheap single-byte probe (67 == ord("C")) before comparing the full prefix.
    if h[5] == 67 and h[:32] == JPEG_MARK:
        return "jpeg"
    return None
+
+
def test_jpeg3(h, f):
    """JPEG data in JFIF or Exif format"""
    has_format_marker = h[6:10] in (b"JFIF", b"Exif")
    has_jpeg_prefix = h[:2] == b"\xff\xd8"
    if has_format_marker or has_jpeg_prefix:
        return "jpeg"
    return None
+
+
# Register the extra detectors at the end of imghdr's list of test functions.
tests.extend((test_jpeg1, test_jpeg2, test_jpeg3))
diff --git a/build/lib/visions/utils/monkeypatches/pathlib_patch.py b/build/lib/visions/utils/monkeypatches/pathlib_patch.py
new file mode 100644
index 000000000..5170010f1
--- /dev/null
+++ b/build/lib/visions/utils/monkeypatches/pathlib_patch.py
@@ -0,0 +1,21 @@
+# type: ignore
+from pathlib import Path
+
+
+def _copy(self, target):
+ """Monkeypatch for pathlib
+
+ Args:
+ self:
+ target:
+
+ Returns:
+
+ """
+ import shutil
+
+ assert self.is_file()
+ shutil.copy(str(self), str(target)) # str() only there for Python < (3, 6)
+
+
# Python 3.14 ships a native `Path.copy`; only install the fallback when the
# running interpreter does not provide one, so the stdlib method is never overridden.
if not hasattr(Path, "copy"):
    Path.copy = _copy
diff --git a/build/lib/visions/utils/profiling.py b/build/lib/visions/utils/profiling.py
new file mode 100644
index 000000000..0724c0fcb
--- /dev/null
+++ b/build/lib/visions/utils/profiling.py
@@ -0,0 +1,89 @@
+import functools
+import timeit
+
+import numpy as np
+import pandas as pd
+
+
def big_o_tester(test_func):
    """Wrap ``test_func`` into a function estimating its complexity class on a series.

    The returned function feeds growing prefixes of the given series to
    ``test_func`` through ``big_o.big_o`` and returns the best-fitting class,
    or NaN when the fit raises ``numpy.linalg.LinAlgError``.
    """
    import big_o

    @functools.wraps(test_func)
    def inner(test_series):
        def take_prefix(n):
            return test_series[0:n]

        try:
            fitted = big_o.big_o(test_func, take_prefix, max_n=test_series.shape[0])
        except np.linalg.LinAlgError:
            return np.nan

        best, _ = fitted
        return best

    return inner
+
+
def profile_type(dtype, profile_data, run_count=10, normed_length=100000):
    """Profile the membership test ``series in dtype`` on resampled data.

    Every non-empty entry of ``profile_data`` is resampled (with
    ``numpy.random.choice``) into a Series of ``normed_length`` items, which is
    then timed and fitted for its complexity class.

    Args:
        dtype: the object on the right-hand side of the ``in`` test
        profile_data: mapping from a name to a collection of values
        run_count: number of timed repetitions per series
        normed_length: length every series is resampled to

    Returns:
        One result dict per non-empty series.
    """
    resampled = {}
    for name, data in profile_data.items():
        if len(data) > 0:
            resampled[name] = pd.Series(np.random.choice(data, normed_length))

    big_O_test = big_o_tester(lambda x: x in dtype)

    results = []
    for name, data in resampled.items():
        total_time = timeit.timeit(lambda: data in dtype, number=run_count)
        results.append(
            {
                "type": dtype,
                "series": name,
                "run count": run_count,
                "average run time": total_time / run_count,
                "big O": big_O_test(data),
            }
        )
    return results
+
+
def profile_relation_is_relation(
    relation, profile_data, run_count=10, normed_length=100000
):
    """Profile ``relation.is_relation`` on resampled data.

    Every non-empty entry of ``profile_data`` is resampled (with
    ``numpy.random.choice``) into a Series of ``normed_length`` items, which is
    then timed and fitted for its complexity class.

    Args:
        relation: object exposing ``is_relation``
        profile_data: mapping from a name to a collection of values
        run_count: number of timed repetitions per series
        normed_length: length every series is resampled to

    Returns:
        One result dict per non-empty series.
    """
    profile_data = {
        name: pd.Series(np.random.choice(data, normed_length))
        for name, data in profile_data.items()
        if len(data) > 0
    }
    big_O_test = big_o_tester(relation.is_relation)
    return [
        {
            "relation": relation,
            "series": name,
            "run count": run_count,
            # Call the method on the series: timing `lambda: relation.is_relation`
            # would only measure an attribute lookup. Called with the series alone,
            # like the big-O fit above - presumably any state argument is optional.
            "average run time": timeit.timeit(
                lambda: relation.is_relation(data), number=run_count
            )
            / run_count,
            "big O": big_O_test(data),
        }
        for name, data in profile_data.items()
    ]
+
+
def profile_relation_transform(
    relation, profile_data, run_count=10, normed_length=100000
):
    """Profile ``relation.transform`` on resampled data.

    Every non-empty entry of ``profile_data`` is resampled (with
    ``numpy.random.choice``) into a Series of ``normed_length`` items, which is
    then timed and fitted for its complexity class.

    Args:
        relation: object exposing ``transform``
        profile_data: mapping from a name to a collection of values
        run_count: number of timed repetitions per series
        normed_length: length every series is resampled to

    Returns:
        One result dict per non-empty series.
    """
    profile_data = {
        name: pd.Series(np.random.choice(data, normed_length))
        for name, data in profile_data.items()
        if len(data) > 0
    }
    big_O_test = big_o_tester(relation.transform)
    return [
        {
            "relation": relation,
            "series": name,
            "run count": run_count,
            # Call the method on the series: timing `lambda: relation.transform`
            # would only measure an attribute lookup. Called with the series alone,
            # like the big-O fit above - presumably any state argument is optional.
            "average run time": timeit.timeit(
                lambda: relation.transform(data), number=run_count
            )
            / run_count,
            "big O": big_O_test(data),
        }
        for name, data in profile_data.items()
    ]
diff --git a/build/lib/visions/utils/warning_handling.py b/build/lib/visions/utils/warning_handling.py
new file mode 100644
index 000000000..8d667b5a3
--- /dev/null
+++ b/build/lib/visions/utils/warning_handling.py
@@ -0,0 +1,33 @@
+import functools
+import os
+import sys
+import warnings
+from typing import Callable, TypeVar
+
# Return type of the wrapped callable, preserved by both decorators below.
T = TypeVar("T")


def suppress_warnings(func: Callable[..., T]) -> Callable[..., T]:
    """Suppress warnings produced while executing the wrapped function."""

    @functools.wraps(func)
    def inner(*args, **kwargs) -> T:
        # The previous warning filters are restored when the context exits.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return func(*args, **kwargs)

    return inner


def discard_stderr(func: Callable[..., T]) -> Callable[..., T]:
    """Send everything written to ``sys.stderr`` during the wrapped call to ``os.devnull``.

    Shapely logs failures at a silly severity, just trying to suppress its output on failures.
    Only known way to get rid of sys output when wkt.loads hits a bad value.

    The previously active ``sys.stderr`` is restored afterwards, also when the
    wrapped function raises, and the devnull handle is always closed.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> T:
        # Remember the current stream instead of assuming `sys.__stderr__`,
        # so an outer redirection stays in place after the call.
        previous_stderr = sys.stderr
        with open(os.devnull, "w") as devnull:
            sys.stderr = devnull
            try:
                return func(*args, **kwargs)
            finally:
                sys.stderr = previous_stderr

    return wrapper
diff --git a/build/lib/visions/visualisation/__init__.py b/build/lib/visions/visualisation/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/build/lib/visions/visualisation/circular_packing.html b/build/lib/visions/visualisation/circular_packing.html
new file mode 100644
index 000000000..0c0698d04
--- /dev/null
+++ b/build/lib/visions/visualisation/circular_packing.html
@@ -0,0 +1,154 @@
+
+
+
+ Typeset circular packing
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build/lib/visions/visualisation/plot_circular_packing.py b/build/lib/visions/visualisation/plot_circular_packing.py
new file mode 100644
index 000000000..b97fc7862
--- /dev/null
+++ b/build/lib/visions/visualisation/plot_circular_packing.py
@@ -0,0 +1,69 @@
+import json
+import re
+from itertools import chain
+from pathlib import Path
+
+import networkx as nx
+
+from visions.typesets import CompleteSet
+
+
def update(data):
    """Recursively convert a tree node in place: rename "id" to "name" and give leaves a "size" of 1.

    A node counts as a leaf when it has no "children" key.

    Returns:
        The same (mutated) ``data`` dict.
    """
    data["name"] = data.pop("id")
    try:
        children = data["children"]
    except KeyError:
        data["size"] = 1
    else:
        data["children"] = list(map(update, children))
    return data
+
+
def write_html(data, output_file):
    """Render the circular packing template with ``data`` embedded as JSON.

    The template ``circular_packing.html`` is read from this module's
    directory; everything between the ``// START-REPLACE`` and
    ``// END-REPLACE`` markers is replaced by a ``root = <json>;`` assignment
    and the result is written to ``output_file``.

    Args:
        data: JSON-serialisable tree data
        output_file: path of the HTML file to write
    """
    jdata = json.dumps(data)
    string = f"\n\troot = {jdata};\n\t"

    file_name = Path(__file__).parent / "circular_packing.html"
    out_file = Path(output_file)
    fc = file_name.read_text()
    fc = re.sub(
        r"// START-REPLACE(.*)// END-REPLACE",
        # A callable replacement is inserted verbatim. A replacement *string*
        # would have its backslashes (e.g. JSON escapes such as \" or \\)
        # processed as regex template escapes.
        lambda _match: f"// START-REPLACE{string}// END-REPLACE",
        fc,
        flags=re.MULTILINE | re.DOTALL,
    )
    out_file.write_text(fc)
+
+
def to_json_tree_sorted(G, root):
    """Convert the graph below ``root`` into nested dicts with children sorted by "id".

    Each node becomes a dict of its node attributes plus an "id" entry. Nodes
    with successors also get a "children" list; the root always gets one, even
    when it is empty.
    """
    # json_graph.tree_data with sorting

    def describe(node):
        # Node attributes first; "id" is set last and therefore wins on a clash.
        return {**G.nodes[node], "id": node}

    def add_children(n, G):
        children_ = []
        for child in G[n]:
            d = describe(child)
            c = add_children(child, G)
            if c:
                d["children"] = c
            children_.append(d)

        children_.sort(key=lambda x: x["id"])
        return children_

    data = describe(root)
    data["children"] = add_children(root, G)
    return data
+
+
def plot_graph_circular_packing(typeset, output_file) -> None:
    """Write a circular packing HTML page for the base graph of ``typeset``.

    Args:
        typeset: typeset whose ``base_graph`` and ``root_node`` are rendered
        output_file: path of the HTML file to write
    """
    graph = typeset.base_graph.copy()
    # Relabel the copy in place so every node is identified by its string form.
    labels = {node: str(node) for node in graph.nodes}
    nx.relabel_nodes(graph, labels, copy=False)

    tree = update(to_json_tree_sorted(graph, root=str(typeset.root_node)))
    write_html(tree, output_file)
+
+
if __name__ == "__main__":
    # When run as a script, render the complete typeset into the working directory.
    plot_graph_circular_packing(CompleteSet(), "circular_packing.html")
diff --git a/build/lib/visions/visualisation/plot_typesets.py b/build/lib/visions/visualisation/plot_typesets.py
new file mode 100644
index 000000000..ae886033a
--- /dev/null
+++ b/build/lib/visions/visualisation/plot_typesets.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+
+from visions.typesets import CompleteSet, GeometrySet, StandardSet
+
+# Windows Note
+# Tip for Python3/64-bit compatible version of pygraphviz
+# https://github.com/CristiFati/Prebuilt-Binaries/raw/master/Windows/PyGraphviz/pygraphviz-1.5-cp37-cp37m-win_amd64.whl
+
+
def generate_typeset_plots() -> None:
    """Write dot, svg, pdf and png renderings of the bundled typesets into ``typesets/``.

    For every typeset the full graph is written in all four formats; the base
    graph is additionally written as svg and pdf.
    """
    typesets_dir = Path("typesets/")
    typesets_dir.mkdir(exist_ok=True)

    # Initialize typeset
    typesets = {
        "typeset_complete": CompleteSet(),
        "typeset_geometry": GeometrySet(),
        "typeset_standard": StandardSet(),
    }

    for name, tsc in typesets.items():
        # Write graph to dot
        tsc.output_graph(typesets_dir / f"{name}.dot")

        # Plot the graph (svg, then pdf), each followed by its base-only variant
        for extension in ("svg", "pdf"):
            tsc.output_graph(typesets_dir / f"{name}.{extension}")
            tsc.output_graph(typesets_dir / f"{name}_base.{extension}", base_only=True)

        # Plot the graph (png)
        tsc.output_graph(typesets_dir / f"{name}.png", dpi=150)
+
+
# Script entry point: regenerate the typeset plots when this module is executed directly.
if __name__ == "__main__":
    generate_typeset_plots()
diff --git a/setup.py b/setup.py
index 7d27dec34..3c5a43487 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@
tests_require=test_requirements,
python_requires=">=3.8",
long_description=long_description,
- long_description_content_type="text/x-rst",
+ long_description_content_type="text/markdown",
zip_safe=False,
classifiers=[
"Programming Language :: Python :: 3",