updated docs

dylan-profiler · Feb 6, 2024 · 8f12f57 · 8f12f57
1 parent 076affa
commit 8f12f57
Show file tree

Hide file tree

Showing 164 changed files with 6,829 additions and 4 deletions.
diff --git a/Makefile b/Makefile
@@ -18,7 +18,7 @@ pypi_package:
 	check-manifest
 	python setup.py sdist bdist_wheel
 	twine check dist/*
-	twine upload --skip-existing dist/*
+	twine upload --skip-existing dist/* -u __token__
 
 ## Run black linting
 lint:

diff --git a/README.md b/README.md
@@ -2,7 +2,6 @@
   <img src="images/visions.png" width="600px"><br>
   <i>And these visions of data types, they kept us up past the dawn.</i> 
 </div>
-
 <p align="center">
   <a href="https://pypi.org/project/visions/">
     <img src="https://pepy.tech/badge/visions" />
@@ -121,7 +120,8 @@ df.head(2)
   </tbody>
 </table>
 
-The most important abstraction in `visions` are Types - these represent semantic notions about data. You have access to a
+The most important abstractions in `visions` are Types - these represent semantic notions about data. You have access to
+a
 range of well tested types like `Integer`, `Float`, and `Files` covering the most common software development use cases.
 Types can be bundled together into typesets. Behind the scenes, `visions` builds a traversable graph for any collection
 of types.

diff --git a/build/lib/visions/__init__.py b/build/lib/visions/__init__.py
@@ -0,0 +1,15 @@
+"""Core functionality"""
+
+from visions import types, typesets, utils
+from visions.backends import *
+from visions.declarative import create_type
+from visions.functional import (
+    cast_to_detected,
+    cast_to_inferred,
+    detect_type,
+    infer_type,
+)
+from visions.types import *
+from visions.typesets import *
+
+__version__ = "0.7.6"
diff --git a/build/lib/visions/backends/__init__.py b/build/lib/visions/backends/__init__.py
@@ -0,0 +1,45 @@
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+# Each backend below is optional. Its imports are attempted inside a try block
+# and the outcome is logged at INFO level. Any ImportError raised inside the
+# block (from the third-party package or from the visions submodule imported
+# after it) is caught and reported as "NOT loaded" instead of propagating.
+try:
+    import pandas as pd
+
+    import visions.backends.pandas
+    from visions.backends.pandas.test_utils import pandas_version
+
+    # BoolDtype is only imported when the pandas major version is below 1.
+    if pandas_version[0] < 1:
+        from visions.dtypes.boolean import BoolDtype
+    logger.info(f"Pandas backend loaded {pd.__version__}")
+
+except ImportError:
+    logger.info("Pandas backend NOT loaded")
+
+
+# Numpy backend.
+try:
+    import numpy as np
+
+    import visions.backends.numpy
+
+    logger.info(f"Numpy backend loaded {np.__version__}")
+except ImportError:
+    logger.info("Numpy backend NOT loaded")
+
+
+# Pyspark backend.
+try:
+    import pyspark
+
+    import visions.backends.spark
+
+    logger.info(f"Pyspark backend loaded {pyspark.__version__}")
+except ImportError:
+    logger.info("Pyspark backend NOT loaded")
+
+
+# Pure-python backend: no third-party import precedes it, so only an
+# ImportError raised while importing visions.backends.python lands here.
+try:
+    import visions.backends.python
+
+    logger.info("Python backend loaded")
+except ImportError:
+    logger.info("Python backend NOT loaded")
diff --git a/build/lib/visions/backends/numpy/__init__.py b/build/lib/visions/backends/numpy/__init__.py
@@ -0,0 +1,3 @@
+# https://het.as.utexas.edu/HET/Software/Numpy/reference/arrays.scalars.html
+import visions.backends.numpy.types
+from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
diff --git a/build/lib/visions/backends/numpy/array_utils.py b/build/lib/visions/backends/numpy/array_utils.py
@@ -0,0 +1,67 @@
+import functools
+from typing import Callable, Sequence, Tuple, TypeVar, Union
+
+import numpy as np
+
+from visions.backends.shared.nan_handling import nan_mask
+from visions.backends.shared.utilities import has_import
+
+# numba is an optional dependency.
+# NOTE(review): has_import presumably reports whether the named module can be
+# imported - confirm against visions.backends.shared.utilities.
+has_numba = has_import("numba")
+
+# Only bind `nb` when the check above succeeded; code using `nb` further down
+# is guarded by the same flag.
+if has_numba:
+    import numba as nb
+
+# Generic type variable used in the all_type_numba annotation below.
+T = TypeVar("T")
+
+
+def array_handle_nulls(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator for nullable arrays
+    Indexes the incoming array with ``nan_mask(array)`` and hands the result to
+    ``fn`` wrapped in ``array_not_empty``, so an array that is empty after
+    masking yields False.
+    Args:
+        fn: Predicate taking an array plus optional extra arguments.
+    Returns:
+        The wrapped predicate.
+    """
+
+    # NOTE(review): nan_mask presumably selects the non-missing entries -
+    # confirm against visions.backends.shared.nan_handling.
+    guarded = array_not_empty(fn)
+
+    @functools.wraps(fn)
+    def inner(array: np.ndarray, *args, **kwargs) -> bool:
+        masked = array[nan_mask(array)]
+        return guarded(masked, *args, **kwargs)
+
+    return inner
+
+
+def array_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
+    """Decorator to exclude empty arrays
+    Args:
+        fn: Predicate taking an array plus optional extra arguments.
+    Returns:
+        A wrapper returning False when ``array.shape[0] == 0`` and otherwise
+        forwarding every argument to ``fn``.
+    """
+
+    @functools.wraps(fn)
+    def inner(array: np.ndarray, *args, **kwargs) -> bool:
+        is_empty = array.shape[0] == 0
+        return False if is_empty else fn(array, *args, **kwargs)
+
+    return inner
+
+
+def _base_all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+    """Report whether every element of ``array`` is an instance of ``dtypes``."""
+    for element in array:
+        if not isinstance(element, dtypes):
+            return False
+    # Reached for an empty array too, matching ``all()`` over no elements.
+    return True
+
+
+# `all_type` is defined in both branches and delegates to `_base_all_type`
+# either way. `all_type_numba` exists only when numba is importable and is not
+# called anywhere in the visible part of this module.
+if has_numba:
+    # TODO: This only works when the numpy array dtype falls under a few categories
+    # There are alternative implementations with forceobj=True which work in all cases
+    # including the use of isinstance, but in those cases worst case performance can be substantially worse
+    # than the default python implementation.
+    def all_type_numba(dtype: Union[Tuple, T]):
+        """Build a numba-compiled checker for a single ``dtype``.
+        The returned function yields False as soon as ``type(array[i])`` is not
+        ``dtype`` (an identity comparison, not ``isinstance``) and True otherwise.
+        """
+
+        @nb.jit(nopython=True)
+        def inner(array: np.ndarray) -> bool:
+            for i in nb.prange(array.size):
+                if type(array[i]) is not dtype:
+                    return False
+            return True
+
+        return inner
+
+    def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+        """Report whether every element of ``array`` is an instance of ``dtypes``."""
+        return _base_all_type(array, dtypes)
+
+else:
+
+    def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
+        """Report whether every element of ``array`` is an instance of ``dtypes``."""
+        return _base_all_type(array, dtypes)
diff --git a/build/lib/visions/backends/numpy/sequences.py b/build/lib/visions/backends/numpy/sequences.py
@@ -0,0 +1,61 @@
+from typing import Dict, Sequence
+from urllib.parse import urlparse
+
+import numpy as np
+
+
+def get_sequences() -> Dict[str, Sequence]:
+    """Build the named sample sequences for the numpy backend.
+    Infinite values are spelled ``np.inf`` / ``-np.inf`` and the unicode dtype
+    ``np.str_`` because the aliases ``np.PINF``, ``np.NINF``, ``np.Infinity`` and
+    ``np.unicode_`` were removed in NumPy 2.0; the values produced are the same.
+    Returns:
+        A dict mapping a sequence name to a list or numpy array of values.
+    """
+    sequences = {
+        "complex_series_float": [
+            complex(0, 0),
+            complex(1, 0),
+            complex(3, 0),
+            complex(-1, 0),
+        ],
+        "url_nan_series": [
+            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
+            urlparse("https://github.com/dylan-profiling/hurricane"),
+            np.nan,
+        ],
+        "mixed": [True, False, np.nan],
+        "float_nan_series": [1.0, 2.5, np.nan],
+        "float_series5": [np.nan, 1.2],
+        "float_with_inf": [np.inf, -np.inf, np.inf, 1000000.0, 5.5],
+        "inf_series": [np.inf, -np.inf, np.inf, np.inf],
+        "int_nan_series": [1, 2, np.nan],
+        "nan_series": [np.nan],
+        "nan_series_2": [np.nan, np.nan, np.nan, np.nan],
+        "string_num_nan": ["1.0", "2.0", np.nan],
+        "string_with_sep_num_nan": ["1,000.0", "2.1", np.nan],
+        "string_flt_nan": ["1.0", "45.67", np.nan],
+        "string_str_nan": [
+            "I was only robbing the register,",
+            "I hope you understand",
+            "One of us had better call up the cops",
+            "In the hot New Jersey night",
+            np.nan,
+        ],
+        "float_series3": np.array([1.2, 2, 3, 4], dtype=np.float64),
+        "np_uint32": np.array([1, 2, 3, 4], dtype=np.uint32),
+        "string_np_unicode_series": np.array(["upper", "hall"], dtype=np.str_),
+        "complex_series": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+        ],
+        "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
+        "complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None],
+        "complex_series_nan_2": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+            np.nan,
+        ],
+        # Same values as "complex_series_nan_2"; kept under its own name.
+        "complex_series_py_nan": [
+            complex(0, 0),
+            complex(1, 2),
+            complex(3, -1),
+            np.nan,
+        ],
+    }
+    return sequences
diff --git a/build/lib/visions/backends/numpy/test_utils.py b/build/lib/visions/backends/numpy/test_utils.py
@@ -0,0 +1,172 @@
+"""
+A selection of testing utilities for visions.
+"""
+
+import functools
+from typing import Callable, Dict, List, Optional, Type, Union
+
+import numpy as np
+
+from visions.backends.numpy.array_utils import array_handle_nulls
+
+
+def option_coercion_evaluator(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], Optional[np.ndarray]]:
+    """A coercion test evaluator
+    Runs a coercion function and turns the expected failure modes into None.
+    Args:
+        fn: A function coercing an array to another array.
+        extra_errors: Exceptions to catch in addition to ValueError, TypeError
+            and AttributeError.
+    Returns:
+        A callable giving the coerced array, or None when ``fn`` raised one of
+        the caught exceptions. Any other exception propagates.
+    """
+
+    caught = (ValueError, TypeError, AttributeError) + tuple(extra_errors or ())
+
+    @functools.wraps(fn)
+    def f(array: np.ndarray) -> Optional[np.ndarray]:
+        try:
+            coerced = fn(array)
+        except caught:
+            return None
+        return coerced
+
+    return f
+
+
+def coercion_test(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], bool]:
+    """A coercion test generator
+    Turns a coercion function into a predicate telling whether it succeeds.
+    Args:
+        fn: A function coercing an array to another type.
+        extra_errors: Additional exceptions to catch
+    Returns:
+        A callable returning True when the evaluated coercion produced a value
+        other than None, and False otherwise.
+    """
+    evaluate = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(fn)
+    def f(array: np.ndarray) -> bool:
+        # The evaluator signals a caught failure with None.
+        return evaluate(array) is not None
+
+    return f
+
+
+def coercion_true_test(
+    fn: Callable[[np.ndarray], np.ndarray],
+    extra_errors: Optional[List[Type[Exception]]] = None,
+) -> Callable[[np.ndarray], bool]:
+    """A coercion truthiness test generator
+    Creates a test that fails when the coercion fails and otherwise reports
+    ``array.all()`` of the input array.
+    Args:
+        fn: A function coercing an array to another type.
+        extra_errors: Additional exceptions to catch
+    Returns:
+        A callable returning False when the coercion failed, else ``array.all()``.
+    """
+    # NOTE(review): the success branch inspects the *input* array, not the
+    # coerced result - confirm this is intended rather than ``result.all()``.
+    evaluate = option_coercion_evaluator(fn, extra_errors)
+
+    @functools.wraps(evaluate)
+    def f(array: np.ndarray) -> bool:
+        if evaluate(array) is None:
+            return False
+        return array.all()
+
+    return f
+
+
+def coercion_equality_test(
+    fn: Callable[[np.ndarray], np.ndarray]
+) -> Callable[[np.ndarray], bool]:
+    """A coercion equality test generator
+    Creates a test that passes only when the coercion succeeds and its output
+    compares equal to the input under ``np.array_equal``. Useful when the data
+    type should change without changing the data, for example when converting
+    an integer to a float.
+    Args:
+        fn: A function coercing an array to another type.
+    Returns:
+        A callable returning False when the coercion failed, otherwise the
+        result of ``np.array_equal(array, coerced)``.
+    """
+    evaluate = option_coercion_evaluator(fn)
+
+    @functools.wraps(evaluate)
+    def f(array: np.ndarray) -> bool:
+        coerced = evaluate(array)
+        if coerced is None:
+            return False
+        return np.array_equal(array, coerced)
+
+    return f
+
+
+def coercion_single_map_test(mapping: List[Dict]) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a test for a list of mappings.
+    Args:
+        mapping: A list of dicts.
+    Returns:
+        Callable (decorated with ``array_handle_nulls``) that is True when all
+        values of the array are keys of at least one single dict in the list.
+    """
+
+    @array_handle_nulls
+    def f(array: np.ndarray, state: dict = {}) -> bool:
+        # `state` is accepted but never read or written here.
+        for candidate in mapping:
+            if np.isin(array, list(candidate)).all():
+                return True
+        return False
+
+    return f
+
+
+def coercion_multi_map_test(mapping: Dict) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a test for one mapping.
+    Args:
+        mapping: A dict whose keys are the accepted values.
+    Returns:
+        Callable (decorated with ``array_handle_nulls``) that reports whether
+        every value of the array is a key of ``mapping``.
+    """
+
+    @array_handle_nulls
+    def f(array: np.ndarray, state: dict = {}) -> bool:
+        # `state` is accepted but never read or written here.
+        accepted = list(mapping)
+        return np.isin(array, accepted).all()
+
+    return f
+
+
+def coercion_map_test(
+    mapping: Union[List[Dict], Dict]
+) -> Callable[[np.ndarray, Dict], bool]:
+    """Create a testing function for a single mapping or a list of mappings.
+    A list is dispatched to ``coercion_single_map_test`` and a dict to
+    ``coercion_multi_map_test``.
+    Args:
+        mapping: A dict with a mapping or a list of dicts
+    Returns:
+        Callable that checks if an array consists of the mappable values
+    Raises:
+        ValueError: If ``mapping`` is neither a list nor a dict.
+    Examples:
+        >>> coercion_map_test({"Yes": True, "No": False})
+        >>> coercion_map_test(
+        >>>     [
+        >>>         {"Yes": True, "No": False},
+        >>>         {"Y": True, "N": False},
+        >>>     ]
+        >>> )
+    """
+
+    if isinstance(mapping, list):
+        return coercion_single_map_test(mapping)
+    if isinstance(mapping, dict):
+        return coercion_multi_map_test(mapping)
+    raise ValueError("Mapping should be dict or list of dicts")
+
+
+def coercion_map(
+    mapping: Union[List[Dict], Dict]
+) -> Callable[[np.ndarray], np.ndarray]:
+    """Maps an array given a mapping
+    A list of dicts is merged into one lookup table, later dicts overriding
+    earlier ones on duplicate keys. Values absent from the table map to
+    ``np.nan``.
+    Args:
+        mapping: a dict to map, or a list of dicts.
+    Returns:
+        A callable (an ``np.vectorize`` object) that maps the array.
+    Raises:
+        ValueError: If ``mapping`` is neither a list nor a dict.
+    """
+    if isinstance(mapping, list):
+        table = {}
+        for part in mapping:
+            for key, value in part.items():
+                table[key] = value
+    elif isinstance(mapping, dict):
+        table = mapping
+    else:
+        raise ValueError("Mapping should be dict or list of dicts")
+
+    return np.vectorize(lambda value: table.get(value, np.nan))