-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
164 changed files
with
6,829 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
"""Core functionality""" | ||
|
||
from visions import types, typesets, utils | ||
from visions.backends import * | ||
from visions.declarative import create_type | ||
from visions.functional import ( | ||
cast_to_detected, | ||
cast_to_inferred, | ||
detect_type, | ||
infer_type, | ||
) | ||
from visions.types import * | ||
from visions.typesets import * | ||
|
||
__version__ = "0.7.6" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import logging

# Module-level logger used to report which optional backends could be imported.
logger = logging.getLogger(__name__)

# Every backend below is optional: a failed import is reported at INFO level
# instead of propagating, so this module loads with any subset installed.

try:
    import pandas as pd

    import visions.backends.pandas
    from visions.backends.pandas.test_utils import pandas_version

    # BoolDtype is only imported for pandas major versions below 1.
    # NOTE(review): presumably imported for its side effects (the name is not
    # used in this module) — confirm.
    if pandas_version[0] < 1:
        from visions.dtypes.boolean import BoolDtype
    logger.info(f"Pandas backend loaded {pd.__version__}")

except ImportError:
    logger.info("Pandas backend NOT loaded")

try:
    import numpy as np

    import visions.backends.numpy

    logger.info(f"Numpy backend loaded {np.__version__}")
except ImportError:
    logger.info("Numpy backend NOT loaded")

try:
    import pyspark

    import visions.backends.spark

    logger.info(f"Pyspark backend loaded {pyspark.__version__}")
except ImportError:
    logger.info("Pyspark backend NOT loaded")

try:
    import visions.backends.python

    logger.info("Python backend loaded")
except ImportError:
    logger.info("Python backend NOT loaded")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# https://het.as.utexas.edu/HET/Software/Numpy/reference/arrays.scalars.html | ||
import visions.backends.numpy.types | ||
from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import functools | ||
from typing import Callable, Sequence, Tuple, TypeVar, Union | ||
|
||
import numpy as np | ||
|
||
from visions.backends.shared.nan_handling import nan_mask | ||
from visions.backends.shared.utilities import has_import | ||
|
||
has_numba = has_import("numba") | ||
|
||
if has_numba: | ||
import numba as nb | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
def array_handle_nulls(fn: Callable[..., bool]) -> Callable[..., bool]:
    """Decorator for nullable arrays.

    The decorated function receives ``array[nan_mask(array)]`` instead of the
    raw array. The filtered array is then passed through ``array_not_empty``,
    so when nothing is left after masking the result is ``False`` and ``fn``
    is never called.

    Args:
        fn: A test taking an array (plus optional extra arguments) and
            returning a bool.

    Returns:
        The wrapped test.
    """
    guarded = array_not_empty(fn)

    @functools.wraps(fn)
    def inner(array: np.ndarray, *args, **kwargs) -> bool:
        # NOTE(review): nan_mask is assumed to select the non-null entries —
        # confirm against visions.backends.shared.nan_handling.
        return guarded(array[nan_mask(array)], *args, **kwargs)

    return inner
|
||
|
||
def array_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
    """Decorator to exclude empty arrays.

    Args:
        fn: A test taking an array (plus optional extra arguments) and
            returning a bool.

    Returns:
        A wrapper that returns ``False`` without calling ``fn`` when the
        array's first dimension has length zero, and ``fn``'s result otherwise.
    """

    @functools.wraps(fn)
    def inner(array: np.ndarray, *args, **kwargs) -> bool:
        # Only the leading dimension is inspected, so the array must have at
        # least one dimension.
        if array.shape[0] == 0:
            return False
        return fn(array, *args, **kwargs)

    return inner
|
||
|
||
def _base_all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
    """Return True when every element of ``array`` is an instance of ``dtypes``.

    An empty array yields True, because ``all`` of an empty iterable is True.
    """
    return all(isinstance(v, dtypes) for v in array)


if has_numba:
    # TODO: This only works when the numpy array dtype falls under a few categories
    #  There are alternative implementations with forceobj=True which work in all cases
    #  including the use of isinstance, but in those cases worst case performance can be
    #  substantially worse than the default python implementation.
    def all_type_numba(dtype: Union[Tuple, T]):
        """Build a numba-compiled check that every element has exactly type ``dtype``.

        Unlike ``_base_all_type`` this compares with ``type(...) is``, so
        subclasses do not match. It is not used by ``all_type``.
        """

        @nb.jit(nopython=True)
        def inner(array: np.ndarray) -> bool:
            # NOTE(review): without parallel=True, prange is expected to behave
            # like range; the early return would not be valid in a parallel
            # loop — confirm before enabling parallel execution.
            for i in nb.prange(array.size):
                if type(array[i]) is not dtype:
                    return False
            return True

        return inner


def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
    """Return True when every element of ``array`` is an instance of ``dtypes``.

    Both the numba and the non-numba branch used to define this identically,
    delegating to the pure-Python implementation, so it is defined once here.
    """
    return _base_all_type(array, dtypes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from typing import Dict, Sequence | ||
from urllib.parse import urlparse | ||
|
||
import numpy as np | ||
|
||
|
||
def get_sequences() -> Dict[str, Sequence]:
    """Return the named test sequences for the numpy backend.

    Returns:
        A dict mapping a sequence name to either a plain Python list or a
        numpy array with an explicit dtype.
    """
    # The infinities are spelled with ``np.inf`` / ``-np.inf`` and the unicode
    # dtype with ``np.str_``: the aliases ``np.NINF``, ``np.PINF``,
    # ``np.Infinity`` and ``np.unicode_`` were removed in NumPy 2.0.
    sequences = {
        "complex_series_float": [
            complex(0, 0),
            complex(1, 0),
            complex(3, 0),
            complex(-1, 0),
        ],
        "url_nan_series": [
            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
            urlparse("https://github.com/dylan-profiling/hurricane"),
            np.nan,
        ],
        "mixed": [True, False, np.nan],
        "float_nan_series": [1.0, 2.5, np.nan],
        "float_series5": [np.nan, 1.2],
        "float_with_inf": [np.inf, -np.inf, np.inf, 1000000.0, 5.5],
        "inf_series": [np.inf, -np.inf, np.inf, np.inf],
        "int_nan_series": [1, 2, np.nan],
        "nan_series": [np.nan],
        "nan_series_2": [np.nan, np.nan, np.nan, np.nan],
        "string_num_nan": ["1.0", "2.0", np.nan],
        "string_with_sep_num_nan": ["1,000.0", "2.1", np.nan],
        "string_flt_nan": ["1.0", "45.67", np.nan],
        "string_str_nan": [
            "I was only robbing the register,",
            "I hope you understand",
            "One of us had better call up the cops",
            "In the hot New Jersey night",
            np.nan,
        ],
        "float_series3": np.array([1.2, 2, 3, 4], dtype=np.float64),
        "np_uint32": np.array([1, 2, 3, 4], dtype=np.uint32),
        "string_np_unicode_series": np.array(["upper", "hall"], dtype=np.str_),
        "complex_series": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
        ],
        "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
        "complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None],
        "complex_series_nan_2": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
            np.nan,
        ],
        "complex_series_py_nan": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
            np.nan,
        ],
    }
    return sequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
""" | ||
A selection of testing utilities for visions. | ||
""" | ||
|
||
import functools | ||
from typing import Callable, Dict, List, Optional, Type, Union | ||
|
||
import numpy as np | ||
|
||
from visions.backends.numpy.array_utils import array_handle_nulls | ||
|
||
|
||
def option_coercion_evaluator(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], Optional[np.ndarray]]:
    """A coercion test evaluator

    Evaluates a coercion function and optionally returns the coerced array.

    Args:
        fn: A function coercing an array to another array.
        extra_errors: Additional exceptions to catch

    Returns:
        A callable returning the coerced array if the coercion succeeds,
        otherwise None. ValueError, TypeError and AttributeError are always
        caught; any other exception not listed in ``extra_errors`` propagates.
    """
    # Build the tuple of caught exceptions once, not on every call.
    caught = (ValueError, TypeError, AttributeError) + tuple(extra_errors or ())

    @functools.wraps(fn)
    def f(array: np.ndarray) -> Optional[np.ndarray]:
        try:
            return fn(array)
        except caught:
            return None

    return f
|
||
|
||
def coercion_test(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], bool]:
    """A coercion test generator

    Creates a coercion test based on a provided coercion function.

    Args:
        fn: A function coercing an array to another type.
        extra_errors: Additional exceptions to catch

    Returns:
        A callable returning True when the coercion succeeded and False when
        it failed with one of the caught exceptions.
    """
    # The evaluator yields the coerced array, or None when the coercion failed.
    tester = option_coercion_evaluator(fn, extra_errors)

    @functools.wraps(fn)
    def f(array: np.ndarray) -> bool:
        return tester(array) is not None

    return f
|
||
|
||
def coercion_true_test(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], bool]:
    """A coercion truth test generator

    Creates a coercion test based on a provided coercion function which also
    requires every element of the coerced output to be truthy. This is useful
    when ``fn`` computes an element-wise condition that has to hold for the
    whole array.

    Args:
        fn: A function coercing an array to another array.
        extra_errors: Additional exceptions to catch

    Returns:
        A callable returning False when the coercion failed, otherwise whether
        all elements of the coerced result are truthy.
    """
    tester = option_coercion_evaluator(fn, extra_errors)

    @functools.wraps(tester)
    def f(array: np.ndarray) -> bool:
        result = tester(array)
        if result is None:
            return False
        # Judge the coerced result, not the input: the previous code returned
        # ``array.all()``, which ignored what ``fn`` produced.
        return bool(np.all(result))

    return f
|
||
|
||
def coercion_equality_test(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], bool]:
    """A coercion equality test generator

    Creates a coercion test based on a provided coercion function which also enforces
    equality constraints on the output. This is useful when you want to change the
    data type of an array without necessarily changing the data, for example,
    when converting an integer to a float.

    Args:
        fn: A function coercing an array to another type.
        extra_errors: Additional exceptions to catch (optional, matching the
            other coercion test generators in this module).

    Returns:
        A callable returning False when the coercion failed, otherwise whether
        the coerced array equals the input according to ``np.array_equal``.
    """
    tester = option_coercion_evaluator(fn, extra_errors)

    @functools.wraps(tester)
    def f(array: np.ndarray) -> bool:
        result = tester(array)
        if result is None:
            return False
        return bool(np.array_equal(array, result))

    return f
|
||
|
||
def coercion_single_map_test(mapping: List[Dict]) -> Callable[[np.ndarray, Dict], bool]:
    """Create a test that passes when ONE of several mappings covers the array.

    Args:
        mapping: A list of dicts; only the keys are used.

    Returns:
        A callable that is True when every element of the array (after the
        ``array_handle_nulls`` filtering) is a key of a single dict in
        ``mapping``. Elements spread across different dicts do not pass.
    """

    # ``state`` is accepted for signature compatibility and is not used.
    # It defaults to None, not a shared mutable ``{}``.
    @array_handle_nulls
    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
        return any(np.isin(array, list(single_map)).all() for single_map in mapping)

    return f
|
||
|
||
def coercion_multi_map_test(mapping: Dict) -> Callable[[np.ndarray, Dict], bool]:
    """Create a test that passes when one mapping's keys cover the array.

    Args:
        mapping: A dict; only the keys are used.

    Returns:
        A callable that is True when every element of the array (after the
        ``array_handle_nulls`` filtering) is a key of ``mapping``.
    """

    # ``state`` is accepted for signature compatibility and is not used.
    # It defaults to None, not a shared mutable ``{}``.
    @array_handle_nulls
    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
        return bool(np.isin(array, list(mapping)).all())

    return f
|
||
|
||
def coercion_map_test(
    mapping: Union[List[Dict], Dict]
) -> Callable[[np.ndarray, Dict], bool]:
    """Create a testing function for a single mapping or a list of mappings.

    Args:
        mapping: A dict with a mapping or a list of dicts

    Returns:
        Callable that checks if an array consists of the mappable values

    Raises:
        ValueError: If ``mapping`` is neither a dict nor a list.

    Examples:
        >>> coercion_map_test({"Yes": True, "No": False})
        >>> coercion_map_test(
        ...     [
        ...         {"Yes": True, "No": False},
        ...         {"Y": True, "N": False},
        ...     ]
        ... )
    """
    # Despite the helper names, a list of dicts goes to the "single" test
    # (one of the dicts must cover the whole array) and a lone dict goes to
    # the "multi" test.
    if isinstance(mapping, list):
        return coercion_single_map_test(mapping)
    if isinstance(mapping, dict):
        return coercion_multi_map_test(mapping)
    raise ValueError("Mapping should be dict or list of dicts")
|
||
|
||
def coercion_map(
    mapping: Union[List[Dict], Dict]
) -> Callable[[np.ndarray], np.ndarray]:
    """Maps an array given a mapping

    Args:
        mapping: a dict to map, or a list of dicts. A list is merged into one
            dict; when a key occurs in several dicts the last one wins.

    Returns:
        A callable that maps the array element-wise. Values without an entry
        in the mapping become ``np.nan``.

    Raises:
        ValueError: If ``mapping`` is neither a dict nor a list.
    """
    if isinstance(mapping, list):
        mapping = {k: v for d in mapping for k, v in d.items()}
    elif not isinstance(mapping, dict):
        raise ValueError("Mapping should be dict or list of dicts")

    # NOTE(review): np.vectorize infers the output dtype from the first mapped
    # element and needs ``otypes`` to accept size-0 input, so a fallback
    # ``np.nan`` may be cast to that first dtype and empty arrays may raise —
    # confirm callers never hit either case before relying on this.
    return np.vectorize(lambda value: mapping.get(value, np.nan))
Oops, something went wrong.