Skip to content

Commit

Permalink
updated docs
Browse files Browse the repository at this point in the history
  • Loading branch information
ieaves committed Feb 6, 2024
1 parent 076affa commit 8f12f57
Show file tree
Hide file tree
Showing 164 changed files with 6,829 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pypi_package:
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
twine upload --skip-existing dist/*
twine upload --skip-existing dist/* -u __token__

## Run black linting
lint:
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
<img src="images/visions.png" width="600px"><br>
<i>And these visions of data types, they kept us up past the dawn.</i>
</div>

<p align="center">
<a href="https://pypi.org/project/visions/">
<img src="https://pepy.tech/badge/visions" />
Expand Down Expand Up @@ -121,7 +120,8 @@ df.head(2)
</tbody>
</table>

The most important abstractions in `visions` are Types - these represent semantic notions about data. You have access to a
The most important abstractions in `visions` are Types - these represent semantic notions about data. You have access to
a
range of well-tested types like `Integer`, `Float`, and `Files` covering the most common software development use cases.
Types can be bundled together into typesets. Behind the scenes, `visions` builds a traversable graph for any collection
of types.
Expand Down
15 changes: 15 additions & 0 deletions build/lib/visions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Core functionality"""

from visions import types, typesets, utils
from visions.backends import *
from visions.declarative import create_type
from visions.functional import (
cast_to_detected,
cast_to_inferred,
detect_type,
infer_type,
)
from visions.types import *
from visions.typesets import *

__version__ = "0.7.6"
45 changes: 45 additions & 0 deletions build/lib/visions/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import logging

logger = logging.getLogger(__name__)


# Every backend below is optional: its import is attempted inside a
# try/except ImportError and the outcome is only reported through
# ``logger.info``, so a missing dependency does not raise from this module.
try:
    import pandas as pd

    import visions.backends.pandas
    from visions.backends.pandas.test_utils import pandas_version

    # BoolDtype is only imported when the first component of
    # ``pandas_version`` is below 1.
    if pandas_version[0] < 1:
        from visions.dtypes.boolean import BoolDtype
    logger.info(f"Pandas backend loaded {pd.__version__}")

except ImportError:
    logger.info("Pandas backend NOT loaded")


# Numpy backend: imported only if numpy itself can be imported.
try:
    import numpy as np

    import visions.backends.numpy

    logger.info(f"Numpy backend loaded {np.__version__}")
except ImportError:
    logger.info("Numpy backend NOT loaded")


# Spark backend: imported only if pyspark can be imported.
try:
    import pyspark

    import visions.backends.spark

    logger.info(f"Pyspark backend loaded {pyspark.__version__}")
except ImportError:
    logger.info("Pyspark backend NOT loaded")


# Pure-python backend: no third-party import is attempted first, but the
# import is still guarded like the others.
try:
    import visions.backends.python

    logger.info("Python backend loaded")
except ImportError:
    logger.info("Python backend NOT loaded")
3 changes: 3 additions & 0 deletions build/lib/visions/backends/numpy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://het.as.utexas.edu/HET/Software/Numpy/reference/arrays.scalars.html
import visions.backends.numpy.types
from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
67 changes: 67 additions & 0 deletions build/lib/visions/backends/numpy/array_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import functools
from typing import Callable, Sequence, Tuple, TypeVar, Union

import numpy as np

from visions.backends.shared.nan_handling import nan_mask
from visions.backends.shared.utilities import has_import

has_numba = has_import("numba")

if has_numba:
import numba as nb

T = TypeVar("T")


def array_handle_nulls(fn: Callable[..., bool]) -> Callable[..., bool]:
    """Decorate an array predicate so it only sees mask-selected entries.

    The returned callable indexes the incoming array with
    ``nan_mask(array)`` and forwards what is left (plus any extra
    arguments) to ``fn`` wrapped by ``array_not_empty``.

    Args:
        fn: Predicate taking an array followed by arbitrary arguments.

    Returns:
        A callable with ``fn``'s metadata (via ``functools.wraps``).
    """
    # NOTE(review): nan_mask presumably selects the non-null entries - confirm
    # against visions.backends.shared.nan_handling.
    guarded = array_not_empty(fn)

    @functools.wraps(fn)
    def inner(array: np.ndarray, *args, **kwargs) -> bool:
        return guarded(array[nan_mask(array)], *args, **kwargs)

    return inner


def array_not_empty(fn: Callable[..., bool]) -> Callable[..., bool]:
    """Decorate an array predicate so that empty arrays short-circuit.

    Args:
        fn: Predicate taking an array followed by arbitrary arguments.

    Returns:
        A callable that returns ``False`` without calling ``fn`` when the
        array's first dimension has length 0, and ``fn``'s result otherwise.
    """

    @functools.wraps(fn)
    def inner(array: np.ndarray, *args, **kwargs) -> bool:
        has_entries = array.shape[0] != 0
        return fn(array, *args, **kwargs) if has_entries else False

    return inner


def _base_all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
    """Return True when every element of ``array`` is an instance of ``dtypes``.

    Iteration stops at the first element failing the ``isinstance`` check.
    An array with no elements yields ``True``.
    """
    for value in array:
        if not isinstance(value, dtypes):
            return False
    return True


# ``all_type`` is defined in both branches so it is importable whether or not
# numba is installed. As written, both definitions delegate to
# ``_base_all_type``; ``all_type_numba`` is only defined, not called, here.
if has_numba:
    # TODO: This only works when the numpy array dtype falls under a few categories
    #  There are alternative implementations with forceobj=True which work in all cases
    #  including the use of isinstance, but in those cases worst case performance can be substantially worse
    #  than the default python implementation.
    def all_type_numba(dtype: Union[Tuple, T]):
        """Build a numba-jitted checker for a fixed ``dtype``.

        Returns:
            A function taking an array and returning ``False`` as soon as an
            element's ``type`` is not ``dtype`` (identity comparison), and
            ``True`` otherwise.
        """

        # Compiled with nopython=True; ``dtype`` is captured from the
        # enclosing call.
        @nb.jit(nopython=True)
        def inner(array: np.ndarray) -> bool:
            for i in nb.prange(array.size):
                if type(array[i]) is not dtype:
                    return False
            return True

        return inner

    def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
        """Check that every element of ``array`` is an instance of ``dtypes``."""
        return _base_all_type(array, dtypes)

else:

    def all_type(array: np.ndarray, dtypes: Union[type, Tuple[type, ...]]) -> bool:
        """Check that every element of ``array`` is an instance of ``dtypes``."""
        return _base_all_type(array, dtypes)
61 changes: 61 additions & 0 deletions build/lib/visions/backends/numpy/sequences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from typing import Dict, Sequence
from urllib.parse import urlparse

import numpy as np


def get_sequences() -> Dict[str, Sequence]:
    """Return the named sample sequences used as numpy-backend fixtures.

    Returns:
        A dict mapping a fixture name to either a plain Python list or a
        numpy array. A fresh dict is built on every call.
    """
    # NumPy 2.0 removed the aliases np.NINF, np.PINF, np.Infinity and
    # np.unicode_. The spellings below (np.inf, -np.inf, np.str_) produce the
    # same values/dtypes and exist on both NumPy 1.x and 2.x.
    pos_inf = np.inf
    neg_inf = -np.inf

    sequences = {
        "complex_series_float": [
            complex(0, 0),
            complex(1, 0),
            complex(3, 0),
            complex(-1, 0),
        ],
        "url_nan_series": [
            urlparse("http://www.cwi.nl:80/%7Eguido/Python.html"),
            urlparse("https://github.com/dylan-profiling/hurricane"),
            np.nan,
        ],
        "mixed": [True, False, np.nan],
        "float_nan_series": [1.0, 2.5, np.nan],
        "float_series5": [np.nan, 1.2],
        "float_with_inf": [pos_inf, neg_inf, pos_inf, 1000000.0, 5.5],
        "inf_series": [pos_inf, neg_inf, pos_inf, pos_inf],
        "int_nan_series": [1, 2, np.nan],
        "nan_series": [np.nan],
        "nan_series_2": [np.nan, np.nan, np.nan, np.nan],
        "string_num_nan": ["1.0", "2.0", np.nan],
        "string_with_sep_num_nan": ["1,000.0", "2.1", np.nan],
        "string_flt_nan": ["1.0", "45.67", np.nan],
        "string_str_nan": [
            "I was only robbing the register,",
            "I hope you understand",
            "One of us had better call up the cops",
            "In the hot New Jersey night",
            np.nan,
        ],
        "float_series3": np.array([1.2, 2, 3, 4], dtype=np.float64),
        "np_uint32": np.array([1, 2, 3, 4], dtype=np.uint32),
        "string_np_unicode_series": np.array(["upper", "hall"], dtype=np.str_),
        "complex_series": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
        ],
        "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
        "complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None],
        "complex_series_nan_2": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
            np.nan,
        ],
        "complex_series_py_nan": [
            complex(0, 0),
            complex(1, 2),
            complex(3, -1),
            np.nan,
        ],
    }
    return sequences
172 changes: 172 additions & 0 deletions build/lib/visions/backends/numpy/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""
A selection of testing utilities for visions.
"""

import functools
from typing import Callable, Dict, List, Optional, Type, Union

import numpy as np

from visions.backends.numpy.array_utils import array_handle_nulls


def option_coercion_evaluator(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], Optional[np.ndarray]]:
    """Wrap a coercion function so that expected failures become ``None``.

    Args:
        fn: A function coercing an array to another array.
        extra_errors: Additional exception types to treat as a failed
            coercion, on top of ValueError, TypeError and AttributeError.

    Returns:
        A callable returning the coerced array when ``fn`` succeeds and
        ``None`` when ``fn`` raises one of the handled exception types.
        Any other exception propagates.
    """
    handled = (ValueError, TypeError, AttributeError)
    if extra_errors:
        handled = handled + tuple(extra_errors)

    @functools.wraps(fn)
    def f(array: np.ndarray) -> Optional[np.ndarray]:
        try:
            coerced = fn(array)
        except handled:
            return None
        return coerced

    return f


def coercion_test(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], bool]:
    """Build a boolean test out of a coercion function.

    Args:
        fn: A function coercing an array to another type.
        extra_errors: Additional exception types to treat as a failed coercion.

    Returns:
        A callable returning ``True`` when the coercion produced a result and
        ``False`` when the evaluator reported ``None``.
    """
    # The evaluator turns handled exceptions into a ``None`` result.
    evaluate = option_coercion_evaluator(fn, extra_errors)

    @functools.wraps(fn)
    def f(array: np.ndarray) -> bool:
        return evaluate(array) is not None

    return f


def coercion_true_test(
    fn: Callable[[np.ndarray], np.ndarray],
    extra_errors: Optional[List[Type[Exception]]] = None,
) -> Callable[[np.ndarray], bool]:
    """A coercion truthiness test generator

    Creates a coercion test based on a provided coercion function which also
    requires every element of the coerced output to be truthy. This is useful
    when ``fn`` is itself an element-wise check whose output should hold for
    the whole array.

    Args:
        fn: A function coercing an array to another array.
        extra_errors: Additional exceptions to catch

    Returns:
        A callable returning ``False`` when the coercion failed, otherwise
        whether all elements of the coerced result are truthy.
    """
    tester = option_coercion_evaluator(fn, extra_errors)

    @functools.wraps(tester)
    def f(array: np.ndarray) -> bool:
        result = tester(array)
        if result is None:
            return False
        # Inspect the coerced output, not the raw input: checking ``array``
        # here would ignore what ``fn`` actually produced.
        return bool(np.all(result))

    return f


def coercion_equality_test(
    fn: Callable[[np.ndarray], np.ndarray]
) -> Callable[[np.ndarray], bool]:
    """Build a test that requires the coercion to leave the values unchanged.

    Useful when only the data type should change and not the data itself,
    for example when converting an integer to a float.

    Args:
        fn: A function coercing an array to another type.

    Returns:
        A callable returning ``False`` when the coercion failed, otherwise
        the result of ``np.array_equal`` between the input and the coerced
        array.
    """
    evaluate = option_coercion_evaluator(fn)

    @functools.wraps(evaluate)
    def f(array: np.ndarray) -> bool:
        coerced = evaluate(array)
        if coerced is None:
            return False
        return np.array_equal(array, coerced)

    return f


def coercion_single_map_test(mapping: List[Dict]) -> Callable[[np.ndarray, Dict], bool]:
    """Build a test checking an array against a list of alternative mappings.

    Args:
        mapping: A list of dicts; only the keys of each dict are consulted.

    Returns:
        A callable (wrapped by ``array_handle_nulls``) that is true when all
        elements passed to it are keys of at least one single dict in
        ``mapping``.
    """

    # ``state`` is accepted but unused; the default is None rather than a
    # shared mutable ``{}``.
    @array_handle_nulls
    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
        return any(
            np.isin(array, list(single_map.keys())).all() for single_map in mapping
        )

    return f


def coercion_multi_map_test(mapping: Dict) -> Callable[[np.ndarray, Dict], bool]:
    """Build a test checking an array against the keys of one mapping.

    Args:
        mapping: A dict; only its keys are consulted.

    Returns:
        A callable (wrapped by ``array_handle_nulls``) that is true when all
        elements passed to it are keys of ``mapping``.
    """

    # ``state`` is accepted but unused; the default is None rather than a
    # shared mutable ``{}``.
    @array_handle_nulls
    def f(array: np.ndarray, state: Optional[dict] = None) -> bool:
        return np.isin(array, list(mapping.keys())).all()

    return f


def coercion_map_test(
    mapping: Union[List[Dict], Dict]
) -> Callable[[np.ndarray, Dict], bool]:
    """Create a testing function for a single mapping or a list of mappings.

    Args:
        mapping: A dict with a mapping or a list of dicts

    Returns:
        Callable that checks if an array consists of the mappable values

    Raises:
        ValueError: If ``mapping`` is neither a list nor a dict.

    Examples:
        >>> coercion_map_test({"Yes": True, "No": False})
        >>> coercion_map_test(
        ...     [
        ...         {"Yes": True, "No": False},
        ...         {"Y": True, "N": False},
        ...     ]
        ... )
    """
    # A list is dispatched to the per-dict test, a dict to the one-dict test.
    if isinstance(mapping, list):
        return coercion_single_map_test(mapping)
    if isinstance(mapping, dict):
        return coercion_multi_map_test(mapping)
    raise ValueError("Mapping should be dict or list of dicts")


def coercion_map(
    mapping: Union[List[Dict], Dict]
) -> Callable[[np.ndarray], np.ndarray]:
    """Build an element-wise mapper from a dict or a list of dicts.

    Args:
        mapping: a dict to map, or a list of dicts. A list is flattened into
            one lookup table; on duplicate keys the later dict wins.

    Returns:
        A ``np.vectorize`` callable replacing each value with its mapped
        value, or ``np.nan`` when the value has no entry.

    Raises:
        ValueError: If ``mapping`` is neither a list nor a dict.
    """
    if isinstance(mapping, list):
        lookup = {}
        for single_map in mapping:
            for key, mapped in single_map.items():
                lookup[key] = mapped
    elif isinstance(mapping, dict):
        lookup = mapping
    else:
        raise ValueError("Mapping should be dict or list of dicts")

    def translate(value):
        return lookup.get(value, np.nan)

    # No ``otypes`` is given, so np.vectorize picks the output dtype itself.
    return np.vectorize(translate)
Loading

0 comments on commit 8f12f57

Please sign in to comment.