Skip to content

Commit

Permalink
0.7.6 (#202)
Browse files Browse the repository at this point in the history
* bump

* old

* 3.10

* lint

* lint

* removed 3.8 from tests

* remove mypy from precommit

* flake8 complaint

* black

* test updates

* lint

* numpy differences

* lint

* integer->float

* integer->float

* pandas numpy roundtripping variation

* lint
  • Loading branch information
ieaves authored Feb 6, 2024
1 parent c91881d commit 5fe9dd0
Show file tree
Hide file tree
Showing 29 changed files with 231 additions and 178 deletions.
30 changes: 15 additions & 15 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,47 @@ name: Deploy to PyPi

on:
release:
types: [created]
types: [ created ]

jobs:
release:
if: github.event_name == 'release' && github.event.action == 'created'
name: PyPi Release
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
name: Checkout repo

- name: Set up Python 3.8
- name: Set up Python 3.12
uses: actions/setup-python@v1
with:
python-version: 3.8

python-version: 3.12
- uses: actions/cache@v1
name: Cache pip dependencies
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('requirements*.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install pip dependencies
run: |
pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements_dev.txt
pip install -r requirements_test.txt
pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements_dev.txt
pip install -r requirements_test.txt
- name: Install
run: make install

- name: Make distribution
run: |
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
- name: Publish a Python distribution to PyPI
uses: pypa/gh-action-pypi-publish@master
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ jobs:
test:
strategy:
matrix:
os: [ubuntu-latest]
python: [3.7, 3.8, 3.9]
os: [ ubuntu-latest ]
python: [ 3.8, 3.9, "3.10", 3.11, 3.12 ]
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ docsrc/source/visions/api/_autosummary/
docsrc/build/
plots/*/
playground/
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
25 changes: 9 additions & 16 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
repos:
- repo: https://github.com/asottile/pyupgrade
rev: v3.2.0
rev: v3.15.0
hooks:
- id: pyupgrade
args: ['--py36-plus','--exit-zero-even-if-changed']
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.13.2
hooks:
- id: isort
files: '.*'
args: ['--profile=black']
args: ["--profile", "black", "--filter-files"]
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 24.1.1
hooks:
- id: black
language_version: python
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
# TODO: remove F40x, E772 and fix errors
Expand All @@ -26,17 +25,11 @@ repos:
# - flake8-print
- flake8-2020
- repo: https://github.com/mgedmin/check-manifest
rev: "0.48"
rev: "0.49"
hooks:
- id: check-manifest
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.982'
hooks:
- id: mypy
files: '^src/'
additional_dependencies:
- types-attrs
# g
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.9.0
rev: v1.10.0
hooks:
- id: rst-backticks
5 changes: 5 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Global mypy settings: enable the full strict rule set.
[mypy]
strict = True

# Per-module override for the test suite: silence the "empty-body" check.
# In mypy.ini the value is a plain comma-separated list of error codes; the
# quoted/bracketed list form is pyproject.toml (TOML) syntax only.
[mypy-tests.*]
disable_error_code = empty-body
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy
pandas>=0.25.3
numpy>=1.23.2
pandas>=2.0.0
attrs>=19.3.0
networkx>=2.4
multimethod>=1.4
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@
zip_safe=False,
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
],
)
7 changes: 1 addition & 6 deletions src/visions/backends/numpy/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,7 @@ def get_sequences() -> Dict[str, Sequence]:
complex(3, -1),
],
"bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
"complex_series_nan": [
complex(0, 0),
complex(1, 2),
complex(3, -1),
complex(np.nan, np.nan),
],
"complex_series_nan": [complex(0, 0), complex(1, 2), complex(3, -1), None],
"complex_series_nan_2": [
complex(0, 0),
complex(1, 2),
Expand Down
1 change: 1 addition & 0 deletions src/visions/backends/numpy/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A selection of testing utilities for visions.
"""

import functools
from typing import Callable, Dict, List, Optional, Type, Union

Expand Down
3 changes: 2 additions & 1 deletion src/visions/backends/numpy/types/date_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from visions.backends.numpy import test_utils
from visions.backends.numpy.array_utils import array_handle_nulls, array_not_empty
from visions.backends.pandas.types.date_time import pandas_infer_datetime
from visions.types import DateTime, String


Expand All @@ -32,7 +33,7 @@ def string_is_datetime(array: np.ndarray, state: dict) -> bool:
@DateTime.register_transformer(String, np.ndarray)
def string_to_datetime(array: np.ndarray, state: dict) -> np.ndarray:
    """Convert a string array to datetimes using the pandas datetime helper.

    The array is wrapped in a ``pd.Series``, parsed with
    ``pandas_infer_datetime`` and converted back with ``to_numpy()``.
    """
    # return array.astype(np.datetime64)
    as_series = pd.Series(array)
    parsed = pandas_infer_datetime(as_series, state)
    return parsed.to_numpy()


@DateTime.contains_op.register
Expand Down
7 changes: 4 additions & 3 deletions src/visions/backends/numpy/types/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@


@Integer.register_relationship(Float, np.ndarray)
def float_is_integer(series: np.ndarray, state: dict) -> bool:
    """Report whether every non-NaN entry of ``series`` has no fractional part."""
    # NaN entries are dropped before the check, so they never disqualify the array.
    not_nan = series[~np.isnan(series)]
    fractional_part = np.mod(not_nan, 1)
    return np.all(fractional_part == 0)


# TODO: The array_handle_nulls is actually removing nulls from the result. This is _far_ from ideal but there is no
# other native way to represent nullable integers in numpy
@Integer.register_transformer(Float, np.ndarray)
@array_handle_nulls
def float_to_integer(series: np.ndarray, state: dict) -> np.ndarray:
    """Cast a float array to an integer array with ``astype(int)``."""
    as_int = series.astype(int)
    return as_int


@Integer.contains_op.register
Expand Down
2 changes: 1 addition & 1 deletion src/visions/backends/pandas/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_sequences() -> Dict[str, Iterable]:
pd.date_range(
start="2013-05-18 12:00:01",
periods=2,
freq="H",
freq="h",
tz="Europe/Brussels",
)
),
Expand Down
5 changes: 2 additions & 3 deletions src/visions/backends/pandas/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Callable

import pandas as pd
from pandas.api import types as pdt


# For future reference: get the dtype from the subtype when the series is sparse
Expand All @@ -11,7 +10,7 @@ def series_handle_sparse_dtype(fn: Callable[..., bool]) -> Callable[..., bool]:

@functools.wraps(fn)
def inner(series: pd.Series, state: dict, *args, **kwargs) -> bool:
if pdt.is_sparse(series):
if isinstance(series.dtype, pd.SparseDtype):
dtype = series.dtype.subtype
else:
dtype = series.dtype
Expand Down Expand Up @@ -43,7 +42,7 @@ def series_not_sparse(fn: Callable[..., bool]) -> Callable[..., bool]:

@functools.wraps(fn)
def inner(series: pd.Series, *args, **kwargs) -> bool:
    # Wrapper returned by series_not_sparse: short-circuits to False for
    # sparse series and otherwise forwards every argument to ``fn``.
    #
    # Sparseness is a property of the dtype. A Series object is never itself
    # an instance of pd.SparseDtype, so the check must look at series.dtype;
    # testing the series directly is always False and lets sparse data through.
    if isinstance(series.dtype, pd.SparseDtype):
        return False
    return fn(series, *args, **kwargs)

Expand Down
5 changes: 3 additions & 2 deletions src/visions/backends/pandas/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A selection of testing utilities for visions.
"""

import functools
from typing import Callable, Dict, List, Optional, Type, Union

Expand Down Expand Up @@ -160,10 +161,10 @@ def coercion_map(mapping: Union[List[Dict], Dict]) -> Callable[[pd.Series], pd.S
Returns:
A callable that maps the series.
"""
if type(mapping) == list:
if isinstance(mapping, list):
mapping = {k: v for d in mapping for k, v in d.items()}

elif type(mapping) != dict:
elif not isinstance(mapping, dict):
raise ValueError("Mapping should be dict or list of dicts")

def f(series: pd.Series) -> pd.Series:
Expand Down
30 changes: 23 additions & 7 deletions src/visions/backends/pandas/types/complex.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,50 @@
import math
from typing import Union

import numpy as np
import pandas as pd
from pandas.api import types as pdt

from visions.backends.pandas import test_utils
from visions.backends.pandas.series_utils import series_not_empty, series_not_sparse
from visions.backends.pandas.types.float import string_is_float
from visions.backends.shared.parallelization_engines import pandas_apply
from visions.types.complex import Complex
from visions.types.string import String


def imaginary_in_string(
    series: pd.Series, imaginary_indicator: tuple = ("j", "i")
) -> bool:
    """Return True if any string in ``series`` contains an imaginary-unit marker.

    Args:
        series: Values to scan; only ``str`` entries are inspected.
        imaginary_indicator: Substrings that mark an imaginary component.

    Returns:
        True as soon as one string entry contains one of the markers,
        False otherwise (including for empty input).
    """
    # Missing values (None / NaN) are not containers, so ``marker in value``
    # would raise TypeError on them; skip anything that is not a string.
    return any(
        marker in value
        for value in series
        if isinstance(value, str)
        for marker in imaginary_indicator
    )


def convert_val_to_complex(val: str) -> Union[complex, float]:
    """Parse ``val`` as a complex number, collapsing NaN results to ``np.nan``.

    Args:
        val: Value accepted by the built-in ``complex`` constructor.

    Returns:
        ``np.nan`` when either the real or the imaginary part of the parsed
        number is NaN, otherwise the parsed ``complex`` value.

    Raises:
        ValueError / TypeError: propagated from ``complex(val)`` when ``val``
        cannot be parsed.
    """
    result = complex(val)
    # Check both components explicitly rather than looping with a variable
    # that reuses (and shadows) the ``val`` parameter name.
    if math.isnan(result.real) or math.isnan(result.imag):
        return np.nan
    return result


def convert_to_complex_series(series: pd.Series) -> pd.Series:
    """Run ``convert_val_to_complex`` over ``series`` through ``pandas_apply``."""
    converted = pandas_apply(series, convert_val_to_complex)
    return converted


@Complex.register_relationship(String, pd.Series)
def string_is_complex(series: pd.Series, state: dict) -> bool:
def f(s: pd.Series) -> pd.Series:
return pandas_apply(s, complex)
coerced_series = test_utils.option_coercion_evaluator(convert_to_complex_series)(
series
)

coerced_series = test_utils.option_coercion_evaluator(f)(series)
return (
coerced_series is not None
and not string_is_float(series, state)
and not all(v.imag == 0 for v in coerced_series.dropna())
and imaginary_in_string(series)
)


@Complex.register_transformer(String, pd.Series)
def string_to_complex(series: pd.Series, state: dict) -> pd.Series:
    """Transform a string series by delegating to ``convert_to_complex_series``."""
    converted = convert_to_complex_series(series)
    return converted


@Complex.contains_op.register
Expand Down
20 changes: 15 additions & 5 deletions src/visions/backends/pandas/types/date_time.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from functools import partial

import pandas as pd
from pandas.api import types as pdt

Expand All @@ -12,14 +10,26 @@
from visions.types import DateTime, String


def pandas_infer_datetime(series: pd.Series, state: dict) -> pd.Series:
    """Parse ``series`` into datetimes, retrying with ``format="mixed"``.

    Args:
        series: Values handed to ``pd.to_datetime``.
        state: Accepted for signature compatibility; not read here.

    Returns:
        The result of ``pd.to_datetime(series)``, or of
        ``pd.to_datetime(series, format="mixed")`` when the first call raises.

    Raises:
        Whatever the ``format="mixed"`` call raises if it fails as well.
    """
    parsed = None
    try:
        parsed = pd.to_datetime(series)
    except Exception:
        # Best effort: any failure of the default parse falls through to the
        # per-element "mixed" parse below.
        parsed = None

    if parsed is None:
        parsed = pd.to_datetime(series, format="mixed")
    return parsed


@DateTime.register_relationship(String, pd.Series)
@series_handle_nulls
def string_is_datetime(series: pd.Series, state: dict) -> bool:
exceptions = [OverflowError, TypeError]
def string_to_datetime_func(series: pd.Series) -> pd.Series:
return pandas_infer_datetime(series, state)

exceptions = [OverflowError, TypeError]
coerced_series = test_utils.option_coercion_evaluator(
partial(string_to_datetime, state=state), exceptions
string_to_datetime_func, exceptions
)(series)

if coerced_series is None:
return False
else:
Expand All @@ -28,7 +38,7 @@ def string_is_datetime(series: pd.Series, state: dict) -> bool:

@DateTime.register_transformer(String, pd.Series)
def string_to_datetime(series: pd.Series, state: dict) -> pd.Series:
    """Transform a string series by delegating to ``pandas_infer_datetime``."""
    parsed = pandas_infer_datetime(series, state)
    return parsed


@DateTime.contains_op.register
Expand Down
6 changes: 2 additions & 4 deletions src/visions/backends/shared/nan_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,9 @@ def is_missing(x):
else:
return False

nb.extending.overload(is_missing)(lambda x: is_missing)

nb.extending.overload(is_missing)(is_missing)


@nb.jit
@nb.jit(nopython=True)
def hasna(x: np.ndarray) -> bool:
for item in x:
if is_missing(item):
Expand Down
Loading

0 comments on commit 5fe9dd0

Please sign in to comment.