Skip to content

Commit

Permalink
Vendorize skimage (#1091)
Browse files Browse the repository at this point in the history
Scikit-image is a difficult dependency to manage (especially since it makes strong assumptions about pooch). Even an import of scikit-image mutates the user's ~/.cache folder, which can lead to hard-to-debug permission errors and other issues. We work around this via careful dependency pinning, but I still frequently run into this problem when, e.g., updating versions. We only use two small functions from scikit-image, so it's easy to vendorize them in this repo.

We can reimplement this using `xarray.coarsen` at another time (see #1048).

Added public API:
- vcm.testing.checksum_dataarray/checksum_dataarray_mapping/regression_data

Significant internal changes:
- rewired the coarsening tests to use regression checksums instead.

Requirement changes:
- removed scikit-image
- I would've liked to remove pooch too, but metpy uses it too, albeit only in one module that we probably don't import.
  • Loading branch information
nbren12 authored Mar 18, 2021
1 parent 1a5679b commit 2ef7137
Show file tree
Hide file tree
Showing 22 changed files with 519 additions and 250 deletions.
111 changes: 44 additions & 67 deletions conda-linux-64.lock

Large diffs are not rendered by default.

121 changes: 49 additions & 72 deletions conda-osx-64.lock

Large diffs are not rendered by default.

115 changes: 46 additions & 69 deletions conda-win-64.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ requests-oauthlib==1.3.0
requests-toolbelt==0.8.0
requests==2.24.0
rsa==4.6
scikit-image==0.16.2
scikit-learn==0.22.1
scipy==1.5.4
send2trash==1.5.0
Expand Down
3 changes: 0 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,14 @@ dependencies:
- pandas=1.0.1
- intake
- intake-xarray
- metpy
- netcdf4>=1.4
- numba
- numpy
- partd
- pooch==1.1.1
- pyyaml==5.3
- pytest
- pytest-mpl
- python-kubernetes
- scikit-image
- scikit-learn
- scipy >= 1.5.0
- toolz
Expand Down
2 changes: 0 additions & 2 deletions external/fv3fit/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ partd
Pillow
Pint
pluggy
pooch
py
pyarrow
pyasn1
Expand All @@ -73,7 +72,6 @@ PyWavelets
requests
requests-oauthlib
rsa
scikit-image
scipy >= 1.5.0
six
traitlets
Expand Down
3 changes: 1 addition & 2 deletions external/vcm/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
"xarray",
"toolz",
"scipy",
"scikit-image",
"metpy",
"pooch==1.1.1",
"joblib",
"intake",
"gcsfs",
"zarr",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
919ca49721b5b0a8628f273067a8fb4f
xarray.Dataset {
dimensions:
x = 2 ;
y = 2 ;
z = 2 ;

variables:
float32 a(x, y, z) ;
a:units = m ;

// global attributes:
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
919ca49721b5b0a8628f273067a8fb4f
xarray.Dataset {
dimensions:
x = 2 ;
y = 2 ;
z = 2 ;

variables:
float32 a(x, y, z) ;
a:units = m ;

// global attributes:
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
919ca49721b5b0a8628f273067a8fb4f
xarray.Dataset {
dimensions:
x = 2 ;
y = 2 ;
z = 2 ;

variables:
float32 a(x, y, z) ;
a:units = m ;

// global attributes:
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
919ca49721b5b0a8628f273067a8fb4f
xarray.Dataset {
dimensions:
x = 2 ;
y = 2 ;
z = 2 ;

variables:
float32 a(x, y, z) ;
a:units = m ;

// global attributes:
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a9aa230ec5c5bf71f92e77753b13023d
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[('one', 'a9aa230ec5c5bf71f92e77753b13023d'), ('two', '9a75cafb8e8600ef3aa16a87b7befc0c')]
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Array hash:
a9aa230ec5c5bf71f92e77753b13023d
Coordinate info:
Coordinate x : [1]
xarray.Dataset {
dimensions:
x = 1 ;

variables:
int64 x(x) ;
int64 a(x) ;
a:hello = world ;

// global attributes:
}
25 changes: 9 additions & 16 deletions external/vcm/tests/test_cubedsphere.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np
import pytest
import xarray as xr
from skimage.measure import block_reduce as skimage_block_reduce
import xgcm

from vcm.cubedsphere.coarsen import (
Expand Down Expand Up @@ -30,6 +29,7 @@
from vcm.cubedsphere.io import all_filenames
from vcm.cubedsphere import create_fv3_grid
from vcm.xarray_utils import assert_identical_including_dtype
import vcm.testing


def remove_duplicate_coords(ds):
Expand Down Expand Up @@ -260,27 +260,20 @@ def input_dataset(input_dataarray):

@pytest.mark.parametrize("reduction_function", [np.mean, np.median])
@pytest.mark.parametrize("use_dask", [False, True])
def test_xarray_block_reduce_dataarray(reduction_function, use_dask, input_dataarray):
block_size = (2, 2, 1)
expected_data = skimage_block_reduce(
input_dataarray.values, block_size=block_size, func=reduction_function
)
expected = xr.DataArray(
expected_data,
dims=input_dataarray.dims,
coords=None,
name="foo",
attrs={"units": "m"},
)

def test_xarray_block_reduce_dataarray(
reduction_function, use_dask, input_dataarray, regtest
):
if use_dask:
input_dataarray = input_dataarray.chunk({"x": 2, "y": 2, "z": -1})

block_sizes = {"x": 2, "y": 2}
result = _xarray_block_reduce_dataarray(
input_dataarray, block_sizes, reduction_function
)
assert_identical_including_dtype(result, expected)
print(vcm.testing.checksum_dataarray(result), file=regtest)
for coord in result.coords:
print(np.asarray(result[coord]), file=regtest)
# This ensures the metadata is correct
result.to_dataset(name="a").info(regtest)


def test_xarray_block_reduce_dataarray_bad_chunk_size(input_dataarray):
Expand Down
53 changes: 53 additions & 0 deletions external/vcm/tests/test_testing.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,63 @@
from external.vcm.vcm.testing import regression_data
import warnings
import pytest

from vcm import testing
import xarray


def test_no_warning():
    """Check that a warning emitted inside ``testing.no_warning`` is reported
    as an ``AssertionError``.
    """
    # A single ``with`` carrying two items is equivalent to nesting them:
    # ``pytest.raises`` is entered first and therefore sees any error raised
    # by ``no_warning``.
    with pytest.raises(AssertionError), testing.no_warning(None):
        warnings.warn("Warning")


def test_checksum_dataarray(regtest):
    """Record the checksum of a one-element DataArray in the regression log.

    If the recorded checksum no longer matches, some change probably
    happened in joblib.hash.
    """
    single_value = xarray.DataArray([1], dims=["x"])
    checksum = testing.checksum_dataarray(single_value)
    print(checksum, file=regtest)


def test_checksum_dataarray_mapping(regtest):
    """Record the per-variable checksums of a two-variable Dataset.

    If the recorded checksums no longer match, some change probably
    happened in joblib.hash.
    """
    variables = {"one": ("x", [1]), "two": ("x", [2])}
    dataset = xarray.Dataset(variables)
    checksums = testing.checksum_dataarray_mapping(dataset)
    print(checksums, file=regtest)


def test_regression_data_unchanged(regtest):
    """Check that the integration with regtest works and that the checksum
    report produced by ``testing.regression_data`` hasn't changed.

    The report for a small DataArray with one coordinate and one attribute is
    written to the ``regtest`` stream and compared with the stored output.
    """
    a = xarray.DataArray([1], dims=["x"], coords={"x": [1]}, attrs={"hello": "world"})
    # Go through ``vcm.testing`` like every other test in this module instead
    # of the repo-root-relative ``external.vcm.vcm.testing`` import, which only
    # resolves when the tests are run from the repository root and may load a
    # second copy of the module under a different name.
    print(testing.regression_data(a), file=regtest)


def test_regression_data_different_attrs():
    """Arrays differing only in attrs must give different regression reports,
    unless the report is requested with ``attrs=False``.
    """
    first = xarray.DataArray([1], dims=["x"], attrs={"some": "attr"})
    second = xarray.DataArray([1], dims=["x"], attrs={"another": "attr"})

    # Default report: the attrs are expected to make the two reports differ.
    report_first = testing.regression_data(first)
    report_second = testing.regression_data(second)
    assert report_first != report_second

    # With attrs excluded the two reports are expected to coincide.
    bare_first = testing.regression_data(first, attrs=False)
    bare_second = testing.regression_data(second, attrs=False)
    assert bare_first == bare_second


def test_regression_data_different_coords():
    """Arrays differing only in coordinate values must give different
    regression reports, unless the report is requested with ``coords=False``.
    """
    first = xarray.DataArray([1], dims=["x"], coords={"x": [1]})
    second = xarray.DataArray([1], dims=["x"], coords={"x": [2]})

    # Default report: the coordinates are expected to make the reports differ.
    report_first = testing.regression_data(first)
    report_second = testing.regression_data(second)
    assert report_first != report_second

    # With coordinates excluded the two reports are expected to coincide.
    bare_first = testing.regression_data(first, coords=False)
    bare_second = testing.regression_data(second, coords=False)
    assert bare_first == bare_second


def test_regression_data_different_array():
    """Arrays holding different values must give different regression
    reports.
    """
    twos = xarray.DataArray([2], dims=["x"])
    ones = xarray.DataArray([1], dims=["x"])

    report_twos = testing.regression_data(twos)
    report_ones = testing.regression_data(ones)
    assert report_twos != report_ones
Loading

0 comments on commit 2ef7137

Please sign in to comment.