-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extend padding functionalities #9353
Changes from 12 commits
7998afb
44e7d26
fd3e304
9f66ee2
2f33b40
b70b25d
d939030
48a972f
098706b
afda62d
8bdb6f1
992e20e
d3f0275
0a51b7c
3d40800
a848351
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6704,18 +6704,80 @@ def test_polyfit_warnings(self) -> None: | |
ds.var1.polyfit("dim2", 10, full=True) | ||
assert len(ws) == 1 | ||
|
||
def test_pad(self) -> None: | ||
ds = create_test_data(seed=1) | ||
padded = ds.pad(dim2=(1, 1), constant_values=42) | ||
|
||
assert padded["dim2"].shape == (11,) | ||
assert padded["var1"].shape == (8, 11) | ||
assert padded["var2"].shape == (8, 11) | ||
assert padded["var3"].shape == (10, 8) | ||
assert dict(padded.sizes) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} | ||
@staticmethod | ||
def _test_data_var_interior( | ||
original_data_var, padded_data_var, padded_dim_name, expected_pad_values | ||
): | ||
np.testing.assert_equal( | ||
np.unique(padded_data_var.isel({padded_dim_name: [0, -1]})), | ||
expected_pad_values, | ||
) | ||
np.testing.assert_array_equal( | ||
padded_data_var.isel({padded_dim_name: slice(1, -1)}), original_data_var | ||
) | ||
|
||
np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) | ||
np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) | ||
@pytest.mark.parametrize("padded_dim_name", ["dim1", "dim2", "dim3", "time"]) | ||
@pytest.mark.parametrize( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this work if you want to pad along a dimension coordinate (aka. a variable that is called the same as it's dimension)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question, I'll investigate :D There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, as I understand it most dims in the test dataset are dimension coordinates and they pad correctly, so I think so. In any case I've extended the tests to pad all dimensions just to be sure nothing is behaving incorrectly. Let me know if I missed any case. |
||
["constant_values"], | ||
[ | ||
pytest.param(None, id="default"), | ||
pytest.param(42, id="scalar"), | ||
pytest.param((42, 43), id="tuple"), | ||
pytest.param({"dim1": 42, "dim2": 43}, id="per dim scalar"), | ||
pytest.param({"dim1": (42, 43), "dim2": (43, 44)}, id="per dim tuple"), | ||
pytest.param({"var1": 42, "var2": (42, 43)}, id="per var"), | ||
pytest.param({"var1": 42, "dim1": (42, 43)}, id="mixed"), | ||
], | ||
) | ||
def test_pad(self, padded_dim_name, constant_values) -> None: | ||
ds = create_test_data(seed=1) | ||
padded = ds.pad({padded_dim_name: (1, 1)}, constant_values=constant_values) | ||
|
||
# test padded dim values and size | ||
for ds_dim_name, ds_dim in ds.sizes.items(): | ||
if ds_dim_name == padded_dim_name: | ||
np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim + 2) | ||
if ds_dim_name in padded.coords: | ||
assert padded[ds_dim_name][[0, -1]].isnull().all() | ||
else: | ||
np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim) | ||
|
||
# check if coord "numbers" with dimention dim3 is paded correctly | ||
if padded_dim_name == "dim3": | ||
assert padded["numbers"][[0, -1]].isnull().all() | ||
# twarning: passes but dtype changes from int to float | ||
np.testing.assert_array_equal(padded["numbers"][1:-1], ds["numbers"]) | ||
|
||
# test if data_vars are paded with correct values | ||
for data_var_name, data_var in padded.data_vars.items(): | ||
if padded_dim_name in data_var.dims: | ||
if utils.is_dict_like(constant_values): | ||
if ( | ||
expected := constant_values.get(data_var_name, None) | ||
) is not None: | ||
self._test_data_var_interior( | ||
ds[data_var_name], data_var, padded_dim_name, expected | ||
) | ||
elif ( | ||
expected := constant_values.get(padded_dim_name, None) | ||
) is not None: | ||
self._test_data_var_interior( | ||
ds[data_var_name], data_var, padded_dim_name, expected | ||
) | ||
else: | ||
self._test_data_var_interior( | ||
ds[data_var_name], data_var, padded_dim_name, 0 | ||
) | ||
elif constant_values: | ||
self._test_data_var_interior( | ||
ds[data_var_name], data_var, padded_dim_name, constant_values | ||
) | ||
else: | ||
self._test_data_var_interior( | ||
ds[data_var_name], data_var, padded_dim_name, np.nan | ||
) | ||
else: | ||
assert_array_equal(data_var, ds[data_var_name]) | ||
|
||
@pytest.mark.parametrize( | ||
["keep_attrs", "attrs", "expected"], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I'm not mistaken
Probably a test should cover this as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we change that 0 to
None
then there is a discrepancy between what happens if we don't set alldim
inconstant_variables
(this defaults to padding with 0 and is coded in thepad
function fromvariable.py
) and if we don't set allvar
(code I added inpad
function fromdataset.py
which will then pad dimensions withnp.nan
).I can either change every where which I think might be a breaking change (?) or just leave everything as is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would be worth changing but doesn't need to be in this PR (sorry if I slowed things down by introducing this). Assuming it isn't trivial, adding a TODO with a link to this would be very sufficient
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, added the TODO 🚀
Lemme know if there's anything else left to do and thanks for all the comments!