From 5dd0eca09a6bb3f85547e4477a220395df62542a Mon Sep 17 00:00:00 2001 From: Mate Kadlicsko Date: Fri, 1 Nov 2024 19:59:41 +0100 Subject: [PATCH 1/2] fix: fixed an error in the implementation --- sklego/preprocessing/monotonicspline.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sklego/preprocessing/monotonicspline.py b/sklego/preprocessing/monotonicspline.py index 86722cd7..39638d11 100644 --- a/sklego/preprocessing/monotonicspline.py +++ b/sklego/preprocessing/monotonicspline.py @@ -61,12 +61,6 @@ def fit(self, X, y=None): ) for col in range(X.shape[1]) } - self.sorted_X = {col: np.sort(X[:, col]) for col in range(X.shape[1])} - self.sorted_X_output_ = { - col: self.spline_transformer_[col].transform(np.sort(X[:, col]).reshape(-1, 1)).cumsum(axis=0) - for col in range(X.shape[1]) - } - self.sorted_idx_ = np.arange(X.shape[0]).astype(int) return self def transform(self, X): @@ -95,6 +89,10 @@ def transform(self, X): ) out = [] for col in range(X.shape[1]): - mapping = np.interp(X[:, col], self.sorted_X[col], self.sorted_idx_).astype(int) - out.append(self.sorted_X_output_[col][mapping]) + out.append( + np.cumsum( + self.spline_transformer_[col].transform(X[:, [col]])[:, ::-1], + axis=1, + ) + ) return np.concatenate(out, axis=1) From f601bc6306ee8090f43641c7b4145b2776f11279 Mon Sep 17 00:00:00 2001 From: Mate Kadlicsko Date: Fri, 1 Nov 2024 20:01:01 +0100 Subject: [PATCH 2/2] feat: tests now directly test monotonicity and boundedness --- tests/test_preprocessing/test_monospline.py | 29 ++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tests/test_preprocessing/test_monospline.py b/tests/test_preprocessing/test_monospline.py index 1d7bcabf..85533f84 100644 --- a/tests/test_preprocessing/test_monospline.py +++ b/tests/test_preprocessing/test_monospline.py @@ -10,7 +10,9 @@ @pytest.mark.parametrize("knots", ["uniform", "quantile"]) def test_monotonic_spline_transformer(n_knots, degree, knots): X = np.random.uniform(size=(100, 10)) - transformer = MonotonicSplineTransformer(n_knots=n_knots, degree=degree, knots=knots) + transformer = MonotonicSplineTransformer( + n_knots=n_knots, degree=degree, knots=knots + ) transformer_sk = SplineTransformer(n_knots=n_knots, degree=degree, knots=knots) transformer.fit(X) transformer_sk.fit(X) @@ -21,8 +23,23 @@ def test_monotonic_spline_transformer(n_knots, degree, knots): # Both should have the same shape assert out.shape == out_sk.shape - # Check that the monotonic variant always has a higher value than the non-monotonic variant - for col in range(out.shape[1]): - col_values = out[:, col] - col_values_sk = out_sk[:, col] - assert np.all(col_values >= col_values_sk), f"Column {col} is not monotonically increasing" + n_splines_per_feature = n_knots + degree - 1 + assert out.shape[1] == X.shape[1] * n_splines_per_feature + + # I splines should be bounded by 0 and 1 + assert np.logical_or(out >= 0, np.isclose(out, 0)).all() + assert np.logical_or(out <= 1, np.isclose(out, 1)).all() + + # The features should be monotonically increasing + for i in range(X.shape[1]): + feature = X[:, i] + sorted_out = out[ + np.argsort(feature), + i * n_splines_per_feature : (i + 1) * n_splines_per_feature + ] + differences = np.diff(sorted_out, axis=0) + + # All differences should be greater or equal to zero upto floating point errors + assert np.logical_or( + np.greater_equal(differences, 0), np.isclose(differences, 0) + ).all()