From 8b495e8fafd4eda9d2181d3d5a07e5da277c932f Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:10:03 -0600 Subject: [PATCH 1/9] improve error message and add helper functions --- artlib/common/BaseART.py | 58 ++++++++++++++++++++++++++++++++-- unit_tests/test_FuzzyART.py | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index 6e6abce..409816b 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -106,6 +106,45 @@ def set_params(self, **params): self.validate_params(local_params) return self + def set_data_bounds(self, lower_bounds: np.ndarray, upper_bounds: np.ndarray): + """Manually set the data bounds for normalization. + + Parameters + ---------- + lower_bounds : np.ndarray + The lower bounds for each column. + + upper_bounds : np.ndarray + The upper bounds for each column. + + """ + if self.is_fitted_: + raise ValueError("Cannot change data limits after fit.") + self.d_min_ = lower_bounds + self.d_max_ = upper_bounds + + def find_data_bounds( + self, *data_batches: list[np.ndarray] + ) -> Tuple[np.ndarray, np.ndarray]: + """Manually set the data bounds for normalization. + + Parameters + ---------- + *data_batches : list[np.ndarray] + Batches of data to be presented to the model + + Returns + ------- + tuple[np.ndarray, np.ndarray] + Lower and upper bounds for data. + + """ + all_data = np.vstack(data_batches) + lower_bounds = np.min(all_data) + upper_bounds = np.max(all_data) + + return lower_bounds, upper_bounds + def prepare_data(self, X: np.ndarray) -> np.ndarray: """Prepare data for clustering. @@ -187,8 +226,23 @@ def validate_data(self, X: np.ndarray): - X: data set """ - assert np.all(X >= 0), "Data has not been normalized" - assert np.all(X <= 1.0), "Data has not been normalized" + normalization_message = ( + "Data has not been normalized or was not normalized " + "correctly. All values must fall between 0 and 1, " + "inclusively." + ) + if self.is_fitted_: + normalization_message += ( + "\nThis appears to not be the first batch of " + "data. Data boundaries must be calculated for " + "the entire data space. Prior to fitting, use " + "BaseART.set_data_bounds() to manually set the " + "bounds for your data or use " + "BaseART.find_data_bounds() to identify the " + "bounds automatically for multiple batches." 
+            )
+        assert np.all(X >= 0), normalization_message
+        assert np.all(X <= 1.0), normalization_message
         self.check_dimensions(X)
 
     def category_choice(
diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py
index e07c848..e4cd338 100644
--- a/unit_tests/test_FuzzyART.py
+++ b/unit_tests/test_FuzzyART.py
@@ -148,3 +148,66 @@ def test_clustering(art_model):
 
     labels = art_model.fit_predict(data)
     assert np.all(np.equal(labels, np.array([0, 0, 1, 2, 3])))
+
+
+def test_validate_data(art_model):
+    # Test validate_data with out-of-bounds data
+    X = np.array([[-0.1, 0.2], [1.1, 0.4]])
+    art_model.is_fitted_ = False
+    with pytest.raises(AssertionError):
+        art_model.validate_data(X)
+
+def test_validate_data_again(art_model):
+    # Test validate_data with normalized data
+    X = np.array([[0.1, 0.2], [0.3, 0.4]])
+    art_model.is_fitted_ = False
+    art_model.validate_data(X)  # Should pass without assertion error
+
+    # Test validate_data with data out of bounds
+    X_invalid = np.array([[-0.1, 0.2], [1.1, 0.4]])
+    with pytest.raises(AssertionError):
+        art_model.validate_data(X_invalid)
+
+
+def test_set_data_bounds(art_model):
+    # Test set_data_bounds with valid bounds
+    lower_bounds = np.array([0.0, 0.0])
+    upper_bounds = np.array([1.0, 1.0])
+    art_model.is_fitted_ = False
+    art_model.set_data_bounds(lower_bounds, upper_bounds)
+    assert np.all(art_model.d_min_ == lower_bounds)
+    assert np.all(art_model.d_max_ == upper_bounds)
+
+    # Test set_data_bounds after the model is fitted
+    art_model.is_fitted_ = True
+    with pytest.raises(ValueError, match="Cannot change data limits after fit."):
+        art_model.set_data_bounds(lower_bounds, upper_bounds)
+    X = np.array([[0.1, 0.2], [0.3, 0.4]])
+    X_norm = art_model.prepare_data(X)
+    assert np.all(X_norm == X)
+
+
+def test_find_data_bounds(art_model):
+    # Test find_data_bounds with multiple data batches
+    batch_1 = np.array([[0.1, 0.2], [0.3, 0.4]])
+    batch_2 = np.array([[0.0, 0.1], [0.5, 0.6]])
+    lower_bounds, upper_bounds = art_model.find_data_bounds(batch_1, batch_2)
+    np.testing.assert_array_equal(lower_bounds, np.array([0.0, 0.1]))
+    np.testing.assert_array_equal(upper_bounds, np.array([0.5, 0.6]))
+
+
+def test_prepare_data(art_model):
+    # Test prepare_data with valid data
+    X = np.array([[0.0, 0.5], [0.2, 1.0]])
+    art_model.d_min_ = np.array([0.0, 0.0])
+    art_model.d_max_ = np.array([1.0, 1.0])
+    normalized_X = art_model.prepare_data(X)
+    np.testing.assert_array_almost_equal(normalized_X, X)  # Already normalized
+
+    # Test prepare_data with data requiring normalization
+    X = np.array([[1.0, 10.0], [5.0, 20.0]])
+    art_model.d_min_ = np.array([1.0, 10.0])
+    art_model.d_max_ = np.array([5.0, 20.0])
+    normalized_X = art_model.prepare_data(X)
+    expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
+    np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X)
\ No newline at end of file

From 69a0d0e1f77e853fbcb8925399998312e7d542e9 Mon Sep 17 00:00:00 2001
From: niklas melton
Date: Wed, 8 Jan 2025 21:22:06 -0600
Subject: [PATCH 2/9] default is_fitted

---
 artlib/common/BaseART.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py
index 409816b..3ed600f 100644
--- a/artlib/common/BaseART.py
+++ b/artlib/common/BaseART.py
@@ -28,6 +28,7 @@ def __init__(self, params: Dict):
         self.weight_sample_counter_: List[int] = []
         self.d_min_ = None
         self.d_max_ = None
+        self.is_fitted_ = False
 
     def __getattr__(self, key):
         if key in self.params:
From ca9cdda4de4c9f5b172cc557f1bda75c1b3406b3 Mon
Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:27:36 -0600 Subject: [PATCH 3/9] fix some broken unit tests --- artlib/common/BaseART.py | 4 ++-- unit_tests/test_FuzzyART.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index 3ed600f..0c75961 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -141,8 +141,8 @@ def find_data_bounds( """ all_data = np.vstack(data_batches) - lower_bounds = np.min(all_data) - upper_bounds = np.max(all_data) + lower_bounds = np.min(all_data, axis=0) + upper_bounds = np.max(all_data, axis=0) return lower_bounds, upper_bounds diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py index e4cd338..0b9ad26 100644 --- a/unit_tests/test_FuzzyART.py +++ b/unit_tests/test_FuzzyART.py @@ -2,6 +2,7 @@ import numpy as np from unittest.mock import MagicMock from artlib.elementary.FuzzyART import FuzzyART +from artlib.common.utils import compliment_code # Assuming BaseART is imported and available in the current namespace @@ -161,7 +162,8 @@ def test_validate_data_again(art_model): # Test validate_data with normalized data X = np.array([[0.1, 0.2], [0.3, 0.4]]) art_model.is_fitted_ = False - art_model.validate_data(X) # Should pass without assertion error + X_cc = compliment_code(X) + art_model.validate_data(X_cc) # Should pass without assertion error # Test validate_data with data out of bounds X_invalid = np.array([[-0.1, 0.2], [1.1, 0.4]]) @@ -183,8 +185,9 @@ def test_set_data_bounds(art_model): with pytest.raises(ValueError, match="Cannot change data limits after fit."): art_model.set_data_bounds(lower_bounds, upper_bounds) X = np.array([[0.1, 0.2], [0.3, 0.4]]) + X_cc = compliment_code(X) X_norm = art_model.prepare_data(X) - assert np.all(X_norm == X) + assert np.all(X_norm == X_cc) def test_find_data_bounds(art_model): From 2b83265f156ee23aa64aaab5c41e1ac5a422e659 Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:30:30 -0600 Subject: [PATCH 4/9] fix some broken unit tests --- unit_tests/test_FuzzyART.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py index 0b9ad26..01ab2b6 100644 --- a/unit_tests/test_FuzzyART.py +++ b/unit_tests/test_FuzzyART.py @@ -202,10 +202,11 @@ def test_find_data_bounds(art_model): def test_prepare_data(art_model): # Test prepare_data with valid data X = np.array([[0.0, 0.5], [0.2, 1.0]]) + X_cc = compliment_code(X) art_model.d_min_ = np.array([0.0, 0.0]) art_model.d_max_ = np.array([1.0, 1.0]) normalized_X = art_model.prepare_data(X) - np.testing.assert_array_almost_equal(normalized_X, X) # Already normalized + np.testing.assert_array_almost_equal(normalized_X, X_cc) # Already normalized # Test prepare_data with data requiring normalization X = np.array([[1.0, 10.0], [5.0, 20.0]]) @@ -213,4 +214,5 @@ def test_prepare_data(art_model): art_model.d_max_ = np.array([5.0, 20.0]) normalized_X = art_model.prepare_data(X) expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]]) - np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X) \ No newline at end of file + expected_normalized_X_cc = compliment_code(expected_normalized_X) + np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X_cc) \ No newline at end of file From 0715e33b9c53f839f8627421ddd08feda01e306e Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:32:17 -0600 Subject: [PATCH 5/9] 
fix some broken unit tests

---
 unit_tests/test_FuzzyART.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py
index 01ab2b6..57d6f84 100644
--- a/unit_tests/test_FuzzyART.py
+++ b/unit_tests/test_FuzzyART.py
@@ -213,6 +213,5 @@ def test_prepare_data(art_model):
     art_model.d_min_ = np.array([1.0, 10.0])
     art_model.d_max_ = np.array([5.0, 20.0])
     normalized_X = art_model.prepare_data(X)
-    expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
-    expected_normalized_X_cc = compliment_code(expected_normalized_X)
+    expected_normalized_X_cc = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
     np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X_cc)
\ No newline at end of file

From 58c368ab98955983990dabdda50d2dcf2fd6c3b6 Mon Sep 17 00:00:00 2001
From: niklas melton
Date: Wed, 8 Jan 2025 22:12:23 -0600
Subject: [PATCH 6/9] update docs and add clip method

---
 README.md                         | 30 ++++++++++++++++++++++++++++++
 artlib/common/BaseART.py          |  6 +++++-
 artlib/common/BaseARTMAP.py       | 10 ++++++++--
 artlib/fusion/FusionART.py        |  8 +++++++-
 artlib/hierarchical/DeepARTMAP.py |  8 ++++++--
 artlib/supervised/ARTMAP.py       | 14 ++++++++++----
 artlib/supervised/SimpleARTMAP.py | 18 ++++++++++++++++--
 7 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index aecb18a..f01bc2b 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,36 @@ model.fit(train_X_prep, train_y)
 pred_y = model.predict_regression(test_Xy_prep, target_channels=[1])
 ```
+### Data Normalization
+
+AdaptiveResonanceLib models require feature data to be normalized to the range
+0.0 to 1.0, inclusive. This requires identifying the boundaries of the data space.
+
+If the first batch of your training data is representative of the entire data space,
+you don't need to do anything; artlib will identify the data bounds automatically.
+However, a single batch is often not representative, and one of the following
+workarounds will be needed:
+
+Users can manually set the bounds using the following code snippet or similar:
+```python
+# Set the boundaries of your data for normalization
+lower_bounds = np.array([0.]*n_features)
+upper_bounds = np.array([1.]*n_features)
+model.set_data_bounds(lower_bounds, upper_bounds)
+```
+
+Or users can present all batches of data for automatic boundary identification
+and then set the resulting bounds on the model:
+```python
+# Find the boundaries of your data for normalization
+lower_bounds, upper_bounds = model.find_data_bounds(train_X, test_X)
+model.set_data_bounds(lower_bounds, upper_bounds)
+```
+
+If only the boundaries of your testing data are unknown, you can call
+`model.predict()` with `clip=True` to clip testing data to the bounds seen during
+training. Only use this if you understand what you are doing.
+
diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py
index 0c75961..fd64c74 100644
--- a/artlib/common/BaseART.py
+++ b/artlib/common/BaseART.py
@@ -797,13 +797,15 @@ def fit_gif(
         self.post_fit(X)
         return self
 
-    def predict(self, X: np.ndarray) -> np.ndarray:
+    def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
         """Predict labels for the data.
 
         Parameters
         ----------
         X : np.ndarray
             The dataset.
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -812,6 +814,8 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.d_min_, self.d_max_) self.validate_data(X) self.check_dimensions(X) diff --git a/artlib/common/BaseARTMAP.py b/artlib/common/BaseARTMAP.py index fa5da4c..8de9c10 100644 --- a/artlib/common/BaseARTMAP.py +++ b/artlib/common/BaseARTMAP.py @@ -138,13 +138,15 @@ def partial_fit( """ raise NotImplementedError - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict labels for the data. Parameters ---------- X : np.ndarray Dataset A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -154,13 +156,17 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ raise NotImplementedError - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict labels for the data, both A-side and B-side. Parameters ---------- X : np.ndarray Dataset A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- diff --git a/artlib/fusion/FusionART.py b/artlib/fusion/FusionART.py index 781e66a..3b81a15 100644 --- a/artlib/fusion/FusionART.py +++ b/artlib/fusion/FusionART.py @@ -528,13 +528,17 @@ def step_pred(self, x, skip_channels: List[int] = []) -> int: c_ = int(np.argmax(T)) return c_ - def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray: + def predict( + self, X: np.ndarray, clip: bool = False, skip_channels: List[int] = [] + ) -> np.ndarray: """Predict labels for the input data. Parameters ---------- X : np.ndarray Input dataset. + clip : bool + clip the input values to be between the previously seen data limits skip_channels : list of int, optional Channels to skip (default is []). @@ -545,6 +549,8 @@ def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.d_min_, self.d_max_) self.validate_data(X) self.check_dimensions(X) diff --git a/artlib/hierarchical/DeepARTMAP.py b/artlib/hierarchical/DeepARTMAP.py index b11475a..6545a64 100644 --- a/artlib/hierarchical/DeepARTMAP.py +++ b/artlib/hierarchical/DeepARTMAP.py @@ -410,13 +410,17 @@ def partial_fit( x_i += 1 return self - def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]: + def predict( + self, X: Union[np.ndarray, list[np.ndarray]], clip: bool = False + ) -> list[np.ndarray]: """Predict the labels for the input data. Parameters ---------- X : np.ndarray or list of np.ndarray The input data set for prediction. 
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -428,7 +432,7 @@ def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]: x = X[-1] else: x = X - pred_a, pred_b = self.layers[-1].predict_ab(x) + pred_a, pred_b = self.layers[-1].predict_ab(x, clip=clip) pred = [pred_a, pred_b] for layer in self.layers[:-1][::-1]: pred.append(layer.map_a2b(pred[-1])) diff --git a/artlib/supervised/ARTMAP.py b/artlib/supervised/ARTMAP.py index a24a389..597b2f5 100644 --- a/artlib/supervised/ARTMAP.py +++ b/artlib/supervised/ARTMAP.py @@ -257,13 +257,15 @@ def partial_fit( ) return self - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict the labels for the given data. Parameters ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -272,15 +274,19 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) - return super(ARTMAP, self).predict(X) + return super(ARTMAP, self).predict(X, clip) - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict both A-side and B-side labels for the given data. Parameters ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -289,7 +295,7 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ check_is_fitted(self) - return super(ARTMAP, self).predict_ab(X) + return super(ARTMAP, self).predict_ab(X, clip) def predict_regression(self, X: np.ndarray) -> np.ndarray: """ diff --git a/artlib/supervised/SimpleARTMAP.py b/artlib/supervised/SimpleARTMAP.py index 34c59c9..cc6c5c1 100644 --- a/artlib/supervised/SimpleARTMAP.py +++ b/artlib/supervised/SimpleARTMAP.py @@ -405,13 +405,15 @@ def step_pred(self, x: np.ndarray) -> tuple[int, int]: c_b = self.map[c_a] return c_a, c_b - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict labels for the data. Parameters ---------- X : np.ndarray Data set A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -420,19 +422,27 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_) + self.module_a.validate_data(X) + self.module_a.check_dimensions(X) y_b = np.zeros((X.shape[0],), dtype=int) for i, x in enumerate(X): c_a, c_b = self.step_pred(x) y_b[i] = c_b return y_b - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict labels for the data, both A-side and B-side. Parameters ---------- X : np.ndarray Data set A. 
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -441,6 +451,10 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_) + self.module_a.validate_data(X) + self.module_a.check_dimensions(X) y_a = np.zeros((X.shape[0],), dtype=int) y_b = np.zeros((X.shape[0],), dtype=int) for i, x in enumerate(X): From c6c0cfa6e51e445ebef246c3b7a6c2f6bdeba8ee Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 22:18:22 -0600 Subject: [PATCH 7/9] update docs and add clip method --- artlib/common/BaseART.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index fd64c74..ff732e7 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -127,7 +127,8 @@ def set_data_bounds(self, lower_bounds: np.ndarray, upper_bounds: np.ndarray): def find_data_bounds( self, *data_batches: list[np.ndarray] ) -> Tuple[np.ndarray, np.ndarray]: - """Manually set the data bounds for normalization. + """Automatically find the data bounds for normalization from a list of data + batches. Parameters ---------- From 89391c9c67776261afd48e41596c8f20c898cc06 Mon Sep 17 00:00:00 2001 From: niklas melton Date: Thu, 9 Jan 2025 21:39:51 -0600 Subject: [PATCH 8/9] update readme examples with mnist and normalization info --- README.md | 61 +++++++++++++++++++++++-------------- artlib/fusion/FusionART.py | 6 ++-- artlib/supervised/ARTMAP.py | 6 ++-- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index f01bc2b..15feef5 100644 --- a/README.md +++ b/README.md @@ -76,17 +76,25 @@ Here are some quick examples to get you started with AdaptiveResonanceLib: ```python from artlib import FuzzyART import numpy as np +from tensorflow.keras.datasets import mnist -# Your dataset -train_X = np.array([...]) # shape (n_samples, n_features) -test_X = np.array([...]) +# Load the MNIST dataset +n_dim = 28*28 +(X_train, _), (X_test, _) = mnist.load_data() +X_train = X_train.reshape((-1, n_dim)) # flatten images +X_test = X_test.reshape((-1, n_dim)) # Initialize the Fuzzy ART model model = FuzzyART(rho=0.7, alpha = 0.0, beta=1.0) +# (Optional) Tell the model the data limits for normalization +lower_bounds = np.array([0.]*n_dim) +upper_bounds = np.array([1.]*n_dim) +model.set_data_bounds(lower_bounds, upper_bounds) + # Prepare Data -train_X_prep = model.prepare_data(train_X) -test_X_prep = model.prepare_data(test_X) +train_X_prep = model.prepare_data(X_train) +test_X_prep = model.prepare_data(X_test) # Fit the model model.fit(train_X_prep) @@ -100,25 +108,32 @@ predictions = model.predict(test_X_prep) ```python from artlib import GaussianART, SimpleARTMAP import numpy as np +from tensorflow.keras.datasets import mnist -# Your dataset -train_X = np.array([...]) # shape (n_samples, n_features) -train_y = np.array([...]) # shape (n_samples, ), must be integers -test_X = np.array([...]) +# Load the MNIST dataset +n_dim = 28*28 +(X_train, y_train), (X_test, y_test) = mnist.load_data() +X_train = X_train.reshape((-1, n_dim)) # flatten images +X_test = X_test.reshape((-1, n_dim)) # Initialize the Gaussian ART model -sigma_init = np.array([0.5]*train_X.shape[1]) # variance estimate for each feature +sigma_init = np.array([0.5]*X_train.shape[1]) # variance estimate for each feature module_a = GaussianART(rho=0.0, sigma_init=sigma_init) +# (Optional) Tell the model the data 
limits for normalization
+lower_bounds = np.array([0.]*n_dim)
+upper_bounds = np.array([1.]*n_dim)
+module_a.set_data_bounds(lower_bounds, upper_bounds)
+
 # Initialize the SimpleARTMAP model
 model = SimpleARTMAP(module_a=module_a)
 
 # Prepare Data
-train_X_prep = model.prepare_data(train_X)
-test_X_prep = model.prepare_data(test_X)
+train_X_prep = model.prepare_data(X_train)
+test_X_prep = model.prepare_data(X_test)
 
 # Fit the model
-model.fit(train_X_prep, train_y)
+model.fit(train_X_prep, y_train)
 
 # Predict data labels
 predictions = model.predict(test_X_prep)
@@ -131,22 +146,22 @@ from artlib import FuzzyART, HypersphereART, FusionART
 import numpy as np
 
 # Your dataset
-train_X = np.array([...]) # shape (n_samples, n_features_X)
-train_y = np.array([...]) # shape (n_samples, n_features_y)
+X_train = np.array([...]) # shape (n_samples, n_features_X)
+y_train = np.array([...]) # shape (n_samples, n_features_y)
 test_X = np.array([...])
 
 # Initialize the Fuzzy ART model
 module_x = FuzzyART(rho=0.0, alpha = 0.0, beta=1.0)
 
 # Initialize the Hypersphere ART model
-r_hat = 0.5*np.sqrt(train_X.shape[1]) # no restriction on hyperpshere size
+r_hat = 0.5*np.sqrt(X_train.shape[1]) # no restriction on hypersphere size
 module_y = HypersphereART(rho=0.0, alpha = 0.0, beta=1.0, r_hat=r_hat)
 
 # Initialize the FusionARTMAP model
 gamma_values = [0.5, 0.5] # eqaul weight to both channels
 channel_dims = [
-    2*train_X.shape[1], # fuzzy ART complement codes data so channel dim is 2*n_features
-    train_y.shape[1]
+    2*X_train.shape[1], # fuzzy ART complement codes data so channel dim is 2*n_features
+    y_train.shape[1]
 ]
 model = FusionART(
     modules=[module_x, module_y],
     gamma_values=gamma_values,
     channel_dims=channel_dims
 )
 
 # Prepare Data
-train_Xy = model.join_channel_data(channel_data=[train_X, train_y])
+train_Xy = model.join_channel_data(channel_data=[X_train, y_train])
 train_Xy_prep = model.prepare_data(train_Xy)
-test_Xy = model.join_channel_data(channel_data=[train_X], skip_channels=[1])
+test_Xy = model.join_channel_data(channel_data=[X_train], skip_channels=[1])
 test_Xy_prep = model.prepare_data(test_Xy)
 
 # Fit the model
-model.fit(train_X_prep, train_y)
+model.fit(train_Xy_prep)
 
 # Predict y-channel values
-pred_y = model.predict_regression(test_Xy_prep, target_channels=[1])
+# Predict y-channel values and clip X values outside previously observed ranges
+pred_y = model.predict_regression(test_Xy_prep, target_channels=[1], clip=True)
 ```
 ### Data Normalization
diff --git a/artlib/fusion/FusionART.py b/artlib/fusion/FusionART.py
index 3b81a15..ecd0e40 100644
--- a/artlib/fusion/FusionART.py
+++ b/artlib/fusion/FusionART.py
@@ -679,7 +679,7 @@ def get_channel_centers(self, channel: int) -> List[np.ndarray]:
         return self.modules[channel].get_cluster_centers()
 
     def predict_regression(
-        self, X: np.ndarray, target_channels: List[int] = [-1]
+        self, X: np.ndarray, clip: bool = False, target_channels: List[int] = [-1]
     ) -> Union[np.ndarray, List[np.ndarray]]:
         """Predict regression values for the input data using the target channels.
 
         Parameters
         ----------
         X : np.ndarray
             Input dataset.
+        clip : bool
+            clip the input values to be between the previously seen data limits
         target_channels : list of int, optional
             List of target channels to use for regression. If negative values are used,
             they are considered as channels counting backward from the last channel.
@@ -701,7 +703,7 @@ def predict_regression( """ target_channels = [self.n + k if k < 0 else k for k in target_channels] - C = self.predict(X, skip_channels=target_channels) + C = self.predict(X, clip=clip, skip_channels=target_channels) centers = [self.get_channel_centers(k) for k in target_channels] if len(target_channels) == 1: return np.array([centers[0][c] for c in C]) diff --git a/artlib/supervised/ARTMAP.py b/artlib/supervised/ARTMAP.py index 597b2f5..8588c6a 100644 --- a/artlib/supervised/ARTMAP.py +++ b/artlib/supervised/ARTMAP.py @@ -297,7 +297,7 @@ def predict_ab( check_is_fitted(self) return super(ARTMAP, self).predict_ab(X, clip) - def predict_regression(self, X: np.ndarray) -> np.ndarray: + def predict_regression(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """ Predict values for the given data using cluster centers. Note: ARTMAP is not recommended for regression. @@ -307,6 +307,8 @@ def predict_regression(self, X: np.ndarray) -> np.ndarray: ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -314,6 +316,6 @@ def predict_regression(self, X: np.ndarray) -> np.ndarray: Predicted values using cluster centers. """ check_is_fitted(self) - C = self.predict(X) + C = self.predict(X, clip=clip) centers = self.module_b.get_cluster_centers() return np.array([centers[c] for c in C]) From 6442d952b4618b8d35ddb77635e03bbf2c29e16d Mon Sep 17 00:00:00 2001 From: niklas melton Date: Thu, 9 Jan 2025 21:52:54 -0600 Subject: [PATCH 9/9] update readme examples with mnist and normalization info --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15feef5..d47196d 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ model = FuzzyART(rho=0.7, alpha = 0.0, beta=1.0) # (Optional) Tell the model the data limits for normalization lower_bounds = np.array([0.]*n_dim) -upper_bounds = np.array([1.]*n_dim) +upper_bounds = np.array([255.]*n_dim) model.set_data_bounds(lower_bounds, upper_bounds) # Prepare Data @@ -122,7 +122,7 @@ module_a = GaussianART(rho=0.0, sigma_init=sigma_init) # (Optional) Tell the model the data limits for normalization lower_bounds = np.array([0.]*n_dim) -upper_bounds = np.array([1.]*n_dim) +upper_bounds = np.array([255.]*n_dim) module_a.set_data_bounds(lower_bounds, upper_bounds) # Initialize the SimpleARTMAP model