From 8b495e8fafd4eda9d2181d3d5a07e5da277c932f Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:10:03 -0600 Subject: [PATCH 1/9] improve error message and add helper functions --- artlib/common/BaseART.py | 58 ++++++++++++++++++++++++++++++++-- unit_tests/test_FuzzyART.py | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index 6e6abce..409816b 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -106,6 +106,45 @@ def set_params(self, **params): self.validate_params(local_params) return self + def set_data_bounds(self, lower_bounds: np.ndarray, upper_bounds: np.ndarray): + """Manually set the data bounds for normalization. + + Parameters + ---------- + lower_bounds : np.ndarray + The lower bounds for each column. + + upper_bounds : np.ndarray + The upper bounds for each column. + + """ + if self.is_fitted_: + raise ValueError("Cannot change data limits after fit.") + self.d_min_ = lower_bounds + self.d_max_ = upper_bounds + + def find_data_bounds( + self, *data_batches: list[np.ndarray] + ) -> Tuple[np.ndarray, np.ndarray]: + """Manually set the data bounds for normalization. + + Parameters + ---------- + *data_batches : list[np.ndarray] + Batches of data to be presented to the model + + Returns + ------- + tuple[np.ndarray, np.ndarray] + Lower and upper bounds for data. + + """ + all_data = np.vstack(data_batches) + lower_bounds = np.min(all_data) + upper_bounds = np.max(all_data) + + return lower_bounds, upper_bounds + def prepare_data(self, X: np.ndarray) -> np.ndarray: """Prepare data for clustering. @@ -187,8 +226,23 @@ def validate_data(self, X: np.ndarray): - X: data set """ - assert np.all(X >= 0), "Data has not been normalized" - assert np.all(X <= 1.0), "Data has not been normalized" + normalization_message = ( + "Data has not been normalized or was not normalized " + "correctly. All values must fall between 0 and 1, " + "inclusively." + ) + if self.is_fitted_: + normalization_message += ( + "\nThis appears to not be the first batch of " + "data. Data boundaries must be calculated for " + "the entire data space. Prior to fitting, use " + "BaseART.set_data_bounds() to manually set the " + "bounds for your data or use " + "BaseART.find_data_bounds() to identify the " + "bounds automatically for multiple batches." 
+            )
+        assert np.all(X >= 0), normalization_message
+        assert np.all(X <= 1.0), normalization_message
         self.check_dimensions(X)
 
     def category_choice(
diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py
index e07c848..e4cd338 100644
--- a/unit_tests/test_FuzzyART.py
+++ b/unit_tests/test_FuzzyART.py
@@ -148,3 +148,66 @@ def test_clustering(art_model):
 
     labels = art_model.fit_predict(data)
     assert np.all(np.equal(labels, np.array([0, 0, 1, 2, 3])))
+
+
+def test_validate_data(art_model):
+    # Test validate_data with out-of-bounds data
+    X = np.array([[-0.1, 0.2], [1.1, 0.4]])
+    art_model.is_fitted_ = False
+    with pytest.raises(AssertionError):
+        art_model.validate_data(X)
+
+def test_validate_data_again(art_model):
+    # Test validate_data with normalized data
+    X = np.array([[0.1, 0.2], [0.3, 0.4]])
+    art_model.is_fitted_ = False
+    art_model.validate_data(X)  # Should pass without assertion error
+
+    # Test validate_data with data out of bounds
+    X_invalid = np.array([[-0.1, 0.2], [1.1, 0.4]])
+    with pytest.raises(AssertionError):
+        art_model.validate_data(X_invalid)
+
+
+def test_set_data_bounds(art_model):
+    # Test set_data_bounds with valid bounds
+    lower_bounds = np.array([0.0, 0.0])
+    upper_bounds = np.array([1.0, 1.0])
+    art_model.is_fitted_ = False
+    art_model.set_data_bounds(lower_bounds, upper_bounds)
+    assert np.all(art_model.d_min_ == lower_bounds)
+    assert np.all(art_model.d_max_ == upper_bounds)
+
+    # Test set_data_bounds after the model is fitted
+    art_model.is_fitted_ = True
+    with pytest.raises(ValueError, match="Cannot change data limits after fit."):
+        art_model.set_data_bounds(lower_bounds, upper_bounds)
+    X = np.array([[0.1, 0.2], [0.3, 0.4]])
+    X_norm = art_model.prepare_data(X)
+    assert np.all(X_norm == X)
+
+
+def test_find_data_bounds(art_model):
+    # Test find_data_bounds with multiple data batches
+    batch_1 = np.array([[0.1, 0.2], [0.3, 0.4]])
+    batch_2 = np.array([[0.0, 0.1], [0.5, 0.6]])
+    lower_bounds, upper_bounds = art_model.find_data_bounds(batch_1, batch_2)
+    np.testing.assert_array_equal(lower_bounds, np.array([0.0, 0.1]))
+    np.testing.assert_array_equal(upper_bounds, np.array([0.5, 0.6]))
+
+
+def test_prepare_data(art_model):
+    # Test prepare_data with valid data
+    X = np.array([[0.0, 0.5], [0.2, 1.0]])
+    art_model.d_min_ = np.array([0.0, 0.0])
+    art_model.d_max_ = np.array([1.0, 1.0])
+    normalized_X = art_model.prepare_data(X)
+    np.testing.assert_array_almost_equal(normalized_X, X)  # Already normalized
+
+    # Test prepare_data with data requiring normalization
+    X = np.array([[1.0, 10.0], [5.0, 20.0]])
+    art_model.d_min_ = np.array([1.0, 10.0])
+    art_model.d_max_ = np.array([5.0, 20.0])
+    normalized_X = art_model.prepare_data(X)
+    expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
+    np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X)
\ No newline at end of file

From 69a0d0e1f77e853fbcb8925399998312e7d542e9 Mon Sep 17 00:00:00 2001
From: niklas melton
Date: Wed, 8 Jan 2025 21:22:06 -0600
Subject: [PATCH 2/9] default is_fitted

---
 artlib/common/BaseART.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py
index 409816b..3ed600f 100644
--- a/artlib/common/BaseART.py
+++ b/artlib/common/BaseART.py
@@ -28,6 +28,7 @@ def __init__(self, params: Dict):
         self.weight_sample_counter_: List[int] = []
         self.d_min_ = None
         self.d_max_ = None
+        self.is_fitted_ = False
 
     def __getattr__(self, key):
         if key in self.params:
From ca9cdda4de4c9f5b172cc557f1bda75c1b3406b3 Mon
Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:27:36 -0600 Subject: [PATCH 3/9] fix some broken unit tests --- artlib/common/BaseART.py | 4 ++-- unit_tests/test_FuzzyART.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index 3ed600f..0c75961 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -141,8 +141,8 @@ def find_data_bounds( """ all_data = np.vstack(data_batches) - lower_bounds = np.min(all_data) - upper_bounds = np.max(all_data) + lower_bounds = np.min(all_data, axis=0) + upper_bounds = np.max(all_data, axis=0) return lower_bounds, upper_bounds diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py index e4cd338..0b9ad26 100644 --- a/unit_tests/test_FuzzyART.py +++ b/unit_tests/test_FuzzyART.py @@ -2,6 +2,7 @@ import numpy as np from unittest.mock import MagicMock from artlib.elementary.FuzzyART import FuzzyART +from artlib.common.utils import compliment_code # Assuming BaseART is imported and available in the current namespace @@ -161,7 +162,8 @@ def test_validate_data_again(art_model): # Test validate_data with normalized data X = np.array([[0.1, 0.2], [0.3, 0.4]]) art_model.is_fitted_ = False - art_model.validate_data(X) # Should pass without assertion error + X_cc = compliment_code(X) + art_model.validate_data(X_cc) # Should pass without assertion error # Test validate_data with data out of bounds X_invalid = np.array([[-0.1, 0.2], [1.1, 0.4]]) @@ -183,8 +185,9 @@ def test_set_data_bounds(art_model): with pytest.raises(ValueError, match="Cannot change data limits after fit."): art_model.set_data_bounds(lower_bounds, upper_bounds) X = np.array([[0.1, 0.2], [0.3, 0.4]]) + X_cc = compliment_code(X) X_norm = art_model.prepare_data(X) - assert np.all(X_norm == X) + assert np.all(X_norm == X_cc) def test_find_data_bounds(art_model): From 2b83265f156ee23aa64aaab5c41e1ac5a422e659 Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:30:30 -0600 Subject: [PATCH 4/9] fix some broken unit tests --- unit_tests/test_FuzzyART.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py index 0b9ad26..01ab2b6 100644 --- a/unit_tests/test_FuzzyART.py +++ b/unit_tests/test_FuzzyART.py @@ -202,10 +202,11 @@ def test_find_data_bounds(art_model): def test_prepare_data(art_model): # Test prepare_data with valid data X = np.array([[0.0, 0.5], [0.2, 1.0]]) + X_cc = compliment_code(X) art_model.d_min_ = np.array([0.0, 0.0]) art_model.d_max_ = np.array([1.0, 1.0]) normalized_X = art_model.prepare_data(X) - np.testing.assert_array_almost_equal(normalized_X, X) # Already normalized + np.testing.assert_array_almost_equal(normalized_X, X_cc) # Already normalized # Test prepare_data with data requiring normalization X = np.array([[1.0, 10.0], [5.0, 20.0]]) @@ -213,4 +214,5 @@ def test_prepare_data(art_model): art_model.d_max_ = np.array([5.0, 20.0]) normalized_X = art_model.prepare_data(X) expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]]) - np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X) \ No newline at end of file + expected_normalized_X_cc = compliment_code(expected_normalized_X) + np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X_cc) \ No newline at end of file From 0715e33b9c53f839f8627421ddd08feda01e306e Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 21:32:17 -0600 Subject: [PATCH 5/9] 
fix some broken unit tests

---
 unit_tests/test_FuzzyART.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/unit_tests/test_FuzzyART.py b/unit_tests/test_FuzzyART.py
index 01ab2b6..57d6f84 100644
--- a/unit_tests/test_FuzzyART.py
+++ b/unit_tests/test_FuzzyART.py
@@ -213,6 +213,5 @@ def test_prepare_data(art_model):
     art_model.d_min_ = np.array([1.0, 10.0])
     art_model.d_max_ = np.array([5.0, 20.0])
     normalized_X = art_model.prepare_data(X)
-    expected_normalized_X = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
-    expected_normalized_X_cc = compliment_code(expected_normalized_X)
+    expected_normalized_X_cc = np.array([[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 0.0, 0.0]])
     np.testing.assert_array_almost_equal(normalized_X, expected_normalized_X_cc)
\ No newline at end of file

From 58c368ab98955983990dabdda50d2dcf2fd6c3b6 Mon Sep 17 00:00:00 2001
From: niklas melton
Date: Wed, 8 Jan 2025 22:12:23 -0600
Subject: [PATCH 6/9] update docs and add clip method

---
 README.md                         | 30 ++++++++++++++++++++++++++++++
 artlib/common/BaseART.py          |  6 +++++-
 artlib/common/BaseARTMAP.py       | 10 ++++++++--
 artlib/fusion/FusionART.py        |  8 +++++++-
 artlib/hierarchical/DeepARTMAP.py |  8 ++++++--
 artlib/supervised/ARTMAP.py       | 14 ++++++++++----
 artlib/supervised/SimpleARTMAP.py | 18 ++++++++++++++++--
 7 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index aecb18a..f01bc2b 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,36 @@ model.fit(train_X_prep, train_y)
 pred_y = model.predict_regression(test_Xy_prep, target_channels=[1])
 ```
+### Data Normalization
+
+AdaptiveResonanceLib models require feature data to be normalized to the range
+0.0 to 1.0, inclusive. This requires identifying the boundaries of the data space.
+
+If the first batch of your training data is representative of the entire data space,
+you don't need to do anything; artlib will identify the data bounds automatically.
+However, a single batch is often not representative, and one of the following
+workarounds will be needed:
+
+Users can manually set the bounds using the following code snippet or similar:
+```python
+# Set the boundaries of your data for normalization
+lower_bounds = np.array([0.]*n_features)
+upper_bounds = np.array([1.]*n_features)
+model.set_data_bounds(lower_bounds, upper_bounds)
+```
+
+Or users can present all batches of data for automatic boundary identification
+and then set the resulting bounds on the model:
+```python
+# Find the boundaries of your data for normalization
+lower_bounds, upper_bounds = model.find_data_bounds(train_X, test_X)
+model.set_data_bounds(lower_bounds, upper_bounds)
+```
+
+If only the boundaries of your testing data are unknown, you can call
+`model.predict()` with `clip=True` to clip testing data to the bounds seen during
+training. Only use this if you understand what you are doing.
+
diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py
index 0c75961..fd64c74 100644
--- a/artlib/common/BaseART.py
+++ b/artlib/common/BaseART.py
@@ -797,13 +797,15 @@ def fit_gif(
         self.post_fit(X)
         return self
 
-    def predict(self, X: np.ndarray) -> np.ndarray:
+    def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
         """Predict labels for the data.
 
         Parameters
         ----------
         X : np.ndarray
             The dataset.
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -812,6 +814,8 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.d_min_, self.d_max_) self.validate_data(X) self.check_dimensions(X) diff --git a/artlib/common/BaseARTMAP.py b/artlib/common/BaseARTMAP.py index fa5da4c..8de9c10 100644 --- a/artlib/common/BaseARTMAP.py +++ b/artlib/common/BaseARTMAP.py @@ -138,13 +138,15 @@ def partial_fit( """ raise NotImplementedError - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict labels for the data. Parameters ---------- X : np.ndarray Dataset A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -154,13 +156,17 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ raise NotImplementedError - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict labels for the data, both A-side and B-side. Parameters ---------- X : np.ndarray Dataset A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- diff --git a/artlib/fusion/FusionART.py b/artlib/fusion/FusionART.py index 781e66a..3b81a15 100644 --- a/artlib/fusion/FusionART.py +++ b/artlib/fusion/FusionART.py @@ -528,13 +528,17 @@ def step_pred(self, x, skip_channels: List[int] = []) -> int: c_ = int(np.argmax(T)) return c_ - def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray: + def predict( + self, X: np.ndarray, clip: bool = False, skip_channels: List[int] = [] + ) -> np.ndarray: """Predict labels for the input data. Parameters ---------- X : np.ndarray Input dataset. + clip : bool + clip the input values to be between the previously seen data limits skip_channels : list of int, optional Channels to skip (default is []). @@ -545,6 +549,8 @@ def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.d_min_, self.d_max_) self.validate_data(X) self.check_dimensions(X) diff --git a/artlib/hierarchical/DeepARTMAP.py b/artlib/hierarchical/DeepARTMAP.py index b11475a..6545a64 100644 --- a/artlib/hierarchical/DeepARTMAP.py +++ b/artlib/hierarchical/DeepARTMAP.py @@ -410,13 +410,17 @@ def partial_fit( x_i += 1 return self - def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]: + def predict( + self, X: Union[np.ndarray, list[np.ndarray]], clip: bool = False + ) -> list[np.ndarray]: """Predict the labels for the input data. Parameters ---------- X : np.ndarray or list of np.ndarray The input data set for prediction. 
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -428,7 +432,7 @@ def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]: x = X[-1] else: x = X - pred_a, pred_b = self.layers[-1].predict_ab(x) + pred_a, pred_b = self.layers[-1].predict_ab(x, clip=clip) pred = [pred_a, pred_b] for layer in self.layers[:-1][::-1]: pred.append(layer.map_a2b(pred[-1])) diff --git a/artlib/supervised/ARTMAP.py b/artlib/supervised/ARTMAP.py index a24a389..597b2f5 100644 --- a/artlib/supervised/ARTMAP.py +++ b/artlib/supervised/ARTMAP.py @@ -257,13 +257,15 @@ def partial_fit( ) return self - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict the labels for the given data. Parameters ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -272,15 +274,19 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) - return super(ARTMAP, self).predict(X) + return super(ARTMAP, self).predict(X, clip) - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict both A-side and B-side labels for the given data. Parameters ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -289,7 +295,7 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ check_is_fitted(self) - return super(ARTMAP, self).predict_ab(X) + return super(ARTMAP, self).predict_ab(X, clip) def predict_regression(self, X: np.ndarray) -> np.ndarray: """ diff --git a/artlib/supervised/SimpleARTMAP.py b/artlib/supervised/SimpleARTMAP.py index 34c59c9..cc6c5c1 100644 --- a/artlib/supervised/SimpleARTMAP.py +++ b/artlib/supervised/SimpleARTMAP.py @@ -405,13 +405,15 @@ def step_pred(self, x: np.ndarray) -> tuple[int, int]: c_b = self.map[c_a] return c_a, c_b - def predict(self, X: np.ndarray) -> np.ndarray: + def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """Predict labels for the data. Parameters ---------- X : np.ndarray Data set A. + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -420,19 +422,27 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_) + self.module_a.validate_data(X) + self.module_a.check_dimensions(X) y_b = np.zeros((X.shape[0],), dtype=int) for i, x in enumerate(X): c_a, c_b = self.step_pred(x) y_b[i] = c_b return y_b - def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + def predict_ab( + self, X: np.ndarray, clip: bool = False + ) -> tuple[np.ndarray, np.ndarray]: """Predict labels for the data, both A-side and B-side. Parameters ---------- X : np.ndarray Data set A. 
+ clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -441,6 +451,10 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ check_is_fitted(self) + if clip: + X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_) + self.module_a.validate_data(X) + self.module_a.check_dimensions(X) y_a = np.zeros((X.shape[0],), dtype=int) y_b = np.zeros((X.shape[0],), dtype=int) for i, x in enumerate(X): From c6c0cfa6e51e445ebef246c3b7a6c2f6bdeba8ee Mon Sep 17 00:00:00 2001 From: niklas melton Date: Wed, 8 Jan 2025 22:18:22 -0600 Subject: [PATCH 7/9] update docs and add clip method --- artlib/common/BaseART.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/artlib/common/BaseART.py b/artlib/common/BaseART.py index fd64c74..ff732e7 100644 --- a/artlib/common/BaseART.py +++ b/artlib/common/BaseART.py @@ -127,7 +127,8 @@ def set_data_bounds(self, lower_bounds: np.ndarray, upper_bounds: np.ndarray): def find_data_bounds( self, *data_batches: list[np.ndarray] ) -> Tuple[np.ndarray, np.ndarray]: - """Manually set the data bounds for normalization. + """Automatically find the data bounds for normalization from a list of data + batches. Parameters ---------- From 89391c9c67776261afd48e41596c8f20c898cc06 Mon Sep 17 00:00:00 2001 From: niklas melton Date: Thu, 9 Jan 2025 21:39:51 -0600 Subject: [PATCH 8/9] update readme examples with mnist and normalization info --- README.md | 61 +++++++++++++++++++++++-------------- artlib/fusion/FusionART.py | 6 ++-- artlib/supervised/ARTMAP.py | 6 ++-- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index f01bc2b..15feef5 100644 --- a/README.md +++ b/README.md @@ -76,17 +76,25 @@ Here are some quick examples to get you started with AdaptiveResonanceLib: ```python from artlib import FuzzyART import numpy as np +from tensorflow.keras.datasets import mnist -# Your dataset -train_X = np.array([...]) # shape (n_samples, n_features) -test_X = np.array([...]) +# Load the MNIST dataset +n_dim = 28*28 +(X_train, _), (X_test, _) = mnist.load_data() +X_train = X_train.reshape((-1, n_dim)) # flatten images +X_test = X_test.reshape((-1, n_dim)) # Initialize the Fuzzy ART model model = FuzzyART(rho=0.7, alpha = 0.0, beta=1.0) +# (Optional) Tell the model the data limits for normalization +lower_bounds = np.array([0.]*n_dim) +upper_bounds = np.array([1.]*n_dim) +model.set_data_bounds(lower_bounds, upper_bounds) + # Prepare Data -train_X_prep = model.prepare_data(train_X) -test_X_prep = model.prepare_data(test_X) +train_X_prep = model.prepare_data(X_train) +test_X_prep = model.prepare_data(X_test) # Fit the model model.fit(train_X_prep) @@ -100,25 +108,32 @@ predictions = model.predict(test_X_prep) ```python from artlib import GaussianART, SimpleARTMAP import numpy as np +from tensorflow.keras.datasets import mnist -# Your dataset -train_X = np.array([...]) # shape (n_samples, n_features) -train_y = np.array([...]) # shape (n_samples, ), must be integers -test_X = np.array([...]) +# Load the MNIST dataset +n_dim = 28*28 +(X_train, y_train), (X_test, y_test) = mnist.load_data() +X_train = X_train.reshape((-1, n_dim)) # flatten images +X_test = X_test.reshape((-1, n_dim)) # Initialize the Gaussian ART model -sigma_init = np.array([0.5]*train_X.shape[1]) # variance estimate for each feature +sigma_init = np.array([0.5]*X_train.shape[1]) # variance estimate for each feature module_a = GaussianART(rho=0.0, sigma_init=sigma_init) +# (Optional) Tell the model the data 
limits for normalization
+lower_bounds = np.array([0.]*n_dim)
+upper_bounds = np.array([1.]*n_dim)
+module_a.set_data_bounds(lower_bounds, upper_bounds)
+
 # Initialize the SimpleARTMAP model
 model = SimpleARTMAP(module_a=module_a)
 
 # Prepare Data
-train_X_prep = model.prepare_data(train_X)
-test_X_prep = model.prepare_data(test_X)
+train_X_prep = model.prepare_data(X_train)
+test_X_prep = model.prepare_data(X_test)
 
 # Fit the model
-model.fit(train_X_prep, train_y)
+model.fit(train_X_prep, y_train)
 
 # Predict data labels
 predictions = model.predict(test_X_prep)
@@ -131,22 +146,22 @@ from artlib import FuzzyART, HypersphereART, FusionART
 import numpy as np
 
 # Your dataset
-train_X = np.array([...]) # shape (n_samples, n_features_X)
-train_y = np.array([...]) # shape (n_samples, n_features_y)
+X_train = np.array([...]) # shape (n_samples, n_features_X)
+y_train = np.array([...]) # shape (n_samples, n_features_y)
 test_X = np.array([...])
 
 # Initialize the Fuzzy ART model
 module_x = FuzzyART(rho=0.0, alpha = 0.0, beta=1.0)
 
 # Initialize the Hypersphere ART model
-r_hat = 0.5*np.sqrt(train_X.shape[1]) # no restriction on hyperpshere size
+r_hat = 0.5*np.sqrt(X_train.shape[1]) # no restriction on hypersphere size
 module_y = HypersphereART(rho=0.0, alpha = 0.0, beta=1.0, r_hat=r_hat)
 
 # Initialize the FusionARTMAP model
 gamma_values = [0.5, 0.5] # eqaul weight to both channels
 channel_dims = [
-    2*train_X.shape[1], # fuzzy ART complement codes data so channel dim is 2*n_features
-    train_y.shape[1]
+    2*X_train.shape[1], # fuzzy ART complement codes data so channel dim is 2*n_features
+    y_train.shape[1]
 ]
 model = FusionART(
     modules=[module_x, module_y],
     gamma_values=gamma_values,
     channel_dims=channel_dims
 )
 
 # Prepare Data
-train_Xy = model.join_channel_data(channel_data=[train_X, train_y])
+train_Xy = model.join_channel_data(channel_data=[X_train, y_train])
 train_Xy_prep = model.prepare_data(train_Xy)
-test_Xy = model.join_channel_data(channel_data=[train_X], skip_channels=[1])
+test_Xy = model.join_channel_data(channel_data=[X_train], skip_channels=[1])
 test_Xy_prep = model.prepare_data(test_Xy)
 
 # Fit the model
-model.fit(train_X_prep, train_y)
+model.fit(train_Xy_prep)
 
 # Predict y-channel values
-pred_y = model.predict_regression(test_Xy_prep, target_channels=[1])
+# Predict y-channel values and clip X values outside previously observed ranges
+pred_y = model.predict_regression(test_Xy_prep, target_channels=[1], clip=True)
 ```
 ### Data Normalization
diff --git a/artlib/fusion/FusionART.py b/artlib/fusion/FusionART.py
index 3b81a15..ecd0e40 100644
--- a/artlib/fusion/FusionART.py
+++ b/artlib/fusion/FusionART.py
@@ -679,7 +679,7 @@ def get_channel_centers(self, channel: int) -> List[np.ndarray]:
         return self.modules[channel].get_cluster_centers()
 
     def predict_regression(
-        self, X: np.ndarray, target_channels: List[int] = [-1]
+        self, X: np.ndarray, clip: bool = False, target_channels: List[int] = [-1]
     ) -> Union[np.ndarray, List[np.ndarray]]:
         """Predict regression values for the input data using the target channels.
 
         Parameters
         ----------
         X : np.ndarray
             Input dataset.
+        clip : bool
+            clip the input values to be between the previously seen data limits
         target_channels : list of int, optional
             List of target channels to use for regression. If negative values are used,
             they are considered as channels counting backward from the last channel.
@@ -701,7 +703,7 @@ def predict_regression( """ target_channels = [self.n + k if k < 0 else k for k in target_channels] - C = self.predict(X, skip_channels=target_channels) + C = self.predict(X, clip=clip, skip_channels=target_channels) centers = [self.get_channel_centers(k) for k in target_channels] if len(target_channels) == 1: return np.array([centers[0][c] for c in C]) diff --git a/artlib/supervised/ARTMAP.py b/artlib/supervised/ARTMAP.py index 597b2f5..8588c6a 100644 --- a/artlib/supervised/ARTMAP.py +++ b/artlib/supervised/ARTMAP.py @@ -297,7 +297,7 @@ def predict_ab( check_is_fitted(self) return super(ARTMAP, self).predict_ab(X, clip) - def predict_regression(self, X: np.ndarray) -> np.ndarray: + def predict_regression(self, X: np.ndarray, clip: bool = False) -> np.ndarray: """ Predict values for the given data using cluster centers. Note: ARTMAP is not recommended for regression. @@ -307,6 +307,8 @@ def predict_regression(self, X: np.ndarray) -> np.ndarray: ---------- X : np.ndarray Data set A (independent channel). + clip : bool + clip the input values to be between the previously seen data limits Returns ------- @@ -314,6 +316,6 @@ def predict_regression(self, X: np.ndarray) -> np.ndarray: Predicted values using cluster centers. """ check_is_fitted(self) - C = self.predict(X) + C = self.predict(X, clip=clip) centers = self.module_b.get_cluster_centers() return np.array([centers[c] for c in C]) From 6442d952b4618b8d35ddb77635e03bbf2c29e16d Mon Sep 17 00:00:00 2001 From: niklas melton Date: Thu, 9 Jan 2025 21:52:54 -0600 Subject: [PATCH 9/9] update readme examples with mnist and normalization info --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15feef5..d47196d 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ model = FuzzyART(rho=0.7, alpha = 0.0, beta=1.0) # (Optional) Tell the model the data limits for normalization lower_bounds = np.array([0.]*n_dim) -upper_bounds = np.array([1.]*n_dim) +upper_bounds = np.array([255.]*n_dim) model.set_data_bounds(lower_bounds, upper_bounds) # Prepare Data @@ -122,7 +122,7 @@ module_a = GaussianART(rho=0.0, sigma_init=sigma_init) # (Optional) Tell the model the data limits for normalization lower_bounds = np.array([0.]*n_dim) -upper_bounds = np.array([1.]*n_dim) +upper_bounds = np.array([255.]*n_dim) module_a.set_data_bounds(lower_bounds, upper_bounds) # Initialize the SimpleARTMAP model