Skip to content

Commit

Permalink
update docs and add clip method
Browse files Browse the repository at this point in the history
  • Loading branch information
NiklasMelton committed Jan 9, 2025
1 parent 0715e33 commit 58c368a
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 12 deletions.
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,36 @@ model.fit(train_X_prep, train_y)
pred_y = model.predict_regression(test_Xy_prep, target_channels=[1])
```

### Data Normalization

AdaptiveResonanceLib models require feature data to be normalized between 0.0
and 1.0, inclusive. This requires identifying the boundaries of the data space.

If the first batch of your training data is representative of the entire data space,
you don't need to do anything and artlib will identify the data bounds automatically.
However, this will often not be sufficient, and one of the following workarounds
will be needed:

Users can manually set the bounds using the following code snippet or similar:
```python
# Set the boundaries of your data for normalization
lower_bounds = np.array([0.]*n_features)
upper_bounds = np.array([1.]*n_features)
model.set_data_bounds(lower_bounds, upper_bounds)
```

Or users can present all batches of data to the model for automatic
boundary identification:
```python
# Find the boundaries of your data for normalization
all_data = [train_X, test_X]
_, _ = model.find_data_bounds(all_data)
```

If only the boundaries of your testing data are unknown, you can call
`model.predict()` with `clip=True` to clip testing data to the bounds seen during
training. Clipping replaces every out-of-range value with the nearest bound, which
alters those samples, so only use this if you understand what you are doing.

<!-- END quick-start -->

<!-- START documentation -->
Expand Down
6 changes: 5 additions & 1 deletion artlib/common/BaseART.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,13 +797,15 @@ def fit_gif(
self.post_fit(X)
return self

def predict(self, X: np.ndarray) -> np.ndarray:
def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
"""Predict labels for the data.
Parameters
----------
X : np.ndarray
The dataset.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -812,6 +814,8 @@ def predict(self, X: np.ndarray) -> np.ndarray:
"""
check_is_fitted(self)
if clip:
X = np.clip(X, self.d_min_, self.d_max_)
self.validate_data(X)
self.check_dimensions(X)

Expand Down
10 changes: 8 additions & 2 deletions artlib/common/BaseARTMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,15 @@ def partial_fit(
"""
raise NotImplementedError

def predict(self, X: np.ndarray) -> np.ndarray:
def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
"""Predict labels for the data.
Parameters
----------
X : np.ndarray
Dataset A.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -154,13 +156,17 @@ def predict(self, X: np.ndarray) -> np.ndarray:
"""
raise NotImplementedError

def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
def predict_ab(
self, X: np.ndarray, clip: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""Predict labels for the data, both A-side and B-side.
Parameters
----------
X : np.ndarray
Dataset A.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand Down
8 changes: 7 additions & 1 deletion artlib/fusion/FusionART.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,13 +528,17 @@ def step_pred(self, x, skip_channels: List[int] = []) -> int:
c_ = int(np.argmax(T))
return c_

def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray:
def predict(
self, X: np.ndarray, clip: bool = False, skip_channels: List[int] = []
) -> np.ndarray:
"""Predict labels for the input data.
Parameters
----------
X : np.ndarray
Input dataset.
clip : bool
clip the input values to be between the previously seen data limits
skip_channels : list of int, optional
Channels to skip (default is []).
Expand All @@ -545,6 +549,8 @@ def predict(self, X: np.ndarray, skip_channels: List[int] = []) -> np.ndarray:
"""
check_is_fitted(self)
if clip:
X = np.clip(X, self.d_min_, self.d_max_)
self.validate_data(X)
self.check_dimensions(X)

Expand Down
8 changes: 6 additions & 2 deletions artlib/hierarchical/DeepARTMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,13 +410,17 @@ def partial_fit(
x_i += 1
return self

def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]:
def predict(
self, X: Union[np.ndarray, list[np.ndarray]], clip: bool = False
) -> list[np.ndarray]:
"""Predict the labels for the input data.
Parameters
----------
X : np.ndarray or list of np.ndarray
The input data set for prediction.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -428,7 +432,7 @@ def predict(self, X: Union[np.ndarray, list[np.ndarray]]) -> list[np.ndarray]:
x = X[-1]
else:
x = X
pred_a, pred_b = self.layers[-1].predict_ab(x)
pred_a, pred_b = self.layers[-1].predict_ab(x, clip=clip)
pred = [pred_a, pred_b]
for layer in self.layers[:-1][::-1]:
pred.append(layer.map_a2b(pred[-1]))
Expand Down
14 changes: 10 additions & 4 deletions artlib/supervised/ARTMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,15 @@ def partial_fit(
)
return self

def predict(self, X: np.ndarray) -> np.ndarray:
def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
"""Predict the labels for the given data.
Parameters
----------
X : np.ndarray
Data set A (independent channel).
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -272,15 +274,19 @@ def predict(self, X: np.ndarray) -> np.ndarray:
"""
check_is_fitted(self)
return super(ARTMAP, self).predict(X)
return super(ARTMAP, self).predict(X, clip)

def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
def predict_ab(
self, X: np.ndarray, clip: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""Predict both A-side and B-side labels for the given data.
Parameters
----------
X : np.ndarray
Data set A (independent channel).
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -289,7 +295,7 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""
check_is_fitted(self)
return super(ARTMAP, self).predict_ab(X)
return super(ARTMAP, self).predict_ab(X, clip)

def predict_regression(self, X: np.ndarray) -> np.ndarray:
"""
Expand Down
18 changes: 16 additions & 2 deletions artlib/supervised/SimpleARTMAP.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,13 +405,15 @@ def step_pred(self, x: np.ndarray) -> tuple[int, int]:
c_b = self.map[c_a]
return c_a, c_b

def predict(self, X: np.ndarray) -> np.ndarray:
def predict(self, X: np.ndarray, clip: bool = False) -> np.ndarray:
"""Predict labels for the data.
Parameters
----------
X : np.ndarray
Data set A.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -420,19 +422,27 @@ def predict(self, X: np.ndarray) -> np.ndarray:
"""
check_is_fitted(self)
if clip:
X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_)
self.module_a.validate_data(X)
self.module_a.check_dimensions(X)
y_b = np.zeros((X.shape[0],), dtype=int)
for i, x in enumerate(X):
c_a, c_b = self.step_pred(x)
y_b[i] = c_b
return y_b

def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
def predict_ab(
self, X: np.ndarray, clip: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""Predict labels for the data, both A-side and B-side.
Parameters
----------
X : np.ndarray
Data set A.
clip : bool
clip the input values to be between the previously seen data limits
Returns
-------
Expand All @@ -441,6 +451,10 @@ def predict_ab(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""
check_is_fitted(self)
if clip:
X = np.clip(X, self.module_a.d_min_, self.module_a.d_max_)
self.module_a.validate_data(X)
self.module_a.check_dimensions(X)
y_a = np.zeros((X.shape[0],), dtype=int)
y_b = np.zeros((X.shape[0],), dtype=int)
for i, x in enumerate(X):
Expand Down

0 comments on commit 58c368a

Please sign in to comment.