Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test bayesian art #41

Merged
merged 3 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions common/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from matplotlib.axes import Axes

def normalize(data: np.ndarray) -> np.ndarray:
normalized = (data-np.min(data))/(np.max(data)-np.min(data))
Expand All @@ -16,3 +17,95 @@ def l2norm2(data: np.ndarray) -> float:

def fuzzy_and(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return the fuzzy AND of ``x`` and ``y``, i.e. their element-wise minimum."""
    element_wise_min = np.minimum(x, y)
    return element_wise_min

def plot_gaussian_contours_fading(
    ax: Axes,
    mean: np.ndarray,
    std_dev: np.ndarray,
    color: np.ndarray,
    max_std: int = 2,
    sigma_steps: float = 0.25,
    linewidth: int = 1
):
    """
    Plots concentric, axis-aligned ellipses to represent the contours of a 2D Gaussian distribution, with fading colors.

    Parameters:
    - ax: Matplotlib axis object the ellipses are added to. It is required; no figure or axis is created here.
    - mean: A numpy array whose first two entries are the center (μ) of the ellipses.
    - std_dev: A numpy array whose first two entries are the standard deviations (σ) along x and y.
    - color: An RGB or RGBA color. If it does not have 4 entries an alpha channel is appended. The alpha channel is
      overwritten for every contour, fading linearly from 1.0 to 0.1 from the innermost to the outermost contour.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed ellipse outlines. Default is 1.

    """
    from matplotlib.patches import Ellipse

    # Calculate the number of steps
    steps = int(max_std / sigma_steps)
    alphas = np.linspace(1, 0.1, steps)

    # Work on a float array: with an integer color array the fractional alpha
    # values assigned below would be truncated to 0 and the contours would vanish.
    color = np.asarray(color, dtype=float)
    if len(color) != 4:
        color = np.concatenate([color, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(color)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the (i * sigma_steps)-sigma contour
        # is 2 * i * sigma_steps standard deviations wide and high.
        width, height = 2 * i * sigma_steps * std_dev[0], 2 * i * sigma_steps * std_dev[1]
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            edgecolor=current_color,
            facecolor='none',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ',
        )
        ax.add_patch(ellipse)


def plot_gaussian_contours_covariance(
    ax: Axes,
    mean: np.ndarray,
    covariance: np.ndarray,
    color: np.ndarray,
    max_std: int = 2,
    sigma_steps: float = 0.25,
    linewidth: int = 1
):
    """
    Plots concentric ellipses to represent the contours of a 2D Gaussian distribution, with fading colors.
    Accepts a covariance matrix to properly represent the distribution's orientation and shape.

    Parameters:
    - ax: Matplotlib axis object the ellipses are added to. It is required; no figure or axis is created here.
    - mean: A numpy array whose first two entries are the center (μ) of the ellipses.
    - covariance: A 2x2 numpy array representing the (symmetric) covariance matrix of the distribution.
    - color: An RGB or RGBA color. If it does not have 4 entries an alpha channel is appended. The alpha channel is
      overwritten for every contour, fading linearly from 1.0 to 0.1 from the innermost to the outermost contour.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed ellipse outlines. Default is 1.

    """
    from matplotlib.patches import Ellipse

    # A covariance matrix is symmetric, so use the symmetric eigen-solver: it
    # always returns real eigenvalues, and column k of `eigenvectors` is the
    # direction belonging to eigenvalues[k].
    eigenvalues, eigenvectors = np.linalg.eigh(covariance)
    # Round-off can push a zero eigenvalue slightly below zero; clamp it so the
    # square root (the standard deviation along each principal axis) stays real.
    axis_std = np.sqrt(np.maximum(eigenvalues, 0.0))
    # Rotation of the ellipse: angle between the x-axis and the first principal
    # axis. The ellipse "width" below is measured along this same axis.
    first_axis = eigenvectors[:, 0]
    angle = np.arctan2(first_axis[1], first_axis[0])

    # Calculate the number of steps
    steps = int(max_std / sigma_steps)
    alphas = np.linspace(1, 0.1, steps)

    # Float RGBA copy: without the padding an RGB color would get its blue
    # channel overwritten by alpha, and an integer array would truncate alpha to 0.
    color = np.asarray(color, dtype=float)
    if len(color) != 4:
        color = np.concatenate([color, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(color)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the (i * sigma_steps)-sigma contour
        # spans 2 * i * sigma_steps standard deviations along each principal
        # axis. This matches the label and the meaning of max_std.
        width, height = 2 * i * sigma_steps * axis_std[0], 2 * i * sigma_steps * axis_std[1]
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            angle=float(np.degrees(angle)),
            edgecolor=current_color,
            facecolor='None',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ',
        )
        ax.add_patch(ellipse)


47 changes: 31 additions & 16 deletions elementary/BayesianART.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
IEEE Transactions on Neural Networks, 18, 1628–1644. doi:10.1109/TNN.2007.900234.
"""
import numpy as np
from typing import Optional
from typing import Optional, Iterable
from matplotlib.axes import Axes
from common.BaseART import BaseART
from common.utils import normalize
from common.utils import plot_gaussian_contours_covariance

def prepare_data(data: np.ndarray) -> np.ndarray:
normalized = normalize(data)
Expand All @@ -24,7 +26,7 @@ def validate_params(params: dict):
assert params["rho"] > 0

def check_dimensions(self, X: np.ndarray):
if not self.dim_:
if not hasattr(self, "dim_"):
self.dim_ = X.shape[1]
assert self.params["cov_init"].shape[0] == self.dim_
assert self.params["cov_init"].shape[1] == self.dim_
Expand All @@ -33,43 +35,56 @@ def check_dimensions(self, X: np.ndarray):

def category_choice(self, i: np.ndarray, w: np.ndarray, params: dict) -> tuple[float, Optional[dict]]:
    """
    Compute the activation of one cluster for the sample ``i``.

    The weight vector ``w`` is laid out as ``[mean (dim_ values), covariance matrix flattened
    (dim_ * dim_ values), sample count n]``.

    Parameters:
    - i: The data sample.
    - w: The cluster weight vector in the layout described above.
    - params: The algorithm parameters (not read here).

    Returns:
    - A tuple ``(activation, cache)``. ``activation`` is the Gaussian likelihood of ``i`` under the cluster
      multiplied by the cluster's share ``n / sum of all sample counts in self.W``. ``cache`` holds the
      intermediate values ``"exp_dist_cov_dist"``, ``"cov"`` and ``"det_cov"`` for later steps.
    """
    mean = w[:self.dim_]
    # Everything between the mean and the trailing sample count is the covariance.
    cov = w[self.dim_:-1].reshape((self.dim_, self.dim_))
    n = w[-1]
    dist = mean - i
    # Exponent of the Gaussian density: -0.5 * dist^T * cov^-1 * dist.
    # This needs the matrix inverse, not the element-wise reciprocal 1 / cov.
    exp_dist_cov_dist = np.exp(-0.5 * np.matmul(dist.T, np.matmul(np.linalg.inv(cov), dist)))
    det_cov = np.linalg.det(cov)
    cache = {
        "exp_dist_cov_dist": exp_dist_cov_dist,
        "cov": cov,
        "det_cov": det_cov
    }
    # NOTE(review): self.pi2 is defined outside this method; presumably 2*pi -- confirm.
    p_i_cj = exp_dist_cov_dist / np.sqrt((self.pi2 ** self.dim_) * det_cov)
    # Built-in sum: passing a generator to np.sum is deprecated in NumPy.
    p_cj = n / sum(w_[-1] for w_ in self.W)

    activation = p_i_cj * p_cj

    return activation, cache

def match_criterion(self, i: np.ndarray, w: np.ndarray, params: dict, cache: Optional[dict] = None) -> float:
    """
    Return the scalar match value of a cluster: the product of the diagonal of its covariance matrix.

    Parameters:
    - i: The data sample (not read here).
    - w: The cluster weight vector (not read here).
    - params: The algorithm parameters (not read here).
    - cache: Dictionary holding the cluster covariance matrix under the key ``"cov"``.

    Returns:
    - The product of the per-dimension variances found on the diagonal of ``cache["cov"]``.

    Raises:
    - ValueError: If no cache is provided.
    """
    if cache is None:
        raise ValueError("No cache provided")
    # A scalar is returned, not the covariance matrix itself, so the result can
    # be compared against a threshold.
    # NOTE(review): the determinant cached under "det_cov" may be an alternative
    # measure -- confirm which one is intended.
    return np.prod(np.diag(cache["cov"]))

def match_criterion_bin(self, i: np.ndarray, w: np.ndarray, params: dict, cache: Optional[dict] = None) -> bool:
    """
    Decide whether a cluster passes the vigilance test for the sample ``i``.

    Parameters:
    - i: The data sample.
    - w: The cluster weight vector.
    - params: The algorithm parameters; ``params["rho"]`` is the upper bound for the match value.
    - cache: Values cached by earlier steps; forwarded to ``match_criterion``.

    Returns:
    - True if the value returned by ``match_criterion`` does not exceed ``params["rho"]``.
    """
    # The match value must stay at or below rho, hence "<=" and not ">=".
    match_value = self.match_criterion(i, w, params=params, cache=cache)
    return bool(match_value <= params["rho"])

def update(self, i: np.ndarray, w: np.ndarray, params, cache: Optional[dict] = None) -> np.ndarray:
    """
    Return the weight vector of a cluster after absorbing the sample ``i``.

    The weight vector ``w`` is laid out as ``[mean (dim_ values), covariance matrix flattened
    (dim_ * dim_ values), sample count n]``; the returned vector uses the same layout.

    Parameters:
    - i: The data sample to absorb.
    - w: The current cluster weight vector.
    - params: The algorithm parameters (not read here).
    - cache: Values cached by earlier steps. It must be provided, but its content is not read here.

    Returns:
    - The updated weight vector with the new mean, the new covariance and the sample count increased by one.

    Raises:
    - ValueError: If no cache is provided.
    """
    if cache is None:
        raise ValueError("No cache provided")

    mean = w[:self.dim_]
    cov = w[self.dim_:-1].reshape((self.dim_, self.dim_))
    n = w[-1]

    n_new = n + 1
    # Running mean over n_new samples.
    mean_new = (1 - (1 / n_new)) * mean + (1 / n_new) * i

    # Outer product of the sample's deviation from the *updated* mean.
    i_mean_dist = i - mean_new
    i_mean_dist_2 = i_mean_dist.reshape((-1, 1)) * i_mean_dist.reshape((1, -1))

    # Blend the old covariance with the new deviation, weighted by sample counts.
    # The full outer product is used, so off-diagonal terms are updated as well.
    cov_new = (n / n_new) * cov + (1 / n_new) * i_mean_dist_2

    return np.concatenate([mean_new, cov_new.flatten(), [n_new]])

def new_weight(self, i: np.ndarray, params: dict) -> np.ndarray:
    """
    Build the weight vector of a new cluster created from the single sample ``i``.

    Parameters:
    - i: The data sample; it becomes the mean of the new cluster.
    - params: The algorithm parameters; ``params["cov_init"]`` is the initial covariance matrix.

    Returns:
    - The concatenation of ``i``, the flattened ``params["cov_init"]`` and a sample count of 1.
    """
    # np.concatenate must be called with a sequence of arrays; subscripting it
    # (np.concatenate[...]) raises a TypeError.
    return np.concatenate([i, params["cov_init"].flatten(), [1]])


def plot_cluster_bounds(self, ax: Axes, colors: Iterable, linewidth: int = 1):
    """
    Draw the Gaussian contours of every cluster onto ``ax``.

    Parameters:
    - ax: Matplotlib axis object the contours are drawn on.
    - colors: One color per cluster, paired with the weights in ``self.W`` in order.
    - linewidth: Line width passed on to the contour plotting helper. Default is 1.
    """
    for weight, cluster_color in zip(self.W, colors):
        dim = self.dim_
        # Weight layout: mean first, then the flattened covariance, then one trailing entry.
        centroid = weight[:dim]
        covariance = weight[dim:-1].reshape((dim, dim))
        plot_gaussian_contours_covariance(ax, centroid, covariance, cluster_color, linewidth=linewidth)
43 changes: 1 addition & 42 deletions elementary/GaussianART.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,7 @@
from typing import Optional, Iterable
from matplotlib.axes import Axes
from common.BaseART import BaseART


def plot_gaussian_contours_fading(
    ax: Axes,
    mean: np.ndarray,
    std_dev: np.ndarray,
    color: np.ndarray,
    max_std: int = 2,
    sigma_steps: float = 0.25,
    linewidth: int = 1
):
    """
    Plots concentric, axis-aligned ellipses to represent the contours of a 2D Gaussian distribution, with fading colors.

    Parameters:
    - ax: Matplotlib axis object the ellipses are added to. It is required; no figure or axis is created here.
    - mean: A numpy array whose first two entries are the center (μ) of the ellipses.
    - std_dev: A numpy array whose first two entries are the standard deviations (σ) along x and y.
    - color: An RGB or RGBA color. If it does not have 4 entries an alpha channel is appended. The alpha channel is
      overwritten for every contour, fading linearly from 1.0 to 0.1 from the innermost to the outermost contour.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed ellipse outlines. Default is 1.

    """
    from matplotlib.patches import Ellipse

    # Calculate the number of steps
    steps = int(max_std / sigma_steps)
    alphas = np.linspace(1, 0.1, steps)

    # Work on a float array: with an integer color array the fractional alpha
    # values assigned below would be truncated to 0 and the contours would vanish.
    color = np.asarray(color, dtype=float)
    if len(color) != 4:
        color = np.concatenate([color, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(color)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the (i * sigma_steps)-sigma contour
        # is 2 * i * sigma_steps standard deviations wide and high.
        width, height = 2 * i * sigma_steps * std_dev[0], 2 * i * sigma_steps * std_dev[1]
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            edgecolor=current_color,
            facecolor='none',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ',
        )
        ax.add_patch(ellipse)
from common.utils import plot_gaussian_contours_fading


class GaussianART(BaseART):
Expand Down
39 changes: 39 additions & 0 deletions examples/test_bayesian_art.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import path
import sys

# Absolute path of this script. Despite its name, `directory` refers to the
# script file itself; its containing folder is `directory.parent`.
directory = path.Path(__file__).abspath()

# Show the folder that contains this script.
print(directory.parent)
# Add the folder one level above the script's folder to the module search path
# so that the project imports below resolve when the script is run directly.
# NOTE(review): presumably this is the repository root -- confirm against the project layout.
sys.path.append(directory.parent.parent)

from elementary.BayesianART import BayesianART
from common.utils import normalize
import numpy as np


def cluster_blobs():
    """Cluster a synthetic three-blob dataset with BayesianART and display the result."""
    # Three blobs generated with a fixed seed and without shuffling.
    samples, _ = make_blobs(n_samples=150, centers=3, cluster_std=0.50, random_state=0, shuffle=False)
    print("Data has shape:", samples.shape)

    X = normalize(samples)
    print("Prepared data has shape:", X.shape)

    # Initial covariance given to every new cluster, and the match threshold.
    initial_covariance = np.array([[0.0001, 0.0], [0.0, 0.0001]])
    model = BayesianART({"rho": 0.00002, "cov_init": initial_covariance})
    y = model.fit_predict(X)

    print(f"{model.n_clusters} clusters found")

    model.visualize(X, y)
    plt.show()


if __name__ == "__main__":
    cluster_blobs()