Skip to content

Commit

Permalink
Merge pull request #41 from NiklasMelton/test-bayesian-art
Browse files Browse the repository at this point in the history
Test bayesian art
  • Loading branch information
NiklasMelton authored Mar 12, 2024
2 parents 915121e + e57d61c commit dfe4554
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 58 deletions.
93 changes: 93 additions & 0 deletions common/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from matplotlib.axes import Axes

def normalize(data: np.ndarray) -> np.ndarray:
normalized = (data-np.min(data))/(np.max(data)-np.min(data))
Expand All @@ -16,3 +17,95 @@ def l2norm2(data: np.ndarray) -> float:

def fuzzy_and(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """
    Fuzzy AND of two arrays, taken as their element-wise minimum.
    Parameters:
    - x: First operand.
    - y: Second operand.
    Returns:
    - An array holding, at each position, the smaller of the two inputs.
    """
    element_wise_min = np.minimum(x, y)
    return element_wise_min

def plot_gaussian_contours_fading(
        ax: Axes,
        mean: np.ndarray,
        std_dev: np.ndarray,
        color: np.ndarray,
        max_std: int = 2,
        sigma_steps: float = 0.25,
        linewidth: int = 1
):
    """
    Plots concentric axis-aligned ellipses to represent the contours of a 2D Gaussian distribution,
    with the opacity fading from 1 (innermost contour) to 0.1 (outermost contour).
    Parameters:
    - ax: Matplotlib axis object the ellipses are added to (required; no axis is created here).
    - mean: A numpy array representing the mean (μ) of the distribution; only the first two entries are used.
    - std_dev: A numpy array representing the standard deviation (σ) of the distribution; only the first two
      entries are used.
    - color: RGB or RGBA color. When it does not have exactly 4 entries an alpha channel is appended.
      The alpha channel is overwritten per contour, so any initial opacity is ignored.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed contour outlines. Default is 1.
    """
    from matplotlib.patches import Ellipse

    # Number of contours. The small epsilon keeps ratios such as 0.3 / 0.1
    # (2.9999... in floating point) from silently losing the outermost contour.
    steps = max(int(np.floor(max_std / sigma_steps + 1e-9)), 0)
    alphas = np.linspace(1, 0.1, steps)

    # Work on a float copy: assigning a fractional alpha into an integer
    # array would truncate it to 0 and make every contour invisible.
    rgba = np.array(color, dtype=float)
    if len(rgba) != 4:
        rgba = np.concatenate([rgba, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(rgba)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the k-sigma contour is 2*k*sigma wide/high
        width = 2 * i * sigma_steps * std_dev[0]
        height = 2 * i * sigma_steps * std_dev[1]
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            edgecolor=current_color,
            facecolor='none',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ'
        )
        ax.add_patch(ellipse)


def plot_gaussian_contours_covariance(
        ax: Axes,
        mean: np.ndarray,
        covariance: np.ndarray,
        color: np.ndarray,
        max_std: int = 2,
        sigma_steps: float = 0.25,
        linewidth: int = 1
):
    """
    Plots concentric ellipses to represent the contours of a 2D Gaussian distribution, with the opacity
    fading from 1 (innermost contour) to 0.1 (outermost contour).
    Accepts a covariance matrix to properly represent the distribution's orientation and shape.
    Parameters:
    - ax: Matplotlib axis object the ellipses are added to (required; no axis is created here).
    - mean: A numpy array representing the mean (μ) of the distribution; only the first two entries are used.
    - covariance: A 2x2 symmetric numpy array representing the covariance matrix of the distribution.
    - color: RGB or RGBA color. When it does not have exactly 4 entries an alpha channel is appended.
      The alpha channel is overwritten per contour, so any initial opacity is ignored.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed contour outlines. Default is 1.
    """
    from matplotlib.patches import Ellipse

    # eigh is the solver for symmetric matrices: it returns real eigenvalues
    # in ascending order, so index 1 is reliably the major axis.
    eigenvalues, eigenvectors = np.linalg.eigh(np.asarray(covariance, dtype=float))
    # Round-off can leave a tiny negative eigenvalue on a near-singular covariance
    axis_std = np.sqrt(np.clip(eigenvalues, 0.0, None))
    minor_axis, major_axis = axis_std[0], axis_std[1]
    major_direction = eigenvectors[:, 1]
    # Angle in radians between the x-axis and the major axis of the ellipse
    angle = np.arctan2(major_direction[1], major_direction[0])

    # Number of contours. The small epsilon keeps ratios such as 0.3 / 0.1
    # (2.9999... in floating point) from silently losing the outermost contour.
    steps = max(int(np.floor(max_std / sigma_steps + 1e-9)), 0)
    alphas = np.linspace(1, 0.1, steps)

    # Work on a float copy: assigning a fractional alpha into an integer
    # array would truncate it to 0 and make every contour invisible.
    rgba = np.array(color, dtype=float)
    if len(rgba) != 4:
        rgba = np.concatenate([rgba, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(rgba)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the k-sigma contour spans 2*k*sigma
        # along each principal axis (matching the label and max_std).
        width = 2 * i * sigma_steps * major_axis
        height = 2 * i * sigma_steps * minor_axis
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            angle=float(np.degrees(angle)),
            edgecolor=current_color,
            facecolor='None',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ'
        )
        ax.add_patch(ellipse)


47 changes: 31 additions & 16 deletions elementary/BayesianART.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
IEEE Transactions on Neural Networks, 18, 1628–1644. doi:10.1109/TNN.2007.900234.
"""
import numpy as np
from typing import Optional
from typing import Optional, Iterable
from matplotlib.axes import Axes
from common.BaseART import BaseART
from common.utils import normalize
from common.utils import plot_gaussian_contours_covariance

def prepare_data(data: np.ndarray) -> np.ndarray:
normalized = normalize(data)
Expand All @@ -24,7 +26,7 @@ def validate_params(params: dict):
assert params["rho"] > 0

def check_dimensions(self, X: np.ndarray):
if not self.dim_:
if not hasattr(self, "dim_"):
self.dim_ = X.shape[1]
assert self.params["cov_init"].shape[0] == self.dim_
assert self.params["cov_init"].shape[1] == self.dim_
Expand All @@ -33,43 +35,56 @@ def check_dimensions(self, X: np.ndarray):

def category_choice(self, i: np.ndarray, w: np.ndarray, params: dict) -> tuple[float, Optional[dict]]:
    """
    Compute the activation of one cluster for a sample as the Gaussian likelihood of the sample
    under the cluster multiplied by the cluster's share of all counted samples.
    Parameters:
    - i: The data sample.
    - w: The cluster weight, laid out as [mean (dim_ values), flattened dim_ x dim_ covariance, sample count].
    - params: The algorithm parameters (not used by this method).
    Returns:
    - The activation, and a cache with "exp_dist_cov_dist", "cov" and "det_cov" for later reuse.
    """
    mean = w[:self.dim_]
    # Everything between the mean and the trailing sample count is the covariance
    cov = w[self.dim_:-1].reshape((self.dim_, self.dim_))
    n = w[-1]

    dist = mean - i
    # exp(-1/2 * d^T * cov^-1 * d)
    exp_dist_cov_dist = np.exp(-0.5 * np.matmul(dist.T, np.matmul(np.linalg.inv(cov), dist)))
    det_cov = np.linalg.det(cov)
    cache = {
        "exp_dist_cov_dist": exp_dist_cov_dist,
        "cov": cov,
        "det_cov": det_cov
    }

    # Likelihood of the sample given the cluster.
    # NOTE(review): self.pi2 is defined outside this view; presumably 2*pi - confirm.
    p_i_cj = exp_dist_cov_dist / np.sqrt((self.pi2 ** self.dim_) * det_cov)
    # Prior of the cluster: its sample count over the total count of all clusters.
    # Builtin sum is used because np.sum over a generator is deprecated.
    total_n = sum(w_[-1] for w_ in self.W)
    p_cj = n / total_n

    activation = p_i_cj * p_cj

    return activation, cache

def match_criterion(self, i: np.ndarray, w: np.ndarray, params: dict, cache: Optional[dict] = None) -> float:
    """
    Compute the match value of a cluster as the product of the diagonal entries of its covariance matrix.
    Parameters:
    - i: The data sample (not used by this method).
    - w: The cluster weight (not used by this method; the covariance is read from the cache).
    - params: The algorithm parameters (not used by this method).
    - cache: The cache returned by category_choice; must contain "cov".
    Returns:
    - The product of the diagonal entries of the cached covariance matrix, as a plain float.
    Raises:
    - ValueError: If no cache is provided.
    """
    if cache is None:
        raise ValueError("No cache provided")
    # NOTE: the determinant stored in cache["det_cov"] was considered as an
    # alternative measure; the product of the diagonal is what is used.
    return float(np.prod(np.diag(cache["cov"])))

def match_criterion_bin(self, i: np.ndarray, w: np.ndarray, params: dict, cache: Optional[dict] = None) -> bool:
    """
    Decide whether a cluster passes the vigilance test for a sample.
    Parameters:
    - i: The data sample.
    - w: The cluster weight.
    - params: The algorithm parameters; "rho" is the upper bound on the match value.
    - cache: The cache returned by category_choice, forwarded to match_criterion.
    Returns:
    - True when the match value does not exceed params["rho"], otherwise False.
    """
    # The match value grows with the cluster's spread, so the cluster passes
    # while the value stays at or below the vigilance threshold.
    match_value = self.match_criterion(i, w, params=params, cache=cache)
    return bool(match_value <= params["rho"])

def update(self, i: np.ndarray, w: np.ndarray, params, cache: Optional[dict] = None) -> np.ndarray:
    """
    Fold one sample into a cluster, updating its mean, covariance and sample count incrementally.
    Parameters:
    - i: The data sample.
    - w: The cluster weight, laid out as [mean (dim_ values), flattened dim_ x dim_ covariance, sample count].
    - params: The algorithm parameters (not used by this method).
    - cache: The cache returned by category_choice. Its contents are not read, but it must be provided.
    Returns:
    - The updated weight in the same layout as w.
    Raises:
    - ValueError: If no cache is provided.
    """
    if cache is None:
        raise ValueError("No cache provided")

    mean = w[:self.dim_]
    # Everything between the mean and the trailing sample count is the covariance
    cov = w[self.dim_:-1].reshape((self.dim_, self.dim_))
    n = w[-1]

    n_new = n + 1
    mean_new = (1 - (1 / n_new)) * mean + (1 / n_new) * i

    # Outer product of the sample's deviation from the *updated* mean
    i_mean_dist = i - mean_new
    i_mean_dist_2 = np.outer(i_mean_dist, i_mean_dist)

    cov_new = (n / n_new) * cov + (1 / n_new) * i_mean_dist_2

    return np.concatenate([mean_new, cov_new.flatten(), [n_new]])

def new_weight(self, i: np.ndarray, params: dict) -> np.ndarray:
    """
    Build the weight of a new cluster from a single sample.
    Parameters:
    - i: The data sample; it becomes the mean of the new cluster.
    - params: The algorithm parameters; "cov_init" is the initial covariance matrix.
    Returns:
    - The weight laid out as [mean, flattened initial covariance, sample count of 1].
    """
    # np.concatenate is a function and must be called with a sequence of arrays
    return np.concatenate([i, params["cov_init"].flatten(), [1]])


def plot_cluster_bounds(self, ax: Axes, colors: Iterable, linewidth: int = 1):
    """
    Draw the Gaussian contours of every cluster on the given axis.
    Parameters:
    - ax: Matplotlib axis object to draw on.
    - colors: One color per cluster, paired with the cluster weights in order.
    - linewidth: Line width passed on to the contour plot. Default is 1.
    """
    for weight, cluster_color in zip(self.W, colors):
        dim = self.dim_
        # Weight layout: mean first, then the flattened covariance, then the sample count
        cluster_mean = weight[:dim]
        cluster_cov = weight[dim:-1].reshape((dim, dim))
        plot_gaussian_contours_covariance(ax, cluster_mean, cluster_cov, cluster_color, linewidth=linewidth)
43 changes: 1 addition & 42 deletions elementary/GaussianART.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,7 @@
from typing import Optional, Iterable
from matplotlib.axes import Axes
from common.BaseART import BaseART


def plot_gaussian_contours_fading(
        ax: Axes,
        mean: np.ndarray,
        std_dev: np.ndarray,
        color: np.ndarray,
        max_std: int = 2,
        sigma_steps: float = 0.25,
        linewidth: int = 1
):
    """
    Plots concentric axis-aligned ellipses to represent the contours of a 2D Gaussian distribution,
    with the opacity fading from 1 (innermost contour) to 0.1 (outermost contour).
    Parameters:
    - ax: Matplotlib axis object the ellipses are added to (required; no axis is created here).
    - mean: A numpy array representing the mean (μ) of the distribution; only the first two entries are used.
    - std_dev: A numpy array representing the standard deviation (σ) of the distribution; only the first two
      entries are used.
    - color: RGB or RGBA color. When it does not have exactly 4 entries an alpha channel is appended.
      The alpha channel is overwritten per contour, so any initial opacity is ignored.
    - max_std: Max standard deviations to draw contours to. Default is 2.
    - sigma_steps: Step size in standard deviations for each contour. Default is 0.25.
    - linewidth: Line width of the dashed contour outlines. Default is 1.
    """
    from matplotlib.patches import Ellipse

    # Number of contours. The small epsilon keeps ratios such as 0.3 / 0.1
    # (2.9999... in floating point) from silently losing the outermost contour.
    steps = max(int(np.floor(max_std / sigma_steps + 1e-9)), 0)
    alphas = np.linspace(1, 0.1, steps)

    # Work on a float copy: assigning a fractional alpha into an integer
    # array would truncate it to 0 and make every contour invisible.
    rgba = np.array(color, dtype=float)
    if len(rgba) != 4:
        rgba = np.concatenate([rgba, [1.]])

    for i, alpha in zip(range(1, steps + 1), alphas):
        # Adjust the alpha value of the color
        current_color = np.copy(rgba)
        current_color[-1] = alpha  # Update the alpha channel

        # Ellipse takes full diameters, so the k-sigma contour is 2*k*sigma wide/high
        width = 2 * i * sigma_steps * std_dev[0]
        height = 2 * i * sigma_steps * std_dev[1]
        ellipse = Ellipse(
            xy=(mean[0], mean[1]),
            width=width,
            height=height,
            edgecolor=current_color,
            facecolor='none',
            linewidth=linewidth,
            linestyle='dashed',
            label=f'{i * sigma_steps}σ'
        )
        ax.add_patch(ellipse)
from common.utils import plot_gaussian_contours_fading


class GaussianART(BaseART):
Expand Down
39 changes: 39 additions & 0 deletions examples/test_bayesian_art.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import path
import sys

# Absolute location of this script; two levels up is the repository root.
directory = path.Path(__file__).abspath()
script_dir = directory.parent

print(script_dir)
# Put the repository root on the import path so the project packages resolve.
sys.path.append(script_dir.parent)

from elementary.BayesianART import BayesianART
from common.utils import normalize
import numpy as np


def cluster_blobs():
    """
    Cluster three synthetic 2D blobs with BayesianART and show the result.

    Generates 150 unshuffled samples with a fixed random state, normalizes them,
    fits the model, prints the number of clusters found and displays the plot.
    """
    # Only the samples are needed; the ground-truth labels are discarded.
    data, _ = make_blobs(n_samples=150, centers=3, cluster_std=0.50, random_state=0, shuffle=False)
    print("Data has shape:", data.shape)

    X = normalize(data)
    print("Prepared data has shape:", X.shape)

    initial_cov = np.array([[0.0001, 0.0], [0.0, 0.0001]])
    model = BayesianART({"rho": 0.00002, "cov_init": initial_cov})
    labels = model.fit_predict(X)

    print(f"{model.n_clusters} clusters found")

    model.visualize(X, labels)
    plt.show()


if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    cluster_blobs()

0 comments on commit dfe4554

Please sign in to comment.