Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Visuals #60

Merged
merged 6 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
run: |
python -m pip install poetry
poetry lock --no-update
poetry install -E dev -E rnn
poetry install -E dev -E rnn -E visuals
- name: Verify that we can build the package
run: poetry build
- name: Test with pytest
Expand Down
24 changes: 23 additions & 1 deletion grainlearning/bayesian_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from numpy import argmax
from grainlearning.dynamic_systems import DynamicSystem, IODynamicSystem
from grainlearning.iterative_bayesian_filter import IterativeBayesianFilter
from grainlearning.tools import plot_param_stats, plot_posterior, plot_param_data, plot_obs_and_sim
from grainlearning.tools import plot_param_stats, plot_posterior, plot_param_data, plot_obs_and_sim, plot_pdf, \
close_plots


class BayesianCalibration:
Expand Down Expand Up @@ -180,6 +181,18 @@ def load_and_process(self, sigma: float = 0.1):
self.calibration.inference.data_assimilation_loop(sigma, self.system)
self.system.compute_estimated_params(self.calibration.inference.posteriors)

def load_all(self):
"""Simply load all previous iterations of Bayesian calibration
"""
for _ in range(self.num_iter - 1):
print(f"Bayesian calibration iter No. {self.curr_iter}")
self.load_system()
self.calibration.add_curr_param_data_to_list(self.system.param_data)
self.calibration.run_inference(self.system)
self.calibration.sigma_list.append(self.system.sigma_max)
self.plot_uq_in_time()
self.increase_curr_iter()

def resample(self):
"""Learn and resample from a proposal distribution
todo this should be refactored
Expand Down Expand Up @@ -239,6 +252,15 @@ def plot_uq_in_time(self):
self.save_fig
)

plot_pdf(
fig_name,
self.system.param_names,
self.calibration.param_data_list,
self.save_fig,
)

close_plots(self.save_fig)

def get_most_prob_params(self):
"""Return the most probable set of parameters

Expand Down
22 changes: 13 additions & 9 deletions grainlearning/dynamic_systems.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,15 +651,19 @@ def load_param_data(self):
# if param_data_file does not exit, get parameter data from text files
files = glob(self.sim_data_dir + f'/iter{self.curr_iter}/{self.sim_name}*_param*{self.sim_data_file_ext}')
self.num_samples = len(files)
self.sim_data_files = sorted(files)
self.param_data = np.zeros([self.num_samples, self.num_params])
for i, sim_data_file in enumerate(self.sim_data_files):
if self.sim_data_file_ext == '.npy':
data = np.load(sim_data_file, allow_pickle=True).item()
else:
data = get_keys_and_data(sim_data_file)
params = [data[key][0] for key in self.param_names]
self.param_data[i, :] = params
# if the number of files found is non-zero
if self.num_samples != 0:
self.sim_data_files = sorted(files)
self.param_data = np.zeros([self.num_samples, self.num_params])
for i, sim_data_file in enumerate(self.sim_data_files):
if self.sim_data_file_ext == '.npy':
data = np.load(sim_data_file, allow_pickle=True).item()
else:
data = get_keys_and_data(sim_data_file)
params = [data[key][0] for key in self.param_names]
self.param_data[i, :] = params
else:
raise RuntimeError(f'No data found for iteration {self.curr_iter}')

def run(self):
"""
Expand Down
4 changes: 4 additions & 0 deletions grainlearning/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ def data_assimilation_loop(self, sigma: float, system: Type["DynamicSystem"], pr
self.compute_effective_sample_size()
return (self.ess[-1] - self.ess_target) ** 2

def set_posteriors(self, posteriors: np.ndarray = None):
"""Set the posterior distribution"""
self.posteriors = posteriors

def compute_effective_sample_size(self):
"""Compute the effective sample size"""
# compute the effective sample size for every time step
Expand Down
76 changes: 57 additions & 19 deletions grainlearning/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,28 +409,22 @@ def plot_param_stats(fig_name, param_names, means, covs, save_fig=0):
plt.subplot(2, n_cols, i + 1)
plt.plot(means[:, i])
plt.xlabel("'Time' step")
plt.ylabel(r'$|' + param_names[i] + r'|$')
plt.ylabel(f'Mean of {param_names[i]}')
plt.grid(True)
plt.tight_layout()
if save_fig:
plt.savefig(f'{fig_name}_param_means.png')
else:
plt.show()
plt.close()

plt.figure('Posterior coefficients of variance of the parameters')
for i in range(num):
plt.subplot(2, n_cols, i + 1)
plt.plot(covs[:, i])
plt.xlabel("'Time' step")
plt.ylabel(r'$COV(' + param_names[i] + ')$')
plt.ylabel(f'Coefficient of variation of {param_names[i]}')
plt.grid(True)
plt.tight_layout()
if save_fig:
plt.savefig(f'{fig_name}_param_covs.png')
else:
plt.show()
plt.close()


def plot_posterior(fig_name, param_names, param_data, posterior, save_fig=0):
Expand All @@ -445,27 +439,27 @@ def plot_posterior(fig_name, param_names, param_data, posterior, save_fig=0):
try:
import matplotlib.pylab as plt
except ImportError:
print('matplotlib is not installed, cannot plot posterior distribution. Please install with grainlearning[plot]')
print(
'matplotlib is not installed, cannot plot posterior distribution. Please install with grainlearning[plot]')
num_steps = posterior.shape[0]
for i, name in enumerate(param_names):
plt.figure(f'Posterior distribution of {name}')
for j in range(6):
plt.subplot(2, 3, j + 1)
plt.plot(param_data[:, i], posterior[int(num_steps * (j + 1) / 6 - 1), :], 'o')
plt.title(f"'Time' step No.{int(num_steps * (j + 1) / 6 - 1):3d} ")
plt.title(f"'Time' step {int(num_steps * (j + 1) / 6 - 1):3d} ")
plt.xlabel(r'$' + name + '$')
plt.ylabel('Posterior distribution')
plt.ylabel('Posterior probability mass')
plt.grid(True)
plt.tight_layout()
if save_fig:
plt.savefig(f'{fig_name}_posterior_{name}.png')
else:
plt.show()
plt.close()


def plot_param_data(fig_name, param_names, param_data_list, save_fig=0):
import matplotlib.pylab as plt
import seaborn as sns
flare_cmap = sns.color_palette("flare", as_cmap=True)
num = len(param_names)
n_cols = int(np.ceil(num / 2))
num = num - 1
Expand All @@ -474,17 +468,15 @@ def plot_param_data(fig_name, param_names, param_data_list, save_fig=0):
for j in range(num):
plt.subplot(2, n_cols, j + 1)
for i in range(num_iter):
plt.plot(param_data_list[i][:, j], param_data_list[i][:, j + 1], 'o', label=f'iterNo. {i:d}')
plt.scatter(param_data_list[i][:, j], param_data_list[i][:, j + 1], edgecolors='white',
color=flare_cmap(i / (num_iter - 1)), label=f'Iter No. {i:d}')
plt.xlabel(r'$' + param_names[j] + '$')
plt.ylabel(r'$' + param_names[j + 1] + '$')
plt.legend()
plt.legend()
plt.tight_layout()
if save_fig:
plt.savefig(f'{fig_name}_param_space.png')
else:
plt.show()
plt.close()


def plot_obs_and_sim(fig_name, ctrl_name, obs_names, ctrl_data, obs_data, sim_data, posteriors, save_fig=0):
Expand Down Expand Up @@ -534,7 +526,53 @@ def plot_obs_and_sim(fig_name, ctrl_name, obs_names, ctrl_data, obs_data, sim_da
plt.tight_layout()
if save_fig:
plt.savefig(f'{fig_name}_obs_and_sim.png')
else:


def plot_pdf(fig_name, param_names, samples, save_fig=0):
"""
Plot the posterior density function of the parameter distribution
:param fig_name: string
:param sim_name: string
:param param_names: list of strings containing parameter names
:param samples: list of ndarray of shape (num_samples, self.num_params)
:param save_fig: bool defaults to False
"""
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

# create a deep copy of the samples (FIXME: this is a workaround for a bug in iBF constructor)
new_samples = [np.copy(sample) for sample in samples]
# pad with NaNs to make all samples have the same length
max_len = int(max([len(sample) for sample in new_samples]))
for i in range(len(new_samples)):
new_samples[i] = np.pad(new_samples[i], ((0, max_len - len(new_samples[i])), (0, 0)), 'constant',
constant_values=np.nan)
# fill the third dimension with iteration number
stacked_samples = np.vstack([new_samples[i] for i in range(len(new_samples))])
third_dim_indices = np.repeat(np.arange(len(new_samples)), new_samples[0].shape[0])
new_samples = np.hstack([stacked_samples, third_dim_indices[:, np.newaxis]])

# convert the array into a pandas dataframe
new_names = param_names + ['Iter']
df = pd.DataFrame(new_samples, columns=new_names)

# plot the parameter distribution in a scatter plot
fig = sns.PairGrid(df, diag_sharey=False, hue="Iter", palette='flare')
fig.map_upper(sns.scatterplot, s=15)
fig.map_lower(sns.kdeplot, thresh=1e-3, levels=5)
fig.map_diag(sns.kdeplot, lw=2)
fig.add_legend(loc='upper right')
fig.fig.canvas.manager.set_window_title('Estimated posterior probability density function')

fig.tight_layout()
if save_fig:
fig.savefig(f'{fig_name}_scatterplot.png')


def close_plots(save_fig=0):
import matplotlib.pylab as plt
if not save_fig:
plt.show()
plt.close()

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ h5py = {version ="^3.7.0", optional = true}
wandb = {version ="^0.13.4", optional = true}
tensorflow = {version ="2.10.0", optional = true}
ipykernel = {version = "*", optional = true}
seaborn = {version = '^0.12.2', optional = true}

[tool.poetry.extras]
docs = ["Sphinx", "sphinx-autodoc-typehints", "sphinx-mdinclude", "sphinx-rtd-theme"]
dev = ["pytest", "pytest-cov", "prospector", "pyroma", "h5py"]
rnn = ["wandb", "tensorflow"]
tutorials = ["ipykernel"]
visuals = ["seaborn"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should add pandas to visuals? It is used in ```plot_pdf`` function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, although pandas got installed automatically when installing seaborn.


[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
This tutorial shows how to run one iteration of Bayesian calibration for a linear regression model.
"""
import os
from grainlearning import BayesianCalibration
from grainlearning.dynamic_systems import IODynamicSystem, DynamicSystem

PATH = os.path.abspath(os.path.dirname(__file__))

calibration = BayesianCalibration.from_dict(
{
"num_iter": 10,
"system": {
"system_type": IODynamicSystem,
"param_min": [0.001, 0.001],
"param_max": [1, 10],
"param_names": ['a', 'b'],
"num_samples": 20,
"obs_data_file": PATH + '/linear_obs.dat',
"obs_names": ['f'],
"ctrl_name": 'u',
"sim_name": 'linear',
"sim_data_dir": PATH + '/sim_data/',
"sim_data_file_ext": '.txt',
"sigma_tol": 0.01,
},
"calibration": {
"inference": {"ess_target": 0.3},
"sampling": {
"max_num_components": 2,
"n_init": 1,
"random_state": 0,
"covariance_type": "full",
}
},
"save_fig": 0,
}
)

# run GrainLearning for one iteration and generate the resampled parameter values
calibration.load_all()
Loading