Commit d03dbd1

adding DER module and tests

beckynevin committed Apr 11, 2024
1 parent 1aab45d commit d03dbd1

Showing 3 changed files with 294 additions and 6 deletions.
240 changes: 240 additions & 0 deletions src/scripts/DeepEvidentialRegression.py
@@ -0,0 +1,240 @@
import argparse
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from scripts import train, models, io


def parse_args():
    parser = argparse.ArgumentParser(description="DER module")
    parser.add_argument(
        "--size_df",
        type=float,
        required=False,
        default=1000,
        help="Used to load the associated .h5 data file",
    )
    parser.add_argument(
        "noise_level",
        type=str,
        default="low",
        help="low, medium, high or vhigh, \
            used to look up associated sigma value",
    )
    parser.add_argument(
        "--normalize",
        required=False,
        action="store_true",
        help="If set, normalize the dataset",
    )
    parser.add_argument(
        "--val_proportion",
        type=float,
        required=False,
        default=0.1,
        help="Proportion of the dataset to use as validation",
    )
    parser.add_argument(
        "--randomseed",
        type=int,
        required=False,
        default=42,
        help="Random seed used for shuffling the training and validation set",
    )
    parser.add_argument(
        "--generatedata",
        action="store_true",
        default=False,
        help="option to generate df; if not specified, \
            default behavior is to load from file",
    )
    parser.add_argument(
        "--batchsize",
        type=int,
        required=False,
        default=100,
        help="Size of batches used in the train dataloader",
    )
    # now args for model
    parser.add_argument(
        "--init_lr",
        type=float,
        required=False,
        default=0.001,
        help="Learning rate",
    )
    parser.add_argument(
        "--coeff",
        type=float,
        required=False,
        default=0.5,
        help="Regularization coefficient; see the DER literature",
    )
    parser.add_argument(
        "--loss_type",
        type=str,
        required=False,
        default="SDER",
        help="Loss types. \
            For MVE, options are no_var_loss, var_loss, \
            and bnn_loss. \
            For DER, options are DER or SDER",
    )
    parser.add_argument(
        "wd",
        type=str,
        help="Top level of directory, required arg",
    )
    parser.add_argument(
        "--model_type",
        type=str,
        required=False,
        default="DER",
        help="Beginning of name for saved checkpoints and figures",
    )
    parser.add_argument(
        "--n_epochs",
        type=int,
        required=False,
        default=100,
        help="number of epochs",
    )
    parser.add_argument(
        "--path_to_models",
        type=str,
        required=False,
        default="models/",
        help="path to where the checkpoints are saved",
    )
    parser.add_argument(
        "--save_all_checkpoints",
        action="store_true",
        default=False,
        help="option to save all checkpoints",
    )
    parser.add_argument(
        "--save_final_checkpoint",
        action="store_true",  # set to True if the flag is present
        default=False,  # default to False if the flag is absent
        help="option to save the final epoch checkpoint",
    )
    parser.add_argument(
        "--overwrite_final_checkpoint",
        action="store_true",
        default=False,
        help="option to overwrite already saved checkpoints",
    )
    parser.add_argument(
        "--plot",
        action="store_true",
        default=False,
        help="option to plot in notebook",
    )
    parser.add_argument(
        "--savefig",
        action="store_true",
        default=False,
        help="option to save a figure of the true and predicted values",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="verbose option for train",
    )
    return parser.parse_args()


if __name__ == "__main__":
    namespace = parse_args()
    size_df = namespace.size_df
    noise = namespace.noise_level
    norm = namespace.normalize
    val_prop = namespace.val_proportion
    rs = namespace.randomseed
    BATCH_SIZE = namespace.batchsize
    sigma = io.DataPreparation.get_sigma(noise)
    if namespace.generatedata:
        # generate the df
        data = io.DataPreparation()
        data.sample_params_from_prior(size_df)
        data.simulate_data(data.params, sigma, "linear_homogeneous")
        df_array = data.get_dict()
        # Convert non-tensor entries to tensors
        df = {}
        for key, value in df_array.items():
            if isinstance(value, TensorDataset):
                # Keep tensors as they are
                df[key] = value
            else:
                # Convert lists to tensors
                df[key] = torch.tensor(value)
    else:
        loader = io.DataLoader()
        df = loader.load_data_h5(
            "linear_sigma_" + str(sigma) + "_size_" + str(size_df),
            path="/Users/rnevin/Documents/DeepUQ/data/",
        )
    print("df", df)
    len_df = len(df["params"][:, 0].numpy())
    len_x = len(df["inputs"].numpy())
    ms_array = np.repeat(df["params"][:, 0].numpy(), len_x)
    bs_array = np.repeat(df["params"][:, 1].numpy(), len_x)
    xs_array = np.tile(df["inputs"].numpy(), len_df)
    ys_array = np.reshape(df["output"].numpy(), (len_df * len_x))
    inputs = np.array([xs_array, ms_array, bs_array]).T
    model_inputs, model_outputs = io.DataPreparation.normalize(
        inputs, ys_array, norm
    )
    x_train, x_val, y_train, y_val = io.DataPreparation.train_val_split(
        model_inputs, model_outputs, val_proportion=val_prop, random_state=rs
    )
    trainData = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
    trainDataLoader = DataLoader(trainData,
                                 batch_size=BATCH_SIZE,
                                 shuffle=True)
    print("[INFO] initializing the model...")
    # set the device we will be using to train the model
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_name = namespace.model_type + "_noise_" + noise
    model, lossFn = models.model_setup_DER(namespace.loss_type, DEVICE)
    model_ensemble = train.train_DER(
        trainDataLoader,
        x_val,
        y_val,
        namespace.init_lr,
        DEVICE,
        namespace.coeff,
        namespace.loss_type,
        namespace.wd,
        model_name,
        EPOCHS=namespace.n_epochs,
        path_to_model=namespace.path_to_models,
        save_all_checkpoints=namespace.save_all_checkpoints,
        save_final_checkpoint=namespace.save_final_checkpoint,
        overwrite_final_checkpoint=namespace.overwrite_final_checkpoint,
        plot=namespace.plot,
        savefig=namespace.savefig,
        verbose=namespace.verbose,
    )
    # signature of train.train_DER, kept for reference:
    '''
    trainDataLoader,
    x_val,
    y_val,
    INIT_LR,
    DEVICE,
    COEFF,
    loss_type,
    wd,
    model_name="DER",
    EPOCHS=100,
    path_to_model="models/",
    save_all_checkpoints=False,
    save_final_checkpoint=False,
    overwrite_final_checkpoint=False,
    plot=True,
    savefig=True,
    verbose=True
    '''
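For readers unfamiliar with the objective this script selects via --loss_type, here is a minimal sketch of the standard Deep Evidential Regression loss, assuming the Normal-Inverse-Gamma formulation of Amini et al. (2020); the loss actually returned by models.model_setup_DER may differ in detail, and loss_der, gamma, nu, alpha, and beta are illustrative names rather than identifiers from this repository. The network head outputs four values per example, and coeff plays the role of the --coeff argument above.

import torch


def loss_der(y, gamma, nu, alpha, beta, coeff):
    # gamma: predicted mean; nu, alpha, beta: Normal-Inverse-Gamma evidence
    error = gamma - y
    omega = 2.0 * beta * (1.0 + nu)
    # negative log-likelihood of the NIG marginal (a Student-t)
    nll = (
        0.5 * torch.log(torch.pi / nu)
        - alpha * torch.log(omega)
        + (alpha + 0.5) * torch.log(nu * error ** 2 + omega)
        + torch.lgamma(alpha)
        - torch.lgamma(alpha + 0.5)
    )
    # evidence regularizer: penalizes confident errors, weighted by coeff
    reg = torch.abs(error) * (2.0 * nu + alpha)
    return torch.mean(nll + coeff * reg)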
2 changes: 1 addition & 1 deletion src/scripts/train.py
@@ -206,7 +206,7 @@ def train_DER(
        ax1.annotate(
            str(loss_type)
            + " = "
-            + str(round(loss, 2))
+            + str(round(NIGloss_val, 2))
            + "\n"
            + r"MSE = "
            + str(round(mse, 2)),
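This one-line change fixes the loss value printed on the diagnostic figure: the annotation labeled with str(loss_type) previously displayed loss, which by its name presumably held the last training-batch loss, and now displays NIGloss_val, the validation NIG loss the label refers to.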
58 changes: 53 additions & 5 deletions test/test_DeepEnsemble.py
@@ -28,7 +28,55 @@ def temp_directory():
    shutil.rmtree(temp_dir)


-def test_chkpt_saved(temp_directory):
+def test_DER_chkpt_saved(temp_directory):
+    noise_level = "low"
+    wd = str(temp_directory) + "/"
+    n_epochs = 2
+    subprocess_args = [
+        "python",
+        "src/scripts/DeepEvidentialRegression.py",
+        noise_level,
+        wd,
+        "--n_epochs",
+        str(n_epochs),
+        "--save_final_checkpoint",
+        "--savefig",
+        "--generatedata",
+    ]
+    # now run the subprocess
+    subprocess.run(subprocess_args, check=True)
+    # check if the right number of checkpoints are saved
+    models_folder = os.path.join(temp_directory, "models")
+    # list all files in the "models" folder
+    files_in_models_folder = os.listdir(models_folder)
+    # assert that the number of files is equal to 1
+    assert (
+        len(files_in_models_folder) == 1
+    ), "Expected 1 file in the 'models' folder"
+
+    # check if the right number of images were saved
+    animations_folder = os.path.join(temp_directory, "images/animations")
+    files_in_animations_folder = os.listdir(animations_folder)
+    # assert that the number of files is equal to 1
+    assert (
+        len(files_in_animations_folder) == 1
+    ), "Expected 1 file in the 'images/animations' folder"
+
+    # also check that all files in here have the same name elements
+    expected_substring = "epoch_" + str(n_epochs - 1)
+    for file_name in files_in_models_folder:
+        assert (
+            expected_substring in file_name
+        ), f"File '{file_name}' does not contain the expected substring"
+
+    for file_name in files_in_animations_folder:
+        assert (
+            expected_substring in file_name
+        ), f"File '{file_name}' does not contain the expected substring"
+
+
+def test_DE_chkpt_saved(temp_directory):
    noise_level = "low"
    n_models = 10
    wd = str(temp_directory) + "/"
@@ -79,7 +127,7 @@ def test_chkpt_saved(temp_directory):


@pytest.mark.xfail(strict=True)
-def test_no_chkpt_saved_xfail(temp_directory):
+def test_DE_no_chkpt_saved_xfail(temp_directory):
    noise_level = "low"
    n_models = 10
    wd = str(temp_directory) + "/"
@@ -106,7 +154,7 @@ def test_no_chkpt_saved_xfail(temp_directory):
), "Expected 10 files in the 'models' folder"


-def test_no_chkpt_saved(temp_directory):
+def test_DE_no_chkpt_saved(temp_directory):
    noise_level = "low"
    n_models = 10
    wd = str(temp_directory) + "/"
@@ -132,7 +180,7 @@ def test_no_chkpt_saved(temp_directory):
"Expect 0 files in the 'models' folder"


-def test_run_simple_ensemble(temp_directory):
+def test_DE_run_simple_ensemble(temp_directory):
    noise_level = "low"
    n_models = "10"
    # here = os.getcwd()
@@ -154,7 +202,7 @@ def test_run_simple_ensemble(temp_directory):


@pytest.mark.xfail(strict=True)
-def test_missing_req_arg(temp_directory):
+def test_DE_missing_req_arg(temp_directory):
    noise_level = "low"
    n_models = "10"
    subprocess_args = [
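As a usage reference, test_DER_chkpt_saved above drives the new module end to end with the equivalent of the following command, where <working_dir> stands in for the pytest temporary directory:

python src/scripts/DeepEvidentialRegression.py low <working_dir>/ --n_epochs 2 --save_final_checkpoint --savefig --generatedata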
