Merge pull request #74 from deepskies/issue/argparse
Issue/argparse
beckynevin authored Apr 10, 2024
2 parents 973af91 + 0723da7 commit 1cc9a0e
Showing 11 changed files with 1,003 additions and 1,128 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
@@ -1,6 +1,6 @@
 name: Lint it
 
-on: [pull_request]
+on: push
 
 jobs:
   lint:
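Note (an editorial aside, not part of this diff): `on: push` replaces the pull-request trigger entirely, so lint no longer runs on PR events from forks. If the intent were to lint both, the two triggers can be combined, e.g.:

on: [push, pull_request]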
18 changes: 16 additions & 2 deletions .github/workflows/test.yaml
@@ -8,6 +8,15 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache
+            ~/.local/share/virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
       - uses: actions/checkout@v2
       - name: Set up Python 3.10
         uses: actions/setup-python@v2
@@ -27,6 +36,11 @@ jobs:
         shell: bash
         run: python -m poetry install
 
+      - name: Create Environment File
+        run: echo "PYTHONPATH=$(pwd):$(pwd)/src" >> ${{ runner.workspace }}/.env
+
       - name: Test with pytest
-        run: |
-          python3 -m poetry run pytest --cov
+        run: python -m poetry run pytest --cov
+        env:
+          PYTHONPATH: ${{ env.PYTHONPATH }}
+          ENV_FILE: ${{ runner.workspace }}/.env
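Note (an editorial aside, not part of this diff): appending to an `.env` file in the workspace does not populate `${{ env.PYTHONPATH }}` for later steps; GitHub Actions picks up step-level exports from the special `$GITHUB_ENV` file. A minimal sketch of that variant, under the assumption that the `env:` block above is meant to see the exported path:

      - name: Create Environment File
        run: echo "PYTHONPATH=$(pwd):$(pwd)/src" >> "$GITHUB_ENV"  # visible to later steps as ${{ env.PYTHONPATH }}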
10 changes: 10 additions & 0 deletions README.md
@@ -18,6 +18,16 @@ Getting a little more specific:

 ![python module overview](images/workflow_deepUQ.png)
 
+These modules can be accessed via the IPython example notebooks or via the model modules (i.e., `DeepEnsemble.py`). For example, to ingest data and train a Deep Ensemble:
+> cd src/scripts/
+
+> python DeepEnsemble.py low 10 /Users/rnevin/Documents/DeepUQ/ --save_final_checkpoint --savefig --n_epochs=10
+
+This command will train an ensemble of 10 networks for 10 epochs each on the low-noise data, and it will save figures and final checkpoints to the specified directory. The required arguments are the noise setting (low/medium/high), the number of networks in the ensemble, and the working directory.
+
+For more information on the arguments:
+> python DeepEnsemble.py --help
+
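(An illustrative variant of mine, not text from this commit; the output directory is a placeholder and the flags are those documented above:)
> python DeepEnsemble.py medium 5 ./DeepUQ_results/ --n_epochs=20 --savefig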
 ## Installation
 
 ### Clone this repo
1,260 changes: 295 additions & 965 deletions poetry.lock

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions pyproject.toml
@@ -11,20 +11,18 @@ license = "MIT"
 python = ">=3.9,<3.11"
 jupyter = "^1.0.0"
 matplotlib = "^3.7.1"
-arviz = "^0.15.1"
-corner = "^2.2.2"
-scikit-learn = "^1.3.0"
-graphviz = "^0.20.1"
-seaborn = "^0.12.2"
 torch = "^2.0.1"
-pytest-cov = "^4.1.0"
 deepbench = "^0.2.2"
 sbi = "^0.22.0"
 h5py = "^3.10.0"
-flake8 = "^7.0.0"
-black = "^24.2.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest-cov = "^4.1.0"
+flake8 = "^7.0.0"
 pytest = "^7.3.2"
+pre-commit = "^3.7.0"
+black = "^24.3.0"
 
 [build-system]
-requires = ["poetry-core"]
+requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
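A usage note (mine, not from this diff): with Poetry 1.2+, `poetry install` installs the dev group above by default, while `--only main` restricts the environment to the runtime dependencies:

poetry install              # main + dev groups (pytest, flake8, black, pre-commit)
poetry install --only main  # runtime dependencies only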
260 changes: 206 additions & 54 deletions src/scripts/DeepEnsemble.py
@@ -1,95 +1,247 @@
 import argparse
 import logging
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, TensorDataset
+from scripts import train, models, io
 
 
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Transferring data from embargo butler to another butler"
-    )
+def beta_type(value):
+    # argparse always passes a string, so try to parse a numeric beta
+    # (e.g. "--BETA 0.25") before checking the keyword options.
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        pass
+    if value.lower() in (
+        "linear_decrease",
+        "step_decrease_to_0.5",
+        "step_decrease_to_1.0",
+    ):
+        return value.lower()
+    raise argparse.ArgumentTypeError(
+        "BETA must be a float or one of 'linear_decrease', "
+        "'step_decrease_to_0.5', 'step_decrease_to_1.0'"
+    )
 
 
-# at least one arg in dataId needed for 'where' clause.
+def parse_args():
+    parser = argparse.ArgumentParser(description="data handling module")
     parser.add_argument(
-        "fromrepo",
-        type=str,
-        nargs="?",
-        default="/repo/embargo",
-        help="Butler Repository path from which data is transferred. \
-            Input str. Default = '/repo/embargo'",
+        "--size_df",
+        type=float,
+        required=False,
+        default=1000,
+        help="Used to load the associated .h5 data file",
     )
     parser.add_argument(
-        "torepo",
+        "noise_level",
         type=str,
-        help="Repository to which data is transferred. Input str",
+        default="low",
+        help="low, medium, high or vhigh, \
+            used to look up the associated sigma value",
     )
     parser.add_argument(
-        "instrument",
-        type=str,
-        nargs="?",
-        default="LATISS",
-        help="Instrument. Input str",
+        "--normalize",
+        required=False,
+        action="store_true",
+        help="If set, normalize the dataset",
     )
     parser.add_argument(
-        "--embargohours",
+        "--val_proportion",
         type=float,
         required=False,
-        default=80.0,
-        help="Embargo time period in hours. Input float",
+        default=0.1,
+        help="Proportion of the dataset to use as validation",
     )
     parser.add_argument(
-        "--datasettype",
+        "--randomseed",
+        type=int,
         required=False,
-        nargs="+",
-        # default=[]
-        help="Dataset type. Input list or str",
+        default=42,
+        help="Random seed used for shuffling the training and validation set",
     )
     parser.add_argument(
-        "--collections",
-        # type=str,
-        nargs="+",
+        "--batchsize",
+        type=int,
         required=False,
-        default="LATISS/raw/all",
-        help="Data Collections. Input list or str",
+        default=100,
+        help="Size of the batches used in the train DataLoader",
     )
+    # now args for model
     parser.add_argument(
-        "--nowtime",
-        type=str,
+        "n_models",
+        type=int,
+        default=100,
+        help="Number of MVEs in the ensemble",
+    )
+    parser.add_argument(
+        "--init_lr",
+        type=float,
         required=False,
-        default="now",
-        help="Now time in (ISO, TAI timescale). If left blank it will \
-            use astropy.time.Time.now.",
+        default=0.001,
+        help="Learning rate",
     )
     parser.add_argument(
-        "--move",
+        "--loss_type",
         type=str,
         required=False,
-        default="False",
-        help="Copies if False, deletes original if True",
+        default="bnll_loss",
+        help="Loss type for the MVEs; options are no_var_loss, var_loss, \
+            and bnll_loss",
     )
+    parser.add_argument(
+        "--BETA",
+        type=beta_type,
+        required=False,
+        default=0.5,
+        help="If loss_type is bnll_loss, specify a beta as a float, or \
+            one of the string options: linear_decrease, \
+            step_decrease_to_0.5, and step_decrease_to_1.0",
+    )
     parser.add_argument(
-        "--log",
+        "wd",
         type=str,
+        help="Top level of the working directory (required)",
+    )
+    parser.add_argument(
+        "--model_type",
+        type=str,
         required=False,
+        default="DE",
+        help="Beginning of the name for saved checkpoints and figures",
+    )
+    parser.add_argument(
+        "--n_epochs",
+        type=int,
+        required=False,
-        default="False",
-        help="No logging if False, longlog if True",
+        default=100,
+        help="Number of epochs for each MVE",
     )
     parser.add_argument(
-        "--desturiprefix",
+        "--path_to_models",
         type=str,
         required=False,
-        default="False",
-        help="Define dest uri if you need to run ingest for raws",
+        default="models/",
+        help="Path to where the checkpoints are saved",
     )
+    parser.add_argument(
+        "--save_all_checkpoints",
+        action="store_true",
+        default=False,
+        help="option to save all checkpoints",
+    )
+    parser.add_argument(
+        "--save_final_checkpoint",
+        action="store_true",  # set to True if the flag is present
+        default=False,  # False if the flag is absent
+        help="option to save the final epoch checkpoint for each ensemble",
+    )
+    parser.add_argument(
+        "--overwrite_final_checkpoint",
+        action="store_true",
+        default=False,
+        help="option to overwrite already saved checkpoints",
+    )
+    parser.add_argument(
+        "--plot",
+        action="store_true",
+        default=False,
+        help="option to plot in notebook",
+    )
+    parser.add_argument(
+        "--savefig",
+        action="store_true",
+        default=False,
+        help="option to save a figure of the true and predicted values",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        default=False,
+        help="verbose option for train",
+    )
     return parser.parse_args()


 if __name__ == "__main__":
     namespace = parse_args()
-    # Define embargo and destination butler
-    # If move is true, then you'll need write
-    # permissions from the fromrepo (embargo)
-    dest_butler = namespace.torepo
-    if namespace.log == "True":
-        # CliLog.initLog(longlog=True)
-        logger = logging.getLogger("lsst.transfer.embargo")
-        logger.info("from path: %s", namespace.fromrepo)
-        logger.info("to path: %s", namespace.torepo)
+    size_df = namespace.size_df
+    noise = namespace.noise_level
+    norm = namespace.normalize
+    val_prop = namespace.val_proportion
+    rs = namespace.randomseed
+    BATCH_SIZE = namespace.batchsize
+    sigma = io.DataPreparation.get_sigma(noise)
+
+    # generate the df
+    data = io.DataPreparation()
+    data.sample_params_from_prior(size_df)
+    data.simulate_data(data.params, sigma, "linear_homogeneous")
+    df_array = data.get_dict()
+    # Convert non-tensor entries to tensors
+    df = {}
+    for key, value in df_array.items():
+        if isinstance(value, TensorDataset):
+            # Keep tensors as they are
+            df[key] = value
+        else:
+            # Convert lists to tensors
+            df[key] = torch.tensor(value)
+
+    len_df = len(df["params"][:, 0].numpy())
+    len_x = len(df["inputs"].numpy())
+    ms_array = np.repeat(df["params"][:, 0].numpy(), len_x)
+    bs_array = np.repeat(df["params"][:, 1].numpy(), len_x)
+    xs_array = np.tile(df["inputs"].numpy(), len_df)
+    ys_array = np.reshape(df["output"].numpy(), (len_df * len_x))
+
+    """
+    loader = io.DataLoader()
+    df = loader.load_data_h5(
+        "linear_sigma_" + str(sigma) + "_size_" + str(size_df),
+        path="/Users/rnevin/Documents/DeepUQ/data/",
+    )
+    len_df = len(df["params"][:, 0].numpy())
+    len_x = len(df["inputs"].numpy())
+    ms_array = np.repeat(df["params"][:, 0].numpy(), len_x)
+    bs_array = np.repeat(df["params"][:, 1].numpy(), len_x)
+    xs_array = np.tile(df["inputs"].numpy(), len_df)
+    ys_array = np.reshape(df["output"].numpy(), (len_df * len_x))
+    print(df)
+    STOP
+    """
+    inputs = np.array([xs_array, ms_array, bs_array]).T
+    model_inputs, model_outputs = io.DataPreparation.normalize(inputs,
+                                                               ys_array,
+                                                               norm)
+    x_train, x_val, y_train, y_val = io.DataPreparation.train_val_split(
+        model_inputs, model_outputs, val_proportion=val_prop, random_state=rs
+    )
+    trainData = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
+    trainDataLoader = DataLoader(trainData,
+                                 batch_size=BATCH_SIZE,
+                                 shuffle=True)
+    print("[INFO] initializing the gal model...")
+    # set the device we will be using to train the model
+    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    model_name = namespace.model_type + "_noise_" + noise
+    model, lossFn = models.model_setup_DE(namespace.loss_type, DEVICE)
+    model_ensemble = train.train_DE(
+        trainDataLoader,
+        x_val,
+        y_val,
+        namespace.init_lr,
+        DEVICE,
+        namespace.loss_type,
+        namespace.n_models,
+        namespace.wd,
+        model_name,
+        BETA=namespace.BETA,
+        EPOCHS=namespace.n_epochs,
+        path_to_model=namespace.path_to_models,
+        save_all_checkpoints=namespace.save_all_checkpoints,
+        save_final_checkpoint=namespace.save_final_checkpoint,
+        overwrite_final_checkpoint=namespace.overwrite_final_checkpoint,
+        plot=namespace.plot,
+        savefig=namespace.savefig,
+        verbose=namespace.verbose,
+    )
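A quick, hypothetical check of the `--BETA` parsing defined above (assumes `src/scripts` is importable; not part of this commit):

from DeepEnsemble import beta_type

beta_type("0.25")             # -> 0.25: numeric strings parse as floats
beta_type("linear_decrease")  # -> "linear_decrease"
beta_type("bogus")            # raises argparse.ArgumentTypeError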