Merge pull request #59 from deepskies/issue/remove_inference
Issue/remove inference
beckynevin authored Feb 5, 2024
2 parents 9d2f71d + 2717219 commit 832c5f4
Showing 16 changed files with 4 additions and 33,663 deletions.
317 changes: 0 additions & 317 deletions notebooks/SBI.ipynb

This file was deleted.

697 changes: 0 additions & 697 deletions notebooks/SBI_hierarchical_csv.ipynb

This file was deleted.

16,178 changes: 0 additions & 16,178 deletions notebooks/SBI_linefit.ipynb

This file was deleted.

Empty file removed notebooks/example.ipynb
1,125 changes: 0 additions & 1,125 deletions notebooks/numpyro_iterative_dataset_varying_noise.ipynb

This file was deleted.

663 changes: 0 additions & 663 deletions notebooks/numpyro_linefit.ipynb

This file was deleted.

1,916 changes: 0 additions & 1,916 deletions notebooks/pendulum_error_one_moment_in_time_DeepEnsemble.ipynb

This file was deleted.

3,995 changes: 0 additions & 3,995 deletions notebooks/pendulum_numpyro_many_times_hierarchical_ex.ipynb

This file was deleted.

2,105 changes: 0 additions & 2,105 deletions notebooks/pendulum_one_time_hierarchical.ipynb

This file was deleted.

3,248 changes: 0 additions & 3,248 deletions notebooks/pendulum_simple_numpyro_inference.ipynb

This file was deleted.

2,192 changes: 0 additions & 2,192 deletions notebooks/sampling_numpyro.ipynb

This file was deleted.

579 changes: 1 addition & 578 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions pyproject.toml
@@ -1,14 +1,14 @@
 [tool.poetry]
 name = "DeepUQ"
+packages = [{include = "*", from="src"}]
 version = "0.1.0"
-description = ""
+description = "a package for investigating and comparing ML model's predictive uncertainties"
 authors = ["beckynevin <[email protected]>"]
 readme = "README.md"
 license = "MIT"

 [tool.poetry.dependencies]
 python = ">=3.9,<3.11"
-numpyro = "^0.13.2"
 jupyter = "^1.0.0"
 matplotlib = "^3.7.1"
 arviz = "^0.15.1"
@@ -17,7 +17,6 @@
 scikit-learn = "^1.3.0"
 graphviz = "^0.20.1"
 seaborn = "^0.12.2"
 torch = "^2.0.1"
-sbi = "^0.21.0"
 pytest-cov = "^4.1.0"
 deepbench = "^0.2.2"

96 changes: 0 additions & 96 deletions src/scripts/models.py
@@ -1,8 +1,4 @@
-import numpyro
-import numpyro.distributions as dist
 import numpy as np
-import jax
-import jax.numpy as jnp  # yes i know this is confusing
 import torch.nn as nn
 import torch
 import math
@@ -63,101 +59,9 @@ def forward(self, x):
 # HMC after SBI to look at degeneracies between params
 # different guides (some are slower but better at showing degeneracies)

-## define the platform and number of cores (one chain per core)
-numpyro.set_platform('cpu')
-core_num = 4
-numpyro.set_host_device_count(core_num)
-
-def hierarchical_model(planet_code,
-                       pendulum_code,
-                       times,
-                       exponential,
-                       pos_obs=None):
-    """
-    """
-    ## inputs to a numpyro model are rows from a dataframe:
-    ## planet_code - array of integer codes identifying which planet {0...1}
-    ## pendulum_code - array of integer codes identifying which pendulum {0...7}
-    ## times - moments in time (s)
-    ## pos_obs - optional, defaults to None; used to condition the model on
-    ## data when the observed positions (xpos) are supplied
-
-    ## numpyro models work by drawing parameters from sample statements
-    ## first, we define the global parameters: the mean and sigma of the normal
-    ## from which the individual a_g values of each planet will be drawn
-
-    # μ_a_g = numpyro.sample("μ_a_g", dist.LogUniform(5.0, 15.0))
-    μ_a_g = numpyro.sample("μ_a_g", dist.TruncatedNormal(12.5, 5, low=0.01))
-    # scale parameters should be log uniform so that they don't go negative
-    # and so that they're not uniform
-    # (1 / x in linear space)
-    σ_a_g = numpyro.sample("σ_a_g", dist.TruncatedNormal(0.1, 0.01, low=0.01))
-    n_planets = len(np.unique(planet_code))
-    n_pendulums = len(np.unique(pendulum_code))
-
-    ## plates are a numpyro primitive (a context manager) for handling
-    ## conditional independence; for instance, we wish to model a_g for each
-    ## planet independently
-    with numpyro.plate("planet_i", n_planets):
-        a_g = numpyro.sample("a_g", dist.TruncatedNormal(μ_a_g, σ_a_g,
-                                                         low=0.01))
-        # helps because the a_g values are pulled from the same normal dist;
-        # removes the dependency of a_g on σ_a_g at the prior level,
-        # removing one covariance from the model, so the model is easier
-        # to sample from
-
-    ## we also wish to model L and theta for each pendulum independently
-    ## here we draw from truncated normal distributions
-    with numpyro.plate("pend_i", n_pendulums):
-        L = numpyro.sample("L", dist.TruncatedNormal(5, 2, low=0.01))
-        theta = numpyro.sample("theta", dist.TruncatedNormal(jnp.pi/100,
-                                                             jnp.pi/500,
-                                                             low=0.00001))
-
-    ## σ is the error on the position measurement at each moment in time,
-    ## which we also model
-    ## (eventually, we should also model the error on each parameter independently)
-    ## draw from an exponential distribution parameterized by a rate parameter;
-    ## the mean of an exponential distribution is 1/r where r is the rate
-    ## exponential distributions are never negative, which is good for an error term
-    σ = numpyro.sample("σ", dist.Exponential(exponential))
-
-    ## the moments in time are not independent, so we do not place the following
-    ## in a plate; instead, the index brackets segment the model by pendulum and
-    ## by planet, telling numpyro how to conduct the inference
-    modelx = L[pendulum_code] * jnp.sin(theta[pendulum_code] *
-                                        jnp.cos(jnp.sqrt(a_g[planet_code] /
-                                                         L[pendulum_code]) * times))
-    ## don't forget to use jnp instead of np so jax knows what to do
-    ## A BIG QUESTION I STILL HAVE IS WHAT IS THE LIKELIHOOD? IS IT JUST SAMPLED FROM?
-    ## again, for each pendulum we compare the observed to the modeled position:
-    with numpyro.plate("data", len(pendulum_code)):
-        pos = numpyro.sample("obs", dist.Normal(modelx, σ), obs=pos_obs)
-
-
-def unpooled_model(planet_code,
-                   pendulum_code,
-                   times,
-                   exponential,
-                   pos_obs=None):
-    n_planets = len(np.unique(planet_code))
-    n_pendulums = len(np.unique(pendulum_code))
-    with numpyro.plate("planet_i", n_planets):
-        a_g = numpyro.sample("a_g", dist.TruncatedNormal(12.5, 5,
-                                                         low=0, high=25))
-    with numpyro.plate("pend_i", n_pendulums):
-        L = numpyro.sample("L", dist.TruncatedNormal(5, 2, low=0.01))
-        theta = numpyro.sample("theta", dist.TruncatedNormal(jnp.pi/100,
-                                                             jnp.pi/500,
-                                                             low=0.00001))
-    σ = numpyro.sample("σ", dist.Exponential(exponential))
-    modelx = L[pendulum_code] * jnp.sin(theta[pendulum_code] *
-                                        jnp.cos(jnp.sqrt(a_g[planet_code] /
-                                                         L[pendulum_code]) * times))
-    with numpyro.plate("data", len(pendulum_code)):
-        pos = numpyro.sample("obs", dist.Normal(modelx, σ), obs=pos_obs)

 # This is from PasteurLabs -
 # https://github.com/pasteurlabs/unreasonable_effective_der/blob/main/models.py


 class Model(nn.Module):
     def __init__(self, n_output, n_hidden=64):
         super().__init__()
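For readers tracing this commit's history, the following is a minimal, self-contained sketch of the numpyro pattern the deleted models implemented. It is an illustration, not repo code: pendulum_model condenses the deleted hierarchical_model down to a single shared a_g, and the rate argument (10.0) and toy data are invented values.

import jax
import jax.numpy as jnp
import numpy as np
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS

def pendulum_model(pendulum_code, times, exponential, pos_obs=None):
    # one shared gravitational acceleration; per-pendulum length and amplitude
    a_g = numpyro.sample("a_g", dist.TruncatedNormal(12.5, 5.0, low=0.01))
    n_pendulums = len(np.unique(pendulum_code))
    with numpyro.plate("pend_i", n_pendulums):
        L = numpyro.sample("L", dist.TruncatedNormal(5.0, 2.0, low=0.01))
        theta = numpyro.sample("theta", dist.TruncatedNormal(jnp.pi / 100,
                                                             jnp.pi / 500,
                                                             low=1e-5))
    # exponential prior keeps the measurement error positive
    sigma = numpyro.sample("sigma", dist.Exponential(exponential))
    modelx = L[pendulum_code] * jnp.sin(theta[pendulum_code] *
             jnp.cos(jnp.sqrt(a_g / L[pendulum_code]) * times))
    with numpyro.plate("data", len(pendulum_code)):
        numpyro.sample("obs", dist.Normal(modelx, sigma), obs=pos_obs)

# toy data: 2 pendulums, 20 time samples each, with stand-in observations
pendulum_code = np.repeat(np.arange(2), 20)
times = np.tile(np.linspace(0.0, 5.0, 20), 2)
pos_obs = np.random.default_rng(0).normal(0.0, 0.1, size=40)

mcmc = MCMC(NUTS(pendulum_model), num_warmup=500, num_samples=1000)
mcmc.run(jax.random.PRNGKey(0), pendulum_code, times, 10.0, pos_obs=pos_obs)
mcmc.print_summary()

This runs a single chain, so it skips the set_host_device_count call the deleted module used for multi-chain sampling.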
24 changes: 1 addition & 23 deletions src/scripts/train.py
@@ -4,13 +4,12 @@
"""
import argparse
import torch
import sbi
import time
import glob
import numpy as np
import matplotlib.pyplot as plt
import torch
from src.scripts import models
from scripts import models
import functools


@@ -428,27 +427,6 @@ def train_DE(trainDataLoader,



-def train_SBI_hierarchical(thetas, xs, prior):
-    # Now let's put them in a tensor form that SBI can read.
-    theta = torch.tensor(thetas, dtype=torch.float32)
-    x = torch.tensor(xs, dtype=torch.float32)
-
-    # instantiate the neural density estimator
-    neural_posterior = sbi.utils.posterior_nn(model='maf')  # ,
-    #                                         embedding_net=embedding_net,
-    #                                         hidden_features=hidden_features,
-    #                                         num_transforms=num_transforms)
-    # set up the inference procedure with the SNPE-C procedure
-    inference = sbi.inference.SNPE(prior=prior,
-                                   density_estimator=neural_posterior,
-                                   device="cpu")
-
-    # now that we have both the simulated images and
-    # parameters defined properly, we can train the SBI.
-    density_estimator = inference.append_simulations(theta, x).train()
-    return inference.build_posterior(density_estimator)
-
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--data_source", type=str, help="Data used to train the model")
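As a companion to the deletion above, here is a hypothetical usage sketch of the removed train_SBI_hierarchical helper, assuming the sbi ~0.21 API that was pinned in pyproject.toml before this commit. The toy line-fit simulator, the BoxUniform prior bounds, and the observation x_o are all invented for illustration; train_SBI_hierarchical refers to the deleted function shown in the diff.

import numpy as np
import torch
from sbi.utils import BoxUniform

# toy line-fit simulator: theta = (slope, intercept), x = noisy y(t) samples
rng = np.random.default_rng(0)
t = np.linspace(0.0, 1.0, 10)
thetas = rng.uniform(-1.0, 1.0, size=(500, 2))
xs = thetas[:, :1] * t + thetas[:, 1:] + rng.normal(0.0, 0.05, size=(500, 10))

# box prior over (slope, intercept), matching the simulator's range
prior = BoxUniform(low=-torch.ones(2), high=torch.ones(2))

# train the neural posterior on the simulated (theta, x) pairs
posterior = train_SBI_hierarchical(thetas, xs, prior)

# condition on a made-up observation and draw posterior samples
x_o = torch.tensor(0.5 * t + 0.1, dtype=torch.float32)
samples = posterior.sample((1000,), x=x_o)
print(samples.mean(dim=0))  # should land roughly near (0.5, 0.1)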
