diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 5035d7f08b..0fb1883906 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -25,23 +25,23 @@ def glm_hierarchical_model(random_seed=123): """Sample glm hierarchical model to use in benchmarks""" np.random.seed(random_seed) - data = pd.read_csv(pm.get_data('radon.csv')) - data['log_radon'] = data['log_radon'].astype(theano.config.floatX) + data = pd.read_csv(pm.get_data("radon.csv")) + data["log_radon"] = data["log_radon"].astype(theano.config.floatX) county_idx = data.county_code.values n_counties = len(data.county.unique()) with pm.Model() as model: - mu_a = pm.Normal('mu_a', mu=0., sd=100**2) - sigma_a = pm.HalfCauchy('sigma_a', 5) - mu_b = pm.Normal('mu_b', mu=0., sd=100**2) - sigma_b = pm.HalfCauchy('sigma_b', 5) - a = pm.Normal('a', mu=0, sd=1, shape=n_counties) - b = pm.Normal('b', mu=0, sd=1, shape=n_counties) + mu_a = pm.Normal("mu_a", mu=0.0, sd=100 ** 2) + sigma_a = pm.HalfCauchy("sigma_a", 5) + mu_b = pm.Normal("mu_b", mu=0.0, sd=100 ** 2) + sigma_b = pm.HalfCauchy("sigma_b", 5) + a = pm.Normal("a", mu=0, sd=1, shape=n_counties) + b = pm.Normal("b", mu=0, sd=1, shape=n_counties) a = mu_a + sigma_a * a b = mu_b + sigma_b * b - eps = pm.HalfCauchy('eps', 5) + eps = pm.HalfCauchy("eps", 5) radon_est = a[county_idx] + b[county_idx] * data.floor.values - pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon) + pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon) return model @@ -50,24 +50,27 @@ def mixture_model(random_seed=1234): np.random.seed(1234) size = 1000 w_true = np.array([0.35, 0.4, 0.25]) - mu_true = np.array([0., 2., 5.]) - sigma = np.array([0.5, 0.5, 1.]) + mu_true = np.array([0.0, 2.0, 5.0]) + sigma = np.array([0.5, 0.5, 1.0]) component = np.random.choice(mu_true.size, size=size, p=w_true) x = np.random.normal(mu_true[component], sigma[component], size=size) with pm.Model() as model: - w = pm.Dirichlet('w', a=np.ones_like(w_true)) - mu = pm.Normal('mu', mu=0., sd=10., shape=w_true.shape) - enforce_order = pm.Potential('enforce_order', tt.switch(mu[0] - mu[1] <= 0, 0., -np.inf) + - tt.switch(mu[1] - mu[2] <= 0, 0., -np.inf)) - tau = pm.Gamma('tau', alpha=1., beta=1., shape=w_true.shape) - pm.NormalMixture('x_obs', w=w, mu=mu, tau=tau, observed=x) + w = pm.Dirichlet("w", a=np.ones_like(w_true)) + mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape) + enforce_order = pm.Potential( + "enforce_order", + tt.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) + + tt.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf), + ) + tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape) + pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x) # Initialization can be poorly specified, this is a hack to make it work start = { - 'mu': mu_true.copy(), - 'tau_log__': np.log(1. 
/ sigma**2), - 'w_stickbreaking__': np.array([-0.03, 0.44]) + "mu": mu_true.copy(), + "tau_log__": np.log(1.0 / sigma ** 2), + "w_stickbreaking__": np.array([-0.03, 0.44]), } return model, start @@ -77,76 +80,175 @@ class OverheadSuite: Just tests how long sampling from a normal distribution takes for various samplers """ + params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice] timer = timeit.default_timer def setup(self, step): self.n_steps = 10000 with pm.Model() as self.model: - pm.Normal('x', mu=0, sd=1) + pm.Normal("x", mu=0, sd=1) def time_overhead_sample(self, step): with self.model: - pm.sample(self.n_steps, step=step(), random_seed=1, - progressbar=False, compute_convergence_checks=False) + pm.sample( + self.n_steps, + step=step(), + random_seed=1, + progressbar=False, + compute_convergence_checks=False, + ) class ExampleSuite: """Implements examples to keep up with benchmarking them.""" + timeout = 360.0 # give it a few minutes timer = timeit.default_timer def time_drug_evaluation(self): - drug = np.array([101, 100, 102, 104, 102, 97, 105, 105, 98, 101, - 100, 123, 105, 103, 100, 95, 102, 106, 109, 102, 82, - 102, 100, 102, 102, 101, 102, 102, 103, 103, 97, 97, - 103, 101, 97, 104, 96, 103, 124, 101, 101, 100, 101, - 101, 104, 100, 101]) - placebo = np.array([99, 101, 100, 101, 102, 100, 97, 101, 104, 101, - 102, 102, 100, 105, 88, 101, 100, 104, 100, 100, - 100, 101, 102, 103, 97, 101, 101, 100, 101, 99, - 101, 100, 100, 101, 100, 99, 101, 100, 102, 99, - 100, 99]) - - y = pd.DataFrame({ - 'value': np.r_[drug, placebo], - 'group': np.r_[['drug']*len(drug), ['placebo']*len(placebo)] - }) + drug = np.array( + [ + 101, + 100, + 102, + 104, + 102, + 97, + 105, + 105, + 98, + 101, + 100, + 123, + 105, + 103, + 100, + 95, + 102, + 106, + 109, + 102, + 82, + 102, + 100, + 102, + 102, + 101, + 102, + 102, + 103, + 103, + 97, + 97, + 103, + 101, + 97, + 104, + 96, + 103, + 124, + 101, + 101, + 100, + 101, + 101, + 104, + 100, + 101, + ] + ) + placebo = np.array( + [ + 99, + 101, + 100, + 101, + 102, + 100, + 97, + 101, + 104, + 101, + 102, + 102, + 100, + 105, + 88, + 101, + 100, + 104, + 100, + 100, + 100, + 101, + 102, + 103, + 97, + 101, + 101, + 100, + 101, + 99, + 101, + 100, + 100, + 101, + 100, + 99, + 101, + 100, + 102, + 99, + 100, + 99, + ] + ) + + y = pd.DataFrame( + { + "value": np.r_[drug, placebo], + "group": np.r_[["drug"] * len(drug), ["placebo"] * len(placebo)], + } + ) y_mean = y.value.mean() y_std = y.value.std() * 2 sigma_low = 1 sigma_high = 10 with pm.Model(): - group1_mean = pm.Normal('group1_mean', y_mean, sd=y_std) - group2_mean = pm.Normal('group2_mean', y_mean, sd=y_std) - group1_std = pm.Uniform('group1_std', lower=sigma_low, upper=sigma_high) - group2_std = pm.Uniform('group2_std', lower=sigma_low, upper=sigma_high) - lambda_1 = group1_std**-2 - lambda_2 = group2_std**-2 - - nu = pm.Exponential('ν_minus_one', 1/29.) 
+ 1 - - pm.StudentT('drug', nu=nu, mu=group1_mean, lam=lambda_1, observed=drug) - pm.StudentT('placebo', nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo) - diff_of_means = pm.Deterministic('difference of means', group1_mean - group2_mean) - pm.Deterministic('difference of stds', group1_std - group2_std) + group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std) + group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std) + group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high) + group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high) + lambda_1 = group1_std ** -2 + lambda_2 = group2_std ** -2 + + nu = pm.Exponential("ν_minus_one", 1 / 29.0) + 1 + + pm.StudentT("drug", nu=nu, mu=group1_mean, lam=lambda_1, observed=drug) + pm.StudentT("placebo", nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo) + diff_of_means = pm.Deterministic("difference of means", group1_mean - group2_mean) + pm.Deterministic("difference of stds", group1_std - group2_std) pm.Deterministic( - 'effect size', diff_of_means / np.sqrt((group1_std**2 + group2_std**2) / 2)) - pm.sample(draws=20000, cores=4, chains=4, - progressbar=False, compute_convergence_checks=False) + "effect size", diff_of_means / np.sqrt((group1_std ** 2 + group2_std ** 2) / 2) + ) + pm.sample( + draws=20000, cores=4, chains=4, progressbar=False, compute_convergence_checks=False + ) def time_glm_hierarchical(self): with glm_hierarchical_model(): - pm.sample(draws=20000, cores=4, chains=4, - progressbar=False, compute_convergence_checks=False) + pm.sample( + draws=20000, cores=4, chains=4, progressbar=False, compute_convergence_checks=False + ) class NUTSInitSuite: - """Tests initializations for NUTS sampler on models - """ + """Tests initializations for NUTS sampler on models""" + timeout = 360.0 - params = ('adapt_diag', 'jitter+adapt_diag', 'jitter+adapt_full', 'adapt_full') + params = ("adapt_diag", "jitter+adapt_diag", "jitter+adapt_full", "adapt_full") number = 1 repeat = 1 draws = 10000 @@ -159,32 +261,49 @@ def time_glm_hierarchical_init(self, init): def track_glm_hierarchical_ess(self, init): with glm_hierarchical_model(): - start, step = pm.init_nuts(init=init, chains=self.chains, progressbar=False, random_seed=123) + start, step = pm.init_nuts( + init=init, chains=self.chains, progressbar=False, random_seed=123 + ) t0 = time.time() - trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains, - start=start, random_seed=100, progressbar=False, - compute_convergence_checks=False) + trace = pm.sample( + draws=self.draws, + step=step, + cores=4, + chains=self.chains, + start=start, + random_seed=100, + progressbar=False, + compute_convergence_checks=False, + ) tot = time.time() - t0 - ess = float(pm.ess(trace, var_names=['mu_a'])['mu_a'].values) + ess = float(pm.ess(trace, var_names=["mu_a"])["mu_a"].values) return ess / tot def track_marginal_mixture_model_ess(self, init): model, start = mixture_model() with model: - _, step = pm.init_nuts(init=init, chains=self.chains, - progressbar=False, random_seed=123) + _, step = pm.init_nuts( + init=init, chains=self.chains, progressbar=False, random_seed=123 + ) start = [{k: v for k, v in start.items()} for _ in range(self.chains)] t0 = time.time() - trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains, - start=start, random_seed=100, progressbar=False, - compute_convergence_checks=False) + trace = pm.sample( + draws=self.draws, + step=step, + cores=4, + chains=self.chains, + start=start, + random_seed=100, + 
progressbar=False, + compute_convergence_checks=False, + ) tot = time.time() - t0 - ess = pm.ess(trace, var_names=['mu'])['mu'].values.min() # worst case + ess = pm.ess(trace, var_names=["mu"])["mu"].values.min() # worst case return ess / tot -NUTSInitSuite.track_glm_hierarchical_ess.unit = 'Effective samples per second' -NUTSInitSuite.track_marginal_mixture_model_ess.unit = 'Effective samples per second' +NUTSInitSuite.track_glm_hierarchical_ess.unit = "Effective samples per second" +NUTSInitSuite.track_marginal_mixture_model_ess.unit = "Effective samples per second" class CompareMetropolisNUTSSuite: @@ -200,15 +319,21 @@ def track_glm_hierarchical_ess(self, step): if step is not None: step = step() t0 = time.time() - trace = pm.sample(draws=self.draws, step=step, cores=4, chains=4, - random_seed=100, progressbar=False, - compute_convergence_checks=False) + trace = pm.sample( + draws=self.draws, + step=step, + cores=4, + chains=4, + random_seed=100, + progressbar=False, + compute_convergence_checks=False, + ) tot = time.time() - t0 - ess = float(pm.ess(trace, var_names=['mu_a'])['mu_a'].values) + ess = float(pm.ess(trace, var_names=["mu_a"])["mu_a"].values) return ess / tot -CompareMetropolisNUTSSuite.track_glm_hierarchical_ess.unit = 'Effective samples per second' +CompareMetropolisNUTSSuite.track_glm_hierarchical_ess.unit = "Effective samples per second" class DifferentialEquationSuite: @@ -223,30 +348,34 @@ def freefall(y, t, p): # Times for observation times = np.arange(0, 10, 0.5) - y = np.array([ - -2.01, - 9.49, - 15.58, - 16.57, - 27.58, - 32.26, - 35.13, - 38.07, - 37.36, - 38.83, - 44.86, - 43.58, - 44.59, - 42.75, - 46.9, - 49.32, - 44.06, - 49.86, - 46.48, - 48.18 - ]).reshape(-1, 1) - - ode_model = pm.ode.DifferentialEquation(func=freefall, times=times, n_states=1, n_theta=2, t0=0) + y = np.array( + [ + -2.01, + 9.49, + 15.58, + 16.57, + 27.58, + 32.26, + 35.13, + 38.07, + 37.36, + 38.83, + 44.86, + 43.58, + 44.59, + 42.75, + 46.9, + 49.32, + 44.06, + 49.86, + 46.48, + 48.18, + ] + ).reshape(-1, 1) + + ode_model = pm.ode.DifferentialEquation( + func=freefall, times=times, n_states=1, n_theta=2, t0=0 + ) with pm.Model() as model: # Specify prior distributions for some of our model parameters sigma = pm.HalfCauchy("sigma", 1) @@ -263,4 +392,4 @@ def freefall(y, t, p): return np.mean([ess.sigma, ess.gamma]) / tot -DifferentialEquationSuite.track_1var_2par_ode_ess.unit = 'Effective samples per second' +DifferentialEquationSuite.track_1var_2par_ode_ess.unit = "Effective samples per second" diff --git a/docs/source/conf.py b/docs/source/conf.py index 67f82eebb0..2d3743c01b 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -155,7 +155,7 @@ ("Books + Videos", "learn"), ("API", "api"), ("Developer Guide", "developer_guide"), - ("About PyMC3", "about") + ("About PyMC3", "about"), ], # "fixed_sidebar": "false", # "description": "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano" @@ -264,9 +264,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, "pymc3.tex", "PyMC3 Documentation", "PyMC developers", "manual") -] +latex_documents = [(master_doc, "pymc3.tex", "PyMC3 Documentation", "PyMC developers", "manual")] # The name of an image file (relative to this directory) to place at the top of # the title page. 
@@ -330,7 +328,5 @@ def setup(app): - app.add_css_file( - "https://cdn.jsdelivr.net/npm/semantic-ui@2.4.2/dist/semantic.min.css" - ) + app.add_css_file("https://cdn.jsdelivr.net/npm/semantic-ui@2.4.2/dist/semantic.min.css") app.add_css_file("default.css") diff --git a/docs/source/sphinxext/gallery_generator.py b/docs/source/sphinxext/gallery_generator.py index 2e9c716a8f..b3fdd23637 100644 --- a/docs/source/sphinxext/gallery_generator.py +++ b/docs/source/sphinxext/gallery_generator.py @@ -18,9 +18,7 @@ from matplotlib import image DOC_SRC = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -DEFAULT_IMG_LOC = os.path.join( - os.path.dirname(DOC_SRC), "logos", "PyMC3.png" - ) +DEFAULT_IMG_LOC = os.path.join(os.path.dirname(DOC_SRC), "logos", "PyMC3.png") TABLE_OF_CONTENTS_FILENAME = "table_of_contents_{}.js" INDEX_TEMPLATE = """ @@ -69,13 +67,9 @@ class NotebookGenerator: def __init__(self, filename, target_dir): self.basename = os.path.basename(filename) self.stripped_name = os.path.splitext(self.basename)[0] - self.output_html = os.path.join( - "..", "notebooks", f"{self.stripped_name}.html" - ) + self.output_html = os.path.join("..", "notebooks", f"{self.stripped_name}.html") self.image_dir = os.path.join(target_dir, "_images") - self.png_path = os.path.join( - self.image_dir, f"{self.stripped_name}.png" - ) + self.png_path = os.path.join(self.image_dir, f"{self.stripped_name}.png") with open(filename) as fid: self.json_source = json.load(fid) self.pagetitle = self.extract_title() @@ -141,8 +135,7 @@ def build_gallery(srcdir, gallery): source_dir = os.path.abspath( os.path.join(os.path.dirname(os.path.dirname(srcdir)), "notebooks") ) - table_of_contents_file = os.path.join( - source_dir, TABLE_OF_CONTENTS_FILENAME.format(gallery)) + table_of_contents_file = os.path.join(source_dir, TABLE_OF_CONTENTS_FILENAME.format(gallery)) tocjs = TableOfContentsJS() tocjs.load(table_of_contents_file) @@ -157,7 +150,7 @@ def build_gallery(srcdir, gallery): if not os.path.exists(source_dir): os.makedirs(source_dir) - + # Create default image default_png_path = os.path.join(os.path.join(target_dir, "_images"), "default.png") shutil.copy(DEFAULT_IMG_LOC, default_png_path) @@ -178,7 +171,7 @@ def build_gallery(srcdir, gallery): filename = basename.split(".")[0] data[basename] = { "title": " ".join(filename.split("_")), - "url": os.path.join(os.sep, gallery, "../"+filename+".html"), + "url": os.path.join(os.sep, gallery, "../" + filename + ".html"), "thumb": os.path.basename(default_png_path), } @@ -186,19 +179,17 @@ def build_gallery(srcdir, gallery): with open(table_of_contents_file) as toc: table_of_contents = toc.read() - js_contents = "Gallery.examples = {}\n{}".format( - json.dumps(data), table_of_contents - ) + js_contents = "Gallery.examples = {}\n{}".format(json.dumps(data), table_of_contents) with open(js_file, "w") as js: js.write(js_contents) with open(os.path.join(target_dir, "index.rst"), "w") as index: - index.write(INDEX_TEMPLATE.format( - sphinx_tag="notebook_gallery", - gallery=gallery, - Gallery=gallery.title().rstrip("s") - )) + index.write( + INDEX_TEMPLATE.format( + sphinx_tag="notebook_gallery", gallery=gallery, Gallery=gallery.title().rstrip("s") + ) + ) os.chdir(working_dir) diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py index df8ff24fd4..c23b18817d 100644 --- a/pymc3/gp/cov.py +++ b/pymc3/gp/cov.py @@ -87,10 +87,13 @@ def full(self, X, Xs): def _slice(self, X, Xs): if self.input_dim != X.shape[-1]: - warnings.warn(f"Only {self.input_dim} column(s) out of {X.shape[-1]} 
are" - " being used to compute the covariance function. If this" - " is not intended, increase 'input_dim' parameter to" - " the number of columns to use. Ignore otherwise.", UserWarning) + warnings.warn( + f"Only {self.input_dim} column(s) out of {X.shape[-1]} are" + " being used to compute the covariance function. If this" + " is not intended, increase 'input_dim' parameter to" + " the number of columns to use. Ignore otherwise.", + UserWarning, + ) X = tt.as_tensor_variable(X[:, self.active_dims]) if Xs is not None: Xs = tt.as_tensor_variable(Xs[:, self.active_dims]) @@ -109,9 +112,9 @@ def __rmul__(self, other): return self.__mul__(other) def __pow__(self, other): - if( - isinstance(other, theano.compile.SharedVariable) and - other.get_value().squeeze().shape == () + if ( + isinstance(other, theano.compile.SharedVariable) + and other.get_value().squeeze().shape == () ): other = tt.squeeze(other) return Exponentiated(self, other) @@ -123,7 +126,6 @@ def __pow__(self, other): raise ValueError("A covariance function can only be exponentiated by a scalar value") - def __array_wrap__(self, result): """ Required to allow radd/rmul by numpy arrays. @@ -132,7 +134,9 @@ def __array_wrap__(self, result): if len(result.shape) <= 1: result = result.reshape(1, 1) elif len(result.shape) > 2: - raise ValueError(f"cannot combine a covariance function with array of shape {result.shape}") + raise ValueError( + f"cannot combine a covariance function with array of shape {result.shape}" + ) r, c = result.shape A = np.zeros((r, c)) for i in range(r): @@ -149,11 +153,7 @@ def __array_wrap__(self, result): class Combination(Covariance): def __init__(self, factor_list): input_dim = max( - [ - factor.input_dim - for factor in factor_list - if isinstance(factor, Covariance) - ] + [factor.input_dim for factor in factor_list if isinstance(factor, Covariance)] ) super().__init__(input_dim=input_dim) self.factor_list = [] @@ -205,10 +205,7 @@ class Exponentiated(Covariance): def __init__(self, kernel, power): self.kernel = kernel self.power = power - super().__init__( - input_dim=self.kernel.input_dim, - active_dims=self.kernel.active_dims - ) + super().__init__(input_dim=self.kernel.input_dim, active_dims=self.kernel.active_dims) def __call__(self, X, Xs=None, diag=False): return self.kernel(X, Xs, diag=diag) ** self.power @@ -247,9 +244,7 @@ def _split(self, X, Xs): def __call__(self, X, Xs=None, diag=False): X_split, Xs_split = self._split(X, Xs) - covs = [ - cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split) - ] + covs = [cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split)] return reduce(mul, covs) @@ -431,9 +426,7 @@ class Matern52(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r = self.euclidean_dist(X, Xs) - return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp( - -1.0 * np.sqrt(5.0) * r - ) + return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp(-1.0 * np.sqrt(5.0) * r) class Matern32(Stationary): @@ -605,14 +598,10 @@ def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None): super().__init__(input_dim, active_dims) if active_dims is not None: if len(active_dims) > 1: - raise NotImplementedError( - ("Higher dimensional inputs ", "are untested") - ) + raise NotImplementedError(("Higher dimensional inputs ", "are untested")) else: if input_dim != 1: - raise NotImplementedError( - ("Higher dimensional inputs ", "are untested") - ) + raise NotImplementedError(("Higher dimensional inputs ", "are 
untested")) if not callable(lengthscale_func): raise TypeError("lengthscale_func must be callable") self.lfunc = handle_args(lengthscale_func, args) @@ -642,9 +631,7 @@ def full(self, X, Xs=None): r2 = self.square_dist(X, Xs) rx2 = tt.reshape(tt.square(rx), (-1, 1)) rz2 = tt.reshape(tt.square(rz), (1, -1)) - return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp( - -1.0 * r2 / (rx2 + rz2) - ) + return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp(-1.0 * r2 / (rx2 + rz2)) def diag(self, X): return tt.alloc(1.0, X.shape[0]) @@ -734,9 +721,7 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): raise ValueError("Coregion requires exactly one dimension to be active") make_B = W is not None or kappa is not None if make_B and B is not None: - raise ValueError( - "Exactly one of (W, kappa) and B must be provided to Coregion" - ) + raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion") if make_B: self.W = tt.as_tensor_variable(W) self.kappa = tt.as_tensor_variable(kappa) @@ -744,9 +729,7 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): elif B is not None: self.B = tt.as_tensor_variable(B) else: - raise ValueError( - "Exactly one of (W, kappa) and B must be provided to Coregion" - ) + raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion") def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) diff --git a/pymc3/gp/mean.py b/pymc3/gp/mean.py index 33b1f7acfd..d2e93fdfe5 100644 --- a/pymc3/gp/mean.py +++ b/pymc3/gp/mean.py @@ -14,7 +14,7 @@ import theano.tensor as tt -__all__ = ['Zero', 'Constant', 'Linear'] +__all__ = ["Zero", "Constant", "Linear"] class Mean: @@ -48,6 +48,7 @@ class Zero(Mean): def __call__(self, X): return tt.alloc(0.0, X.shape[0]) + class Constant(Mean): R""" Constant mean function for Gaussian process. diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py index cc2c73d8df..b35ffeac48 100644 --- a/pymc3/gp/util.py +++ b/pymc3/gp/util.py @@ -18,9 +18,9 @@ import warnings cholesky = tt.slinalg.cholesky -solve_lower = tt.slinalg.Solve(A_structure='lower_triangular') -solve_upper = tt.slinalg.Solve(A_structure='upper_triangular') -solve = tt.slinalg.Solve(A_structure='general') +solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") +solve_upper = tt.slinalg.Solve(A_structure="upper_triangular") +solve = tt.slinalg.Solve(A_structure="general") def infer_shape(X, n_points=None): @@ -44,10 +44,12 @@ def kmeans_inducing_points(n_inducing, X): elif isinstance(X, (np.ndarray, tuple, list)): X = np.asarray(X) else: - raise TypeError("To use K-means initialization, " - "please provide X as a type that " - "can be cast to np.ndarray, instead " - "of {}".format(type(X))) + raise TypeError( + "To use K-means initialization, " + "please provide X as a type that " + "can be cast to np.ndarray, instead " + "of {}".format(type(X)) + ) scaling = np.std(X, 0) # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 @@ -58,35 +60,51 @@ def kmeans_inducing_points(n_inducing, X): def conditioned_vars(varnames): """ Decorator for validating attrs that are conditioned on. """ + def gp_wrapper(cls): def make_getter(name): def getter(self): value = getattr(self, name, None) if value is None: - raise AttributeError("'{}' not set. Provide as argument " - "to condition, or call 'prior' " - "first".format(name.lstrip("_"))) + raise AttributeError( + "'{}' not set. 
Provide as argument " + "to condition, or call 'prior' " + "first".format(name.lstrip("_")) + ) else: return value return getattr(self, name) + return getter def make_setter(name): def setter(self, val): setattr(self, name, val) + return setter for name in varnames: - getter = make_getter('_' + name) - setter = make_setter('_' + name) + getter = make_getter("_" + name) + setter = make_setter("_" + name) setattr(cls, name, property(getter, setter)) return cls + return gp_wrapper -def plot_gp_dist(ax, samples:np.ndarray, x:np.ndarray, plot_samples=True, palette="Reds", fill_alpha=0.8, samples_alpha=0.1, fill_kwargs=None, samples_kwargs=None): - """ A helper function for plotting 1D GP posteriors from trace - +def plot_gp_dist( + ax, + samples: np.ndarray, + x: np.ndarray, + plot_samples=True, + palette="Reds", + fill_alpha=0.8, + samples_alpha=0.1, + fill_kwargs=None, + samples_kwargs=None, +): + """A helper function for plotting 1D GP posteriors from trace + Parameters ---------- ax: axes @@ -95,7 +113,7 @@ def plot_gp_dist(ax, samples:np.ndarray, x:np.ndarray, plot_samples=True, palett Array of S posterior predictive sample from a GP. Expected shape: (S, X) x: numpy.ndarray - Grid of X values corresponding to the samples. + Grid of X values corresponding to the samples. Expected shape: (X,) or (X, 1), or (1, X) plot_samples: bool Plot the GP samples along with posterior (defaults True). @@ -123,9 +141,9 @@ def plot_gp_dist(ax, samples:np.ndarray, x:np.ndarray, plot_samples=True, palett samples_kwargs = {} if np.any(np.isnan(samples)): warnings.warn( - 'There are `nan` entries in the [samples] arguments. ' - 'The plot will not contain a band!', - UserWarning + "There are `nan` entries in the [samples] arguments. " + "The plot will not contain a band!", + UserWarning, ) cmap = plt.get_cmap(palette) @@ -135,13 +153,12 @@ def plot_gp_dist(ax, samples:np.ndarray, x:np.ndarray, plot_samples=True, palett x = x.flatten() for i, p in enumerate(percs[::-1]): upper = np.percentile(samples, p, axis=1) - lower = np.percentile(samples, 100-p, axis=1) + lower = np.percentile(samples, 100 - p, axis=1) color_val = colors[i] ax.fill_between(x, upper, lower, color=cmap(color_val), alpha=fill_alpha, **fill_kwargs) if plot_samples: # plot a few samples idx = np.random.randint(0, samples.shape[1], 30) - ax.plot(x, samples[:,idx], color=cmap(0.9), lw=1, alpha=samples_alpha, - **samples_kwargs) + ax.plot(x, samples[:, idx], color=cmap(0.9), lw=1, alpha=samples_alpha, **samples_kwargs) return ax diff --git a/pymc3/math.py b/pymc3/math.py index 770dbeebe2..dba3f938a6 100644 --- a/pymc3/math.py +++ b/pymc3/math.py @@ -382,9 +382,7 @@ class BlockDiagonalMatrix(Op): def __init__(self, sparse=False, format="csr"): if format not in ("csr", "csc"): - raise ValueError( - f"format must be one of: 'csr', 'csc', got {format}" - ) + raise ValueError(f"format must be one of: 'csr', 'csc', got {format}") self.sparse = sparse self.format = format @@ -395,9 +393,7 @@ def make_node(self, *matrices): if any(mat.type.ndim != 2 for mat in matrices): raise TypeError("all data arguments must be matrices") if self.sparse: - out_type = theano.sparse.matrix( - self.format, dtype=largest_common_dtype(matrices) - ) + out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) else: out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices)) return tt.Apply(self, matrices, [out_type]) diff --git a/pymc3/memoize.py b/pymc3/memoize.py index b43bca5b45..349d03188f 100644 --- a/pymc3/memoize.py +++ 
b/pymc3/memoize.py @@ -16,6 +16,7 @@ import pickle import collections from .util import biwrap + CACHE_REGISTRY = [] @@ -37,14 +38,15 @@ def memoizer(*args, **kwargs): else: # bound methods have self as first argument, remove it to compute key key = (hashable(args[1:]), hashable(kwargs)) - if not hasattr(args[0], '_cache'): - setattr(args[0], '_cache', collections.defaultdict(dict)) + if not hasattr(args[0], "_cache"): + setattr(args[0], "_cache", collections.defaultdict(dict)) # do not add to cache regestry - cache = getattr(args[0], '_cache')[obj.__name__] + cache = getattr(args[0], "_cache")[obj.__name__] if key not in cache: cache[key] = obj(*args, **kwargs) return cache[key] + return memoizer @@ -54,7 +56,7 @@ def clear_cache(obj=None): c.clear() else: if isinstance(obj, WithMemoization): - for v in getattr(obj, '_cache', {}).values(): + for v in getattr(obj, "_cache", {}).values(): v.clear() else: obj.cache.clear() @@ -66,7 +68,7 @@ def __hash__(self): def __getstate__(self): state = self.__dict__.copy() - state.pop('_cache', None) + state.pop("_cache", None) return state def __setstate__(self, state): @@ -87,7 +89,7 @@ def hashable(a): try: return hash(pickle.dumps(a)) except Exception: - if hasattr(a, '__dict__'): + if hasattr(a, "__dict__"): return hashable(a.__dict__) else: return id(a) diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index 93fc3a8aab..adede230ed 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -31,28 +31,30 @@ def __init__(self, model): self.model = model self.var_names = get_default_varnames(self.model.named_vars, include_transformed=False) self.var_list = self.model.named_vars.values() - self.transform_map = {v.transformed: v.name for v in self.var_list if hasattr(v, 'transformed')} + self.transform_map = { + v.transformed: v.name for v in self.var_list if hasattr(v, "transformed") + } self._deterministics = None def get_deterministics(self, var): """Compute the deterministic nodes of the graph, **not** including var itself.""" deterministics = [] - attrs = ('transformed', 'logpt') + attrs = ("transformed", "logpt") for v in self.var_list: if v != var and all(not hasattr(v, attr) for attr in attrs): deterministics.append(v) return deterministics def _get_ancestors(self, var: Tensor, func) -> Set[Tensor]: - """Get all ancestors of a function, doing some accounting for deterministics. - """ + """Get all ancestors of a function, doing some accounting for deterministics.""" # this contains all of the variables in the model EXCEPT var... 
vars = set(self.var_list) vars.remove(var) - blockers = set() # type: Set[Tensor] + blockers = set() # type: Set[Tensor] retval = set() # type: Set[Tensor] + def _expand(node) -> Optional[Iterator[Tensor]]: if node in blockers: return None @@ -66,14 +68,12 @@ def _expand(node) -> Optional[Iterator[Tensor]]: else: return None - stack_search(start = deque([func]), - expand=_expand, - mode='bfs') + stack_search(start=deque([func]), expand=_expand, mode="bfs") return retval def _filter_parents(self, var, parents) -> Set[VarName]: """Get direct parents of a var, as strings""" - keep = set() # type: Set[VarName] + keep = set() # type: Set[VarName] for p in parents: if p == var: continue @@ -83,14 +83,14 @@ def _filter_parents(self, var, parents) -> Set[VarName]: if self.transform_map[p] != var.name: keep.add(self.transform_map[p]) else: - raise AssertionError('Do not know what to do with {}'.format(get_var_name(p))) + raise AssertionError("Do not know what to do with {}".format(get_var_name(p))) return keep def get_parents(self, var: Tensor) -> Set[VarName]: """Get the named nodes that are direct inputs to the var""" - if hasattr(var, 'transformed'): + if hasattr(var, "transformed"): func = var.transformed.logpt - elif hasattr(var, 'logpt'): + elif hasattr(var, "logpt"): func = var.logpt else: func = var @@ -100,7 +100,8 @@ def get_parents(self, var: Tensor) -> Set[VarName]: def make_compute_graph(self) -> Dict[str, Set[VarName]]: """Get map of var_name -> set(input var names) for the model""" - input_map = {} # type: Dict[str, Set[VarName]] + input_map = {} # type: Dict[str, Set[VarName]] + def update_input_map(key: str, val: Set[VarName]): if key in input_map: input_map[key] = input_map[key].union(val) @@ -127,31 +128,29 @@ def _make_node(self, var_name, graph): # styling for node attrs = {} if isinstance(v, pm.model.ObservedRV): - attrs['style'] = 'filled' + attrs["style"] = "filled" # make Data be roundtangle, instead of rectangle if isinstance(v, SharedVariable): - attrs['style'] = 'rounded, filled' + attrs["style"] = "rounded, filled" # Get name for node if v in self.model.potentials: - distribution = 'Potential' - attrs['shape'] = 'octagon' - elif hasattr(v, 'distribution'): + distribution = "Potential" + attrs["shape"] = "octagon" + elif hasattr(v, "distribution"): distribution = v.distribution.__class__.__name__ elif isinstance(v, SharedVariable): - distribution = 'Data' - attrs['shape'] = 'box' + distribution = "Data" + attrs["shape"] = "box" else: - distribution = 'Deterministic' - attrs['shape'] = 'box' + distribution = "Deterministic" + attrs["shape"] = "box" - graph.node(var_name.replace(':', '&'), - f'{var_name}\n~\n{distribution}', - **attrs) + graph.node(var_name.replace(":", "&"), f"{var_name}\n~\n{distribution}", **attrs) def get_plates(self): - """ Rough but surprisingly accurate plate detection. + """Rough but surprisingly accurate plate detection. Just groups by the shape of the underlying distribution. Will be wrong if there are two plates with the same shape. @@ -163,14 +162,14 @@ def get_plates(self): plates = {} for var_name in self.var_names: v = self.model[var_name] - if hasattr(v, 'observations'): + if hasattr(v, "observations"): try: # To get shape of _observed_ data container `pm.Data` # (wrapper for theano.SharedVariable) we evaluate it. 
shape = tuple(v.observations.shape.eval()) except AttributeError: shape = v.observations.shape - elif hasattr(v, 'dshape'): + elif hasattr(v, "dshape"): shape = v.dshape else: shape = v.tag.test_value.shape @@ -191,28 +190,30 @@ def make_graph(self): try: import graphviz except ImportError: - raise ImportError('This function requires the python library graphviz, along with binaries. ' - 'The easiest way to install all of this is by running\n\n' - '\tconda install -c conda-forge python-graphviz') + raise ImportError( + "This function requires the python library graphviz, along with binaries. " + "The easiest way to install all of this is by running\n\n" + "\tconda install -c conda-forge python-graphviz" + ) graph = graphviz.Digraph(self.model.name) for shape, var_names in self.get_plates().items(): if isinstance(shape, SharedVariable): shape = shape.eval() - label = ' x '.join(map('{:,d}'.format, shape)) + label = " x ".join(map("{:,d}".format, shape)) if label: # must be preceded by 'cluster' to get a box around it - with graph.subgraph(name='cluster' + label) as sub: + with graph.subgraph(name="cluster" + label) as sub: for var_name in var_names: self._make_node(var_name, sub) # plate label goes bottom right - sub.attr(label=label, labeljust='r', labelloc='b', style='rounded') + sub.attr(label=label, labeljust="r", labelloc="b", style="rounded") else: for var_name in var_names: self._make_node(var_name, graph) for key, values in self.make_compute_graph().items(): for value in values: - graph.edge(value.replace(':', '&'), key.replace(':', '&')) + graph.edge(value.replace(":", "&"), key.replace(":", "&")) return graph diff --git a/pymc3/ode/ode.py b/pymc3/ode/ode.py index 21074e4822..da283ff39c 100644 --- a/pymc3/ode/ode.py +++ b/pymc3/ode/ode.py @@ -20,7 +20,7 @@ from ..ode import utils from ..exceptions import ShapeError, DtypeError -_log = logging.getLogger('pymc3') +_log = logging.getLogger("pymc3") floatX = theano.config.floatX @@ -48,24 +48,26 @@ class DifferentialEquation(theano.Op): Examples -------- - + .. 
code-block:: python def odefunc(y, t, p): #Logistic differential equation return p[0] * y[0] * (1 - y[0]) - + times = np.arange(0.5, 5, 0.5) ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0) """ _itypes = [ - tt.TensorType(floatX, (False,)), # y0 as 1D floatX vector - tt.TensorType(floatX, (False,)) # theta as 1D floatX vector + tt.TensorType(floatX, (False,)), # y0 as 1D floatX vector + tt.TensorType(floatX, (False,)), # theta as 1D floatX vector ] _otypes = [ - tt.TensorType(floatX, (False, False)), # model states as floatX of shape (T, S) - tt.TensorType(floatX, (False, False, False)), # sensitivities as floatX of shape (T, S, len(y0) + len(theta)) + tt.TensorType(floatX, (False, False)), # model states as floatX of shape (T, S) + tt.TensorType( + floatX, (False, False, False) + ), # sensitivities as floatX of shape (T, S, len(y0) + len(theta)) ] __props__ = ("func", "times", "n_states", "n_theta", "t0") @@ -107,7 +109,7 @@ def _system(self, Y, t, p): p : array parameter vector (y0, theta) """ - dydt, ddt_dydp = self._augmented_func(Y[:self.n_states], t, p, Y[self.n_states:]) + dydt, ddt_dydp = self._augmented_func(Y[: self.n_states], t, p, Y[self.n_states :]) derivatives = np.concatenate([dydt, ddt_dydp]) return derivatives @@ -120,16 +122,16 @@ def _simulate(self, y0, theta): func=self._system, y0=s0, t=self._augmented_times, args=(np.concatenate([y0, theta]),) ).astype(floatX) # The solution - y = sol[1:, :self.n_states] + y = sol[1:, : self.n_states] # The sensitivities, reshaped to be a sequence of matrices - sens = sol[1:, self.n_states:].reshape(self.n_times, self.n_states, self.n_p) + sens = sol[1:, self.n_states :].reshape(self.n_times, self.n_states, self.n_p) return y, sens def make_node(self, y0, theta): inputs = (y0, theta) - _log.debug('make_node for inputs {}'.format(hash(inputs))) + _log.debug("make_node for inputs {}".format(hash(inputs))) states = self._otypes[0]() sens = self._otypes[1]() @@ -139,46 +141,65 @@ def make_node(self, y0, theta): def __call__(self, y0, theta, return_sens=False, **kwargs): if isinstance(y0, (list, tuple)) and not len(y0) == self.n_states: - raise ShapeError('Length of y0 is wrong.', actual=(len(y0),), expected=(self.n_states,)) + raise ShapeError("Length of y0 is wrong.", actual=(len(y0),), expected=(self.n_states,)) if isinstance(theta, (list, tuple)) and not len(theta) == self.n_theta: - raise ShapeError('Length of theta is wrong.', actual=(len(theta),), expected=(self.n_theta,)) - + raise ShapeError( + "Length of theta is wrong.", actual=(len(theta),), expected=(self.n_theta,) + ) + # convert inputs to tensors (and check their types) y0 = tt.cast(tt.unbroadcast(tt.as_tensor_variable(y0), 0), floatX) theta = tt.cast(tt.unbroadcast(tt.as_tensor_variable(theta), 0), floatX) inputs = [y0, theta] for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)): if not input_val.type == itype: - raise ValueError(f'Input {i} of type {input_val.type} does not have the expected type of {itype}') - + raise ValueError( + f"Input {i} of type {input_val.type} does not have the expected type of {itype}" + ) + # use default implementation to prepare symbolic outputs (via make_node) states, sens = super(theano.Op, self).__call__(y0, theta, **kwargs) - if theano.config.compute_test_value != 'off': + if theano.config.compute_test_value != "off": # compute test values from input test values test_states, test_sens = self._simulate( - y0=self._get_test_value(y0), - theta=self._get_test_value(theta) + 
y0=self._get_test_value(y0), theta=self._get_test_value(theta) ) # check types of simulation result if not test_states.dtype == self._otypes[0].dtype: - raise DtypeError('Simulated states have the wrong type.', actual=test_states.dtype, expected=self._otypes[0].dtype) + raise DtypeError( + "Simulated states have the wrong type.", + actual=test_states.dtype, + expected=self._otypes[0].dtype, + ) if not test_sens.dtype == self._otypes[1].dtype: - raise DtypeError('Simulated sensitivities have the wrong type.', actual=test_sens.dtype, expected=self._otypes[1].dtype) + raise DtypeError( + "Simulated sensitivities have the wrong type.", + actual=test_sens.dtype, + expected=self._otypes[1].dtype, + ) # check shapes of simulation result expected_states_shape = (self.n_times, self.n_states) expected_sens_shape = (self.n_times, self.n_states, self.n_p) if not test_states.shape == expected_states_shape: - raise ShapeError('Simulated states have the wrong shape.', test_states.shape, expected_states_shape) + raise ShapeError( + "Simulated states have the wrong shape.", + test_states.shape, + expected_states_shape, + ) if not test_sens.shape == expected_sens_shape: - raise ShapeError('Simulated sensitivities have the wrong shape.', test_sens.shape, expected_sens_shape) + raise ShapeError( + "Simulated sensitivities have the wrong shape.", + test_sens.shape, + expected_sens_shape, + ) # attach results as test values to the outputs states.tag.test_value = test_states sens.tag.test_value = test_sens - + if return_sens: return states, sens return states @@ -194,25 +215,22 @@ def infer_shape(self, node, input_shapes): return output_shapes def grad(self, inputs, output_grads): - _log.debug('grad w.r.t. inputs {}'.format(hash(tuple(inputs)))) - + _log.debug("grad w.r.t. inputs {}".format(hash(tuple(inputs)))) + # fetch symbolic sensitivity output node from cache ihash = hash(tuple(inputs)) if ihash in self._output_sensitivities: sens = self._output_sensitivities[ihash] else: - _log.debug('No cached sensitivities found!') + _log.debug("No cached sensitivities found!") _, sens = self.__call__(*inputs, return_sens=True) ograds = output_grads[0] - + # for each parameter, multiply sensitivities with the output gradient and sum the result # sens is (n_times, n_states, n_p) # ograds is (n_times, n_states) - grads = [ - tt.sum(sens[:,:,p] * ograds) - for p in range(self.n_p) - ] - + grads = [tt.sum(sens[:, :, p] * ograds) for p in range(self.n_p)] + # return separate gradient tensors for y0 and theta inputs - result = tt.stack(grads[:self.n_states]), tt.stack(grads[self.n_states:]) + result = tt.stack(grads[: self.n_states]), tt.stack(grads[self.n_states :]) return result diff --git a/pymc3/ode/utils.py b/pymc3/ode/utils.py index 5e9e69d475..141c5503f1 100644 --- a/pymc3/ode/utils.py +++ b/pymc3/ode/utils.py @@ -18,43 +18,43 @@ def make_sens_ic(n_states, n_theta, floatX): - r""" - The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta) + r""" + The sensitivity matrix will always have consistent form. (n_states, n_states + n_theta) - If the first n_states entries of the parameters vector in the simulate call - correspond to initial conditions of the system, - then the first n_states columns of the sensitivity matrix should form - an identity matrix. + If the first n_states entries of the parameters vector in the simulate call + correspond to initial conditions of the system, + then the first n_states columns of the sensitivity matrix should form + an identity matrix. 
- If the last n_theta entries of the parameters vector in the simulate call - correspond to ode paramaters, then the last n_theta columns in - the sensitivity matrix will be 0. + If the last n_theta entries of the parameters vector in the simulate call + correspond to ode paramaters, then the last n_theta columns in + the sensitivity matrix will be 0. - Parameters - ---------- - n_states : int - Number of state variables in the ODE - n_theta : int - Number of ODE parameters - floatX : str - dtype to be used for the array + Parameters + ---------- + n_states : int + Number of state variables in the ODE + n_theta : int + Number of ODE parameters + floatX : str + dtype to be used for the array - Returns - ------- - dydp : array - 1D-array of shape (n_states * (n_states + n_theta),), representing the initial condition of the sensitivities - """ + Returns + ------- + dydp : array + 1D-array of shape (n_states * (n_states + n_theta),), representing the initial condition of the sensitivities + """ - # Initialize the sensitivity matrix to be 0 everywhere - sens_matrix = np.zeros((n_states, n_states + n_theta), dtype=floatX) + # Initialize the sensitivity matrix to be 0 everywhere + sens_matrix = np.zeros((n_states, n_states + n_theta), dtype=floatX) - # Slip in the identity matrix in the appropirate place - sens_matrix[:,:n_states] = np.eye(n_states, dtype=floatX) + # Slip in the identity matrix in the appropirate place + sens_matrix[:, :n_states] = np.eye(n_states, dtype=floatX) - # We need the sensitivity matrix to be a vector (see augmented_function) - # Ravel and return - dydp = sens_matrix.ravel() - return dydp + # We need the sensitivity matrix to be a vector (see augmented_function) + # Ravel and return + dydp = sens_matrix.ravel() + return dydp def augment_system(ode_func, n_states, n_theta): @@ -83,21 +83,21 @@ def augment_system(ode_func, n_states, n_theta): """ # Present state of the system - t_y = tt.vector("y", dtype='float64') - t_y.tag.test_value = np.ones((n_states,), dtype='float64') + t_y = tt.vector("y", dtype="float64") + t_y.tag.test_value = np.ones((n_states,), dtype="float64") # Parameter(s). Should be vector to allow for generaliztion to multiparameter # systems of ODEs. Is m dimensional because it includes all initial conditions as well as ode parameters - t_p = tt.vector("p", dtype='float64') - t_p.tag.test_value = np.ones((n_states + n_theta,), dtype='float64') + t_p = tt.vector("p", dtype="float64") + t_p.tag.test_value = np.ones((n_states + n_theta,), dtype="float64") # Time. 
Allow for non-automonous systems of ODEs to be analyzed - t_t = tt.scalar("t", dtype='float64') + t_t = tt.scalar("t", dtype="float64") t_t.tag.test_value = 2.459 # Present state of the gradients: # Will always be 0 unless the parameter is the inital condition # Entry i,j is partial of y[i] wrt to p[j] - dydp_vec = tt.vector("dydp", dtype='float64') - dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, 'float64') + dydp_vec = tt.vector("dydp", dtype="float64") + dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, "float64") dydp = dydp_vec.reshape((n_states, n_states + n_theta)) @@ -119,9 +119,7 @@ def augment_system(ode_func, n_states, n_theta): ddt_dydp = (Jdfdy + grad_f).flatten() system = theano.function( - inputs=[t_y, t_t, t_p, dydp_vec], - outputs=[t_yhat, ddt_dydp], - on_unused_input="ignore" + inputs=[t_y, t_t, t_p, dydp_vec], outputs=[t_yhat, ddt_dydp], on_unused_input="ignore" ) return system diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index fb4b464b01..c1f13f9bbd 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -109,18 +109,16 @@ def _unpickle_step_method(self): "or forkserver." ) if self._step_method_is_pickled: - if self._pickle_backend == 'pickle': + if self._pickle_backend == "pickle": try: self._step_method = pickle.loads(self._step_method) except Exception: raise ValueError(unpickle_error) - elif self._pickle_backend == 'dill': + elif self._pickle_backend == "dill": try: import dill except ImportError: - raise ValueError( - "dill must be installed for pickle_backend='dill'." - ) + raise ValueError("dill must be installed for pickle_backend='dill'.") try: self._step_method = dill.loads(self._step_method) except Exception: @@ -206,9 +204,7 @@ def _start_loop(self): warns = self._collect_warnings() else: warns = None - self._msg_pipe.send( - ("writing_done", is_last, draw, tuning, stats, warns) - ) + self._msg_pipe.send(("writing_done", is_last, draw, tuning, stats, warns)) draw += 1 else: raise ValueError("Unknown message " + msg[0]) @@ -289,7 +285,7 @@ def __init__( tune, seed, pickle_backend, - ) + ), ) self._process.start() # Close the remote pipe, so that we get notified if the other @@ -318,11 +314,7 @@ def _send(self, msg, *args): if message is not None and message[0] == "error": warns, old_error = message[1:] if warns is not None: - error = ParallelSamplingError( - str(old_error), - self.chain, - warns - ) + error = ParallelSamplingError(str(old_error), self.chain, warns) else: error = RuntimeError("Chain %s failed." % self.chain) raise error from old_error @@ -387,8 +379,7 @@ def terminate_all(processes, patience=2): process.join(timeout) except multiprocessing.TimeoutError: logger.warn( - "Chain processes did not terminate as expected. " - "Terminating forcefully..." + "Chain processes did not terminate as expected. " "Terminating forcefully..." 
) for process in processes: process.terminate() @@ -396,9 +387,7 @@ def terminate_all(processes, patience=2): process.join() -Draw = namedtuple( - "Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point", "warnings"] -) +Draw = namedtuple("Draw", ["chain", "is_last", "draw_idx", "tuning", "stats", "point", "warnings"]) class ParallelSampler: @@ -414,7 +403,7 @@ def __init__( start_chain_num: int = 0, progressbar: bool = True, mp_ctx=None, - pickle_backend: str = 'pickle', + pickle_backend: str = "pickle", ): if any(len(arg) != chains for arg in [seeds, start_points]): @@ -422,21 +411,19 @@ def __init__( if mp_ctx is None or isinstance(mp_ctx, str): # Closes issue https://github.com/pymc-devs/pymc3/issues/3849 - if platform.system() == 'Darwin': + if platform.system() == "Darwin": mp_ctx = "forkserver" mp_ctx = multiprocessing.get_context(mp_ctx) step_method_pickled = None - if mp_ctx.get_start_method() != 'fork': - if pickle_backend == 'pickle': + if mp_ctx.get_start_method() != "fork": + if pickle_backend == "pickle": step_method_pickled = pickle.dumps(step_method, protocol=-1) - elif pickle_backend == 'dill': + elif pickle_backend == "dill": try: import dill except ImportError: - raise ValueError( - "dill must be installed for pickle_backend='dill'." - ) + raise ValueError("dill must be installed for pickle_backend='dill'.") step_method_pickled = dill.dumps(step_method, protocol=-1) self._samplers = [ @@ -449,7 +436,7 @@ def __init__( seed, start, mp_ctx, - pickle_backend + pickle_backend, ) for chain, seed, start in zip(range(chains), seeds, start_points) ] @@ -468,9 +455,7 @@ def __init__( self._desc = "Sampling {0._chains:d} chains, {0._divergences:,d} divergences" self._chains = chains if progressbar: - self._progress = progress_bar( - range(chains * (draws + tune)), display=progressbar - ) + self._progress = progress_bar(range(chains * (draws + tune)), display=progressbar) self._progress.comment = self._desc.format(self) def _make_active(self): diff --git a/pymc3/plots/posteriorplot.py b/pymc3/plots/posteriorplot.py index 76e3b3743a..fc44e914c8 100644 --- a/pymc3/plots/posteriorplot.py +++ b/pymc3/plots/posteriorplot.py @@ -18,6 +18,7 @@ pass import numpy as np + def plot_posterior_predictive_glm(trace, eval=None, lm=None, samples=30, **kwargs): """Plot posterior predictive of a linear model. :Arguments: @@ -35,21 +36,21 @@ def plot_posterior_predictive_glm(trace, eval=None, lm=None, samples=30, **kwarg Additional keyword arguments are passed to pylab.plot(). 
""" if lm is None: - lm = lambda x, sample: sample['Intercept'] + sample['x'] * x + lm = lambda x, sample: sample["Intercept"] + sample["x"] * x if eval is None: eval = np.linspace(0, 1, 100) # Set default plotting arguments - if 'lw' not in kwargs and 'linewidth' not in kwargs: - kwargs['lw'] = .2 - if 'c' not in kwargs and 'color' not in kwargs: - kwargs['c'] = 'k' + if "lw" not in kwargs and "linewidth" not in kwargs: + kwargs["lw"] = 0.2 + if "c" not in kwargs and "color" not in kwargs: + kwargs["c"] = "k" for rand_loc in np.random.randint(0, len(trace), samples): rand_sample = trace[rand_loc] plt.plot(eval, lm(eval, rand_sample), **kwargs) - # Make sure to not plot label multiple times - kwargs.pop('label', None) + # Make sure to not plot label multiple times + kwargs.pop("label", None) - plt.title('Posterior predictive') + plt.title("Posterior predictive") diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py index 308a1aa2e5..4dfd9fb2ca 100644 --- a/pymc3/tests/test_mixture.py +++ b/pymc3/tests/test_mixture.py @@ -18,8 +18,20 @@ from .helpers import SeededTest import pymc3 as pm -from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \ - Mixture, NormalMixture, MvNormal, sample, Metropolis, Model +from pymc3 import ( + Dirichlet, + Gamma, + Normal, + Lognormal, + Poisson, + Exponential, + Mixture, + NormalMixture, + MvNormal, + sample, + Metropolis, + Model, +) import scipy.stats as st from scipy.special import logsumexp from pymc3.theanof import floatX @@ -58,12 +70,12 @@ def setup_class(cls): super().setup_class() cls.norm_w = np.array([0.75, 0.25]) - cls.norm_mu = np.array([0., 5.]) + cls.norm_mu = np.array([0.0, 5.0]) cls.norm_sd = np.ones_like(cls.norm_mu) cls.norm_x = generate_normal_mixture_data(cls.norm_w, cls.norm_mu, cls.norm_sd, size=1000) cls.pois_w = np.array([0.4, 0.6]) - cls.pois_mu = np.array([5., 20.]) + cls.pois_mu = np.array([5.0, 20.0]) cls.pois_x = generate_poisson_mixture_data(cls.pois_w, cls.pois_mu, size=1000) def test_dimensions(self): @@ -79,79 +91,68 @@ def test_dimensions(self): def test_mixture_list_of_normals(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size) - mu = Normal('mu', 0., 10., shape=self.norm_w.size) - tau = Gamma('tau', 1., 1., shape=self.norm_w.size) - Mixture('x_obs', w, - [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])], - observed=self.norm_x) + w = Dirichlet("w", floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size) + mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size) + tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size) + Mixture( + "x_obs", + w, + [Normal.dist(mu[0], tau=tau[0]), Normal.dist(mu[1], tau=tau[1])], + observed=self.norm_x, + ) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) + trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1) - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.norm_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.norm_mu), - rtol=0.1, atol=0.1) + assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1 + ) def test_normal_mixture(self): with Model() as model: - w = Dirichlet('w', floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size) - mu = Normal('mu', 0., 10., shape=self.norm_w.size) - tau = 
Gamma('tau', 1., 1., shape=self.norm_w.size) - NormalMixture('x_obs', w, mu, tau=tau, observed=self.norm_x) + w = Dirichlet("w", floatX(np.ones_like(self.norm_w)), shape=self.norm_w.size) + mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size) + tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size) + NormalMixture("x_obs", w, mu, tau=tau, observed=self.norm_x) step = Metropolis() - trace = sample(5000, step, random_seed=self.random_seed, - progressbar=False, chains=1) - - assert_allclose(np.sort(trace['w'].mean(axis=0)), - np.sort(self.norm_w), - rtol=0.1, atol=0.1) - assert_allclose(np.sort(trace['mu'].mean(axis=0)), - np.sort(self.norm_mu), - rtol=0.1, atol=0.1) - - @pytest.mark.parametrize('nd,ncomp', - [(tuple(), 5), - (1, 5), - (3, 5), - ((3, 3), 5), - (3, 3), - ((3, 3), 3)], - ids=str) + trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1) + + assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.norm_w), rtol=0.1, atol=0.1) + assert_allclose( + np.sort(trace["mu"].mean(axis=0)), np.sort(self.norm_mu), rtol=0.1, atol=0.1 + ) + + @pytest.mark.parametrize( + "nd,ncomp", [(tuple(), 5), (1, 5), (3, 5), ((3, 3), 5), (3, 3), ((3, 3), 3)], ids=str + ) def test_normal_mixture_nd(self, nd, ncomp): nd = to_tuple(nd) ncomp = int(ncomp) comp_shape = nd + (ncomp,) test_mus = np.random.randn(*comp_shape) test_taus = np.random.gamma(1, 1, size=comp_shape) - observed = generate_normal_mixture_data(w=np.ones(ncomp)/ncomp, - mu=test_mus, - sd=1/np.sqrt(test_taus), - size=10) + observed = generate_normal_mixture_data( + w=np.ones(ncomp) / ncomp, mu=test_mus, sd=1 / np.sqrt(test_taus), size=10 + ) with Model() as model0: - mus = Normal('mus', shape=comp_shape) - taus = Gamma('taus', alpha=1, beta=1, shape=comp_shape) - ws = Dirichlet('ws', np.ones(ncomp), shape=(ncomp,)) - mixture0 = NormalMixture('m', w=ws, mu=mus, tau=taus, shape=nd, - comp_shape=comp_shape) - obs0 = NormalMixture('obs', w=ws, mu=mus, tau=taus, shape=nd, - comp_shape=comp_shape, - observed=observed) + mus = Normal("mus", shape=comp_shape) + taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape) + ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,)) + mixture0 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd, comp_shape=comp_shape) + obs0 = NormalMixture( + "obs", w=ws, mu=mus, tau=taus, shape=nd, comp_shape=comp_shape, observed=observed + ) with Model() as model1: - mus = Normal('mus', shape=comp_shape) - taus = Gamma('taus', alpha=1, beta=1, shape=comp_shape) - ws = Dirichlet('ws', np.ones(ncomp), shape=(ncomp,)) - comp_dist = [Normal.dist(mu=mus[..., i], tau=taus[..., i], - shape=nd) - for i in range(ncomp)] - mixture1 = Mixture('m', w=ws, comp_dists=comp_dist, shape=nd) - obs1 = Mixture('obs', w=ws, comp_dists=comp_dist, shape=nd, - observed=observed) + mus = Normal("mus", shape=comp_shape) + taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape) + ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,)) + comp_dist = [ + Normal.dist(mu=mus[..., i], tau=taus[..., i], shape=nd) for i in range(ncomp) + ] + mixture1 = Mixture("m", w=ws, comp_dists=comp_dist, shape=nd) + obs1 = Mixture("obs", w=ws, comp_dists=comp_dist, shape=nd, observed=observed) with Model() as model2: # Expected to fail if comp_shape is not provided, @@ -161,21 +162,18 @@ def test_normal_mixture_nd(self, nd, ncomp): # Furthermore, the Mixture will also raise errors when the observed # data is multidimensional but it does not broadcast well with # comp_dists. 
-            mus = Normal('mus', shape=comp_shape)
-            taus = Gamma('taus', alpha=1, beta=1, shape=comp_shape)
-            ws = Dirichlet('ws', np.ones(ncomp), shape=(ncomp,))
+            mus = Normal("mus", shape=comp_shape)
+            taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
+            ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp,))
             if len(nd) > 1:
                 if nd[-1] != ncomp:
                     with pytest.raises(ValueError):
-                        NormalMixture('m', w=ws, mu=mus, tau=taus,
-                                      shape=nd)
+                        NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
                     mixture2 = None
                 else:
-                    mixture2 = NormalMixture('m', w=ws, mu=mus, tau=taus,
-                                             shape=nd)
+                    mixture2 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
             else:
-                mixture2 = NormalMixture('m', w=ws, mu=mus, tau=taus,
-                                         shape=nd)
+                mixture2 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
             observed_fails = False
             if len(nd) >= 1 and nd != (1,):
                 try:
@@ -184,18 +182,14 @@ def test_normal_mixture_nd(self, nd, ncomp):
                     observed_fails = True
             if observed_fails:
                 with pytest.raises(ValueError):
-                    NormalMixture('obs', w=ws, mu=mus, tau=taus,
-                                  shape=nd,
-                                  observed=observed)
+                    NormalMixture("obs", w=ws, mu=mus, tau=taus, shape=nd, observed=observed)
                 obs2 = None
             else:
-                obs2 = NormalMixture('obs', w=ws, mu=mus, tau=taus,
-                                     shape=nd,
-                                     observed=observed)
+                obs2 = NormalMixture("obs", w=ws, mu=mus, tau=taus, shape=nd, observed=observed)

         testpoint = model0.test_point
-        testpoint['mus'] = test_mus
-        testpoint['taus'] = test_taus
+        testpoint["mus"] = test_mus
+        testpoint["taus"] = test_taus
         assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
         assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
         assert_allclose(obs0.logp(testpoint), obs1.logp(testpoint))
@@ -208,74 +202,66 @@ def test_normal_mixture_nd(self, nd, ncomp):

     def test_poisson_mixture(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
-            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
-            Mixture('x_obs', w, Poisson.dist(mu), observed=self.pois_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
+            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
+            Mixture("x_obs", w, Poisson.dist(mu), observed=self.pois_x)
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.pois_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.pois_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
+        )
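The Poisson test draws its data from the generate_poisson_mixture_data helper defined elsewhere in this file; its exact implementation is not shown here, but a plausible stand-in (assumed signature, illustration only) would be:

import numpy as np

def _poisson_mixture_data(w, mu, size=1000):
    # Pick a mixture component for every observation, then draw from that component's rate.
    component = np.random.choice(w.size, size=size, p=w)
    return np.random.poisson(mu[component], size=size)

pois_x = _poisson_mixture_data(np.array([0.4, 0.6]), np.array([5.0, 20.0]))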

     def test_mixture_list_of_poissons(self):
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
-            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
-            Mixture('x_obs', w,
-                    [Poisson.dist(mu[0]), Poisson.dist(mu[1])],
-                    observed=self.pois_x)
+            w = Dirichlet("w", floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
+            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
+            Mixture("x_obs", w, [Poisson.dist(mu[0]), Poisson.dist(mu[1])], observed=self.pois_x)
             step = Metropolis()
-            trace = sample(5000, step, random_seed=self.random_seed,
-                           progressbar=False, chains=1)
+            trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

-        assert_allclose(np.sort(trace['w'].mean(axis=0)),
-                        np.sort(self.pois_w),
-                        rtol=0.1, atol=0.1)
-        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
-                        np.sort(self.pois_mu),
-                        rtol=0.1, atol=0.1)
+        assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1)
+        assert_allclose(
+            np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
+        )

     def test_mixture_of_mvn(self):
-        mu1 = np.asarray([0., 1.])
+        mu1 = np.asarray([0.0, 1.0])
         cov1 = np.diag([1.5, 2.5])
-        mu2 = np.asarray([1., 0.])
+        mu2 = np.asarray([1.0, 0.0])
         cov2 = np.diag([2.5, 3.5])
-        obs = np.asarray([[.5, .5], mu1, mu2])
+        obs = np.asarray([[0.5, 0.5], mu1, mu2])
         with Model() as model:
-            w = Dirichlet('w', floatX(np.ones(2)), transform=None, shape=(2,))
+            w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2,))
             mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
             mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
-            y = Mixture('x_obs', w, [mvncomp1, mvncomp2],
-                        observed=obs)
+            y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

         # check logp of each component
-        complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
-                                 st.multivariate_normal.logpdf(obs, mu2, cov2))
-                                ).T
+        complogp_st = np.vstack(
+            (
+                st.multivariate_normal.logpdf(obs, mu1, cov1),
+                st.multivariate_normal.logpdf(obs, mu2, cov2),
+            )
+        ).T
         complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
         assert_allclose(complogp, complogp_st)

         # check logp of mixture
         testpoint = model.test_point
-        mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
-                               axis=-1, keepdims=False)
-        assert_allclose(y.logp_elemwise(testpoint),
-                        mixlogp_st)
+        mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st, axis=-1, keepdims=False)
+        assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

         # check logp of model
-        priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
-                                        alpha=np.ones(2),
-                                        )
-        assert_allclose(model.logp(testpoint),
-                        mixlogp_st.sum() + priorlogp)
+        priorlogp = st.dirichlet.logpdf(
+            x=testpoint["w"],
+            alpha=np.ones(2),
+        )
+        assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
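The reference value that test_mixture_of_mvn checks against is the usual mixture identity, log p(x) = logsumexp_k(log w_k + log p_k(x)). A stand-alone version of that computation (illustrative weights; the test takes w from model.test_point) might look roughly like:

import numpy as np
import scipy.stats as st
from scipy.special import logsumexp

obs = np.array([[0.5, 0.5], [0.0, 1.0], [1.0, 0.0]])
w = np.array([0.5, 0.5])  # illustrative mixture weights

# Per-observation log-density under each multivariate normal component.
complogp = np.vstack(
    (
        st.multivariate_normal.logpdf(obs, [0.0, 1.0], np.diag([1.5, 2.5])),
        st.multivariate_normal.logpdf(obs, [1.0, 0.0], np.diag([2.5, 3.5])),
    )
).T  # shape (n_obs, n_components)

# Mixture log-density per observation.
mixlogp = logsumexp(np.log(w) + complogp, axis=-1)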

     def test_mixture_of_mixture(self):
-        if theano.config.floatX == 'float32':
+        if theano.config.floatX == "float32":
             rtol = 1e-4
         else:
             rtol = 1e-7
@@ -283,79 +269,70 @@ def test_mixture_of_mixture(self):
         with Model() as model:
             # mixtures components
             g_comp = Normal.dist(
-                mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None),
-                sigma=1,
-                shape=nbr)
+                mu=Exponential("mu_g", lam=1.0, shape=nbr, transform=None), sigma=1, shape=nbr
+            )
             l_comp = Lognormal.dist(
-                mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None),
-                sigma=1,
-                shape=nbr)
+                mu=Exponential("mu_l", lam=1.0, shape=nbr, transform=None), sigma=1, shape=nbr
+            )
             # weight vector for the mixtures
-            g_w = Dirichlet('g_w', a=floatX(np.ones(nbr)*0.0000001), transform=None, shape=(nbr,))
-            l_w = Dirichlet('l_w', a=floatX(np.ones(nbr)*0.0000001), transform=None, shape=(nbr,))
+            g_w = Dirichlet("g_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None, shape=(nbr,))
+            l_w = Dirichlet("l_w", a=floatX(np.ones(nbr) * 0.0000001), transform=None, shape=(nbr,))
             # mixture components
             g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
             l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
             # mixture of mixtures
-            mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None, shape=(2,))
-            mix = Mixture('mix', w=mix_w,
-                          comp_dists=[g_mix, l_mix],
-                          observed=np.exp(self.norm_x))
+            mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None, shape=(2,))
+            mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x))
         test_point = model.test_point

         def mixmixlogp(value, point):
             floatX = theano.config.floatX
-            priorlogp = st.dirichlet.logpdf(x=point['g_w'],
-                                            alpha=np.ones(nbr)*0.0000001,
-                                            ).astype(floatX) + \
-                        st.expon.logpdf(x=point['mu_g']).sum(dtype=floatX) + \
-                        st.dirichlet.logpdf(x=point['l_w'],
-                                            alpha=np.ones(nbr)*0.0000001,
-                                            ).astype(floatX) + \
-                        st.expon.logpdf(x=point['mu_l']).sum(dtype=floatX) + \
-                        st.dirichlet.logpdf(x=point['mix_w'],
-                                            alpha=np.ones(2),
-                                            ).astype(floatX)
-            complogp1 = st.norm.logpdf(x=value,
-                                       loc=point['mu_g']).astype(floatX)
-            mixlogp1 = logsumexp(np.log(point['g_w']).astype(floatX) +
-                                 complogp1,
-                                 axis=-1, keepdims=True)
-            complogp2 = st.lognorm.logpdf(value,
-                                          1.,
-                                          0.,
-                                          np.exp(point['mu_l'])).astype(floatX)
-            mixlogp2 = logsumexp(np.log(point['l_w']).astype(floatX) +
-                                 complogp2,
-                                 axis=-1, keepdims=True)
+            priorlogp = (
+                st.dirichlet.logpdf(
+                    x=point["g_w"],
+                    alpha=np.ones(nbr) * 0.0000001,
+                ).astype(floatX)
+                + st.expon.logpdf(x=point["mu_g"]).sum(dtype=floatX)
+                + st.dirichlet.logpdf(
+                    x=point["l_w"],
+                    alpha=np.ones(nbr) * 0.0000001,
+                ).astype(floatX)
+                + st.expon.logpdf(x=point["mu_l"]).sum(dtype=floatX)
+                + st.dirichlet.logpdf(
+                    x=point["mix_w"],
+                    alpha=np.ones(2),
+                ).astype(floatX)
+            )
+            complogp1 = st.norm.logpdf(x=value, loc=point["mu_g"]).astype(floatX)
+            mixlogp1 = logsumexp(
+                np.log(point["g_w"]).astype(floatX) + complogp1, axis=-1, keepdims=True
+            )
+            complogp2 = st.lognorm.logpdf(value, 1.0, 0.0, np.exp(point["mu_l"])).astype(floatX)
+            mixlogp2 = logsumexp(
+                np.log(point["l_w"]).astype(floatX) + complogp2, axis=-1, keepdims=True
+            )
             complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
-            mixmixlogpg = logsumexp(np.log(point['mix_w']).astype(floatX) +
-                                    complogp_mix,
-                                    axis=-1, keepdims=False)
+            mixmixlogpg = logsumexp(
+                np.log(point["mix_w"]).astype(floatX) + complogp_mix, axis=-1, keepdims=False
+            )
             return priorlogp, mixmixlogpg

         value = np.exp(self.norm_x)[:, None]
         priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

         # check logp of mixture
-        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point),
-                        rtol=rtol)
+        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)

         # check model logp
-        assert_allclose(priorlogp + mixmixlogpg.sum(),
-                        model.logp(test_point),
-                        rtol=rtol)
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)

         # check input and check logp again
-        test_point['g_w'] = np.asarray([.1, .1, .2, .6])
-        test_point['mu_g'] = np.exp(np.random.randn(nbr))
+        test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
+        test_point["mu_g"] = np.exp(np.random.randn(nbr))
         priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
-        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point),
-                        rtol=rtol)
-        assert_allclose(priorlogp + mixmixlogpg.sum(),
-                        model.logp(test_point),
-                        rtol=rtol)
+        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)

     def test_sample_prior_and_posterior(self):
         def build_toy_dataset(N, K):
@@ -366,8 +343,7 @@ def build_toy_dataset(N, K):
             y = np.zeros((N,), dtype=np.int)
             for n in range(N):
                 k = np.argmax(np.random.multinomial(1, pi))
-                x[n, :] = np.random.multivariate_normal(mus[k],
-                                                        np.diag(stds[k]))
+                x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
                 y[n] = k
             return x, y
@@ -378,25 +354,23 @@ def build_toy_dataset(N, K):
         X, y = build_toy_dataset(N, K)

         with pm.Model() as model:
-            pi = pm.Dirichlet('pi', np.ones(K), shape=(K,))
+            pi = pm.Dirichlet("pi", np.ones(K), shape=(K,))
             comp_dist = []
             mu = []
             packed_chol = []
             chol = []
             for i in range(K):
-                mu.append(pm.Normal('mu%i' % i, 0, 10, shape=D))
+                mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
                 packed_chol.append(
-                    pm.LKJCholeskyCov('chol_cov_%i' % i,
-                                      eta=2,
-                                      n=D,
-                                      sd_dist=pm.HalfNormal.dist(2.5))
+                    pm.LKJCholeskyCov(
+                        "chol_cov_%i" % i, eta=2, n=D, sd_dist=pm.HalfNormal.dist(2.5)
+                    )
                 )
-                chol.append(pm.expand_packed_triangular(D, packed_chol[i],
-                                                        lower=True))
+                chol.append(pm.expand_packed_triangular(D, packed_chol[i], lower=True))
                 comp_dist.append(pm.MvNormal.dist(mu=mu[i], chol=chol[i]))

-            pm.Mixture('x_obs', pi, comp_dist, observed=X)
+            pm.Mixture("x_obs", pi, comp_dist, observed=X)
         with model:
             trace = pm.sample(30, tune=10, chains=1)

@@ -404,10 +378,10 @@ def build_toy_dataset(N, K):
         with model:
             ppc = pm.sample_posterior_predictive(trace, n_samples)
             prior = pm.sample_prior_predictive(samples=n_samples)
-        assert ppc['x_obs'].shape == (n_samples,) + X.shape
-        assert prior['x_obs'].shape == (n_samples,) + X.shape
-        assert prior['mu0'].shape == (n_samples, D)
-        assert prior['chol_cov_0'].shape == (n_samples, D * (D + 1) // 2)
+        assert ppc["x_obs"].shape == (n_samples,) + X.shape
+        assert prior["x_obs"].shape == (n_samples,) + X.shape
+        assert prior["mu0"].shape == (n_samples, D)
+        assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)


 class TestMixtureVsLatent(SeededTest):
@@ -419,9 +393,15 @@ def setup_method(self, *args, **kwargs):
             np.tile(
                 np.reshape(
                     np.arange(self.npop),
-                    (1, -1,)
+                    (
+                        1,
+                        -1,
+                    ),
+                ),
+                (
+                    self.nd,
+                    1,
                 ),
-                (self.nd, 1,)
             )
         )
@@ -431,17 +411,11 @@ def test_1d_w(self):
         mus = self.mus
         size = 100
         with pm.Model() as model:
-            m = pm.NormalMixture('m',
-                                 w=np.ones(npop) / npop,
-                                 mu=mus,
-                                 sigma=1e-5,
-                                 comp_shape=(nd, npop),
-                                 shape=nd)
-            z = pm.Categorical('z', p=np.ones(npop) / npop)
-            latent_m = pm.Normal('latent_m',
-                                 mu=mus[..., z],
-                                 sigma=1e-5,
-                                 shape=nd)
+            m = pm.NormalMixture(
+                "m", w=np.ones(npop) / npop, mu=mus, sigma=1e-5, comp_shape=(nd, npop), shape=nd
+            )
+            z = pm.Categorical("z", p=np.ones(npop) / npop)
+            latent_m = pm.Normal("latent_m", mu=mus[..., z], sigma=1e-5, shape=nd)

         m_val = m.random(size=size)
         latent_m_val = latent_m.random(size=size)
@@ -460,18 +434,17 @@ def test_2d_w(self):
         mus = self.mus
         size = 100
         with pm.Model() as model:
-            m = pm.NormalMixture('m',
-                                 w=np.ones((nd, npop)) / npop,
-                                 mu=mus,
-                                 sigma=1e-5,
-                                 comp_shape=(nd, npop),
-                                 shape=nd)
-            z = pm.Categorical('z', p=np.ones(npop) / npop, shape=nd)
+            m = pm.NormalMixture(
+                "m",
+                w=np.ones((nd, npop)) / npop,
+                mu=mus,
+                sigma=1e-5,
+                comp_shape=(nd, npop),
+                shape=nd,
+            )
+            z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd)
             mu = tt.as_tensor_variable([mus[i, z[i]] for i in range(nd)])
-            latent_m = pm.Normal('latent_m',
-                                 mu=mu,
-                                 sigma=1e-5,
-                                 shape=nd)
+            latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd)

         m_val = m.random(size=size)
         latent_m_val = latent_m.random(size=size)
@@ -489,32 +462,30 @@ def samples_from_same_distribution(self, *args):
         _, p_marginal = st.ks_2samp(*[s.flatten() for s in args])
         # Test if correlations within non independent draws match
         _, p_correlation = st.ks_2samp(
-            *[np.array([np.corrcoef(ss) for ss in s]).flatten()
-              for s in args]
+            *[np.array([np.corrcoef(ss) for ss in s]).flatten() for s in args]
         )
         assert p_marginal >= 0.05 and p_correlation >= 0.05
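The samples_from_same_distribution helper boils down to two two-sample Kolmogorov-Smirnov tests, one on the marginal draws and one on the within-draw correlations. Roughly, with made-up arrays standing in for the mixture and latent-variable samples:

import numpy as np
import scipy.stats as st

# Stand-ins for draws from the Mixture and from the equivalent latent-index model
# (100 draws, each of shape (3, 50)); real draws come from .random(size=...).
a = np.random.normal(size=(100, 3, 50))
b = np.random.normal(size=(100, 3, 50))

_, p_marginal = st.ks_2samp(a.flatten(), b.flatten())
_, p_correlation = st.ks_2samp(
    np.array([np.corrcoef(s) for s in a]).flatten(),
    np.array([np.corrcoef(s) for s in b]).flatten(),
)
# When both sets really come from the same distribution, both p-values
# should clear the 0.05 threshold most of the time.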
"float32": rtol = 1e-4 else: rtol = 1e-7 test_point = model.test_point - test_point['latent_m'] = test_point['m'] + test_point["latent_m"] = test_point["m"] mix_logp = mixture.logp(test_point) logps = [] for component in range(npop): - test_point['z'] = component * np.ones(z.distribution.shape) + test_point["z"] = component * np.ones(z.distribution.shape) # Count the number of axes that should be broadcasted from z to # modify the logp - sh1 = test_point['z'].shape - sh2 = test_point['latent_m'].shape + sh1 = test_point["z"].shape + sh2 = test_point["latent_m"].shape if len(sh1) > len(sh2): sh2 = (1,) * (len(sh1) - len(sh2)) + sh2 elif len(sh2) > len(sh1): sh1 = (1,) * (len(sh2) - len(sh1)) + sh1 - reps = np.prod([s2 if s1 != s2 else 1 for s1, s2 in - zip(sh1, sh2)]) + reps = np.prod([s2 if s1 != s2 else 1 for s1, s2 in zip(sh1, sh2)]) z_logp = z.logp(test_point) * reps logps.append(z_logp + latent_mix.logp(test_point)) latent_mix_logp = logsumexp(np.array(logps), axis=0)