From 431637621bcba6206ca740f6d841dc3cb6fe49bd Mon Sep 17 00:00:00 2001
From: Jayoung Ryu
Date: Mon, 25 Mar 2024 12:01:50 -0400
Subject: [PATCH] add missing files

---
 bean/framework/read_from_csvs.py |  60 +++
 bean/model/survival_model.py     | 666 +++++++++++++++++++++++++++++++
 2 files changed, 726 insertions(+)
 create mode 100644 bean/framework/read_from_csvs.py
 create mode 100644 bean/model/survival_model.py

diff --git a/bean/framework/read_from_csvs.py b/bean/framework/read_from_csvs.py
new file mode 100644
index 0000000..8331a3f
--- /dev/null
+++ b/bean/framework/read_from_csvs.py
@@ -0,0 +1,60 @@
+import argparse
+
+from perturb_tools._readwrite._funcs._read_screen_from_csvs import read_csvs
+from perturb_tools import Screen
+
+
+def get_input_parser() -> argparse.ArgumentParser:
+    """Build the argument parser for `bean-create-screen`."""
+    print(
+        r"""
+    _ _
+  /  \ '\                       _
+  |   \  \      __ _ _ ___ __ _| |_ ___
+   \   \  |    / _| '_/ -_) _` |  _/ -_)
+    `.__|/     \__|_| \___\__,_|\__\___|
+    """
+    )
+    parser = argparse.ArgumentParser(
+        description="bean-create-screen parameters",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "gRNA_info_table_csv",
+        type=str,
+        help="Path to gRNA info .csv table, with index at first column and column names at the first row.",
+    )
+    parser.add_argument(
+        "samples_info_table_csv",
+        type=str,
+        help="Path to sample info .csv table, with index at first column and column names at the first row.",
+    )
+    parser.add_argument(
+        "gRNA_counts_table_csv",
+        type=str,
+        help="Path to gRNA counts .csv table, with index at first column and column names at the first row.",
+    )
+    parser.add_argument(
+        "-e",
+        "--edits",
+        type=str,
+        help="Path to edit counts .csv table, with index at first column and column names at the first row.",
+        default=None,
+    )
+    parser.add_argument(
+        "-o",
+        "--output-prefix",
+        type=str,
+        help="Output file prefix (output will be saved as `output_prefix.h5ad`). If not provided, the `gRNA_counts_table_csv` file prefix is used.",
+        default=None,
+    )
+    return parser
+
+
+def create_screen(args: argparse.Namespace) -> Screen:
+    """Read the gRNA, sample, and count tables into a `Screen` object."""
+    return read_csvs(
+        args.gRNA_counts_table_csv,
+        args.gRNA_info_table_csv,
+        args.samples_info_table_csv,
+        layers_filenames_dict={"edits": args.edits} if args.edits else None,
+    )
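+
+
+def main():
+    """Minimal CLI sketch tying the parser to screen creation.
+
+    Illustrative only: assumes the perturb-tools `Screen` exposes an
+    AnnData-style `write` method; adjust if the API differs.
+    """
+    args = get_input_parser().parse_args()
+    screen = create_screen(args)
+    # Fall back to the counts-table prefix when no output prefix is given,
+    # matching the behavior described in the --output-prefix help text.
+    prefix = args.output_prefix or args.gRNA_counts_table_csv.rsplit(".", 1)[0]
+    screen.write(f"{prefix}.h5ad")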
diff --git a/bean/model/survival_model.py b/bean/model/survival_model.py
new file mode 100644
index 0000000..9b68e2c
--- /dev/null
+++ b/bean/model/survival_model.py
@@ -0,0 +1,666 @@
+import torch
+import pyro
+from pyro import poutine
+import pyro.distributions as dist
+import torch.distributions.constraints as constraints
+from .utils import (
+    get_alpha,
+    scale_pi_by_accessibility,
+)
+from ..preprocessing.data_class import (
+    VariantSurvivalScreenData,
+    VariantSurvivalReporterScreenData,
+    TilingSurvivalReporterScreenData,
+)
+
+
+def NormalModel(
+    data: VariantSurvivalScreenData, mask_thres: int = 10, use_bcmatch: bool = True
+):
+    """Model fit on guide counts only.
+
+    Args:
+        data: input data
+        mask_thres: (replicate, guide) observations with total count at or
+            below this threshold are masked out of the likelihood
+        use_bcmatch: whether to also fit barcode-matched guide counts
+    """
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    replicate_plate2 = pyro.plate("rep_plate2", data.n_reps, dim=-2)
+    time_plate = pyro.plate("time_plate", data.n_condits, dim=-2)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for phenotype means
+    with pyro.plate("guide_plate0", 1):
+        with pyro.plate("guide_plate1", data.n_targets):
+            # In survival analysis, the fitted effect size acts as a
+            # log growth rate: r = exp(mu) scales abundance over time.
+            mu_alleles = pyro.sample("mu_alleles", dist.Laplace(0, 1))
+
+    mu_center = mu_alleles
+    mu = torch.repeat_interleave(mu_center, data.target_lengths, dim=0)
+    r = torch.exp(mu)
+    assert r.shape == (data.n_guides, 1)
+
+    with pyro.plate("replicate_plate0", data.n_reps, dim=-1):
+        q_0 = pyro.sample(
+            "initial_guide_abundance",
+            dist.Dirichlet(torch.ones((data.n_reps, data.n_guides))),
+        )
+    with replicate_plate:
+        with time_plate as t:
+            time = data.timepoints[t]
+            assert time.shape == (data.n_condits,)
+            with guide_plate:
+                alleles_p_time = torch.pow(
+                    r.unsqueeze(0).expand((data.n_condits, -1, -1)),
+                    time.unsqueeze(-1).unsqueeze(-1).expand((-1, data.n_guides, 1)),
+                )
+                assert alleles_p_time.shape == (data.n_condits, data.n_guides, 1)
+
+        expected_allele_p = alleles_p_time.unsqueeze(0).expand(
+            data.n_reps, -1, -1, -1
+        ) * q_0.unsqueeze(1).unsqueeze(-1).expand((-1, data.n_condits, -1, -1))
+        expected_guide_p = expected_allele_p.sum(axis=-1)
+        assert expected_guide_p.shape == (
+            data.n_reps,
+            data.n_condits,
+            data.n_guides,
+        ), expected_guide_p.shape
+
+    with replicate_plate2:
+        with pyro.plate("guide_plate3", data.n_guides, dim=-1):
+            a = get_alpha(expected_guide_p, data.size_factor, data.sample_mask, data.a0)
+            assert data.X.shape == (
+                data.n_reps,
+                data.n_condits,
+                data.n_guides,
+            )
+            with poutine.mask(
+                mask=torch.logical_and(
+                    data.X_masked.permute(0, 2, 1).sum(axis=-1) > mask_thres,
+                    data.repguide_mask,
+                )
+            ):
+                pyro.sample(
+                    "guide_counts",
+                    dist.DirichletMultinomial(a, validate_args=False),
+                    obs=data.X_masked.permute(0, 2, 1),
+                )
+            if use_bcmatch:
+                a_bcmatch = get_alpha(
+                    expected_guide_p,
+                    data.size_factor_bcmatch,
+                    data.sample_mask,
+                    data.a0_bcmatch,
+                )
+                with poutine.mask(
+                    mask=torch.logical_and(
+                        data.X_bcmatch_masked.permute(0, 2, 1).sum(axis=-1)
+                        > mask_thres,
+                        data.repguide_mask,
+                    )
+                ):
+                    assert (
+                        a_bcmatch > 0
+                    ).all(), f"{torch.where(a_bcmatch < 1e-6)}\n{torch.where(torch.isnan(a_bcmatch))}"
+                    pyro.sample(
+                        "guide_bcmatch_counts",
+                        dist.DirichletMultinomial(a_bcmatch, validate_args=False),
+                        obs=data.X_bcmatch_masked.permute(0, 2, 1),
+                    )
+    return alleles_p_time
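+
+
+# Example (sketch): NormalModel can be fit with stochastic variational inference
+# against the mean-field `NormalGuide` defined below; `data` is a preprocessed
+# VariantSurvivalScreenData. The learning rate and step count are placeholders,
+# not tuned values.
+#
+#   svi = pyro.infer.SVI(
+#       NormalModel, NormalGuide, pyro.optim.Adam({"lr": 0.01}),
+#       loss=pyro.infer.Trace_ELBO(),
+#   )
+#   for _ in range(2000):
+#       loss = svi.step(data)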
+
+
+def ControlNormalModel(data, mask_thres=10, use_bcmatch=True):
+    """Fit a single, shared effect size across all control guides."""
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    replicate_plate2 = pyro.plate("rep_plate2", data.n_reps, dim=-2)
+    time_plate = pyro.plate("time_plate", data.n_condits, dim=-2)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for the shared phenotype mean
+    mu_alleles = pyro.sample("mu_alleles", dist.Laplace(0, 1))
+    mu = mu_alleles.repeat(data.n_guides).unsqueeze(-1)
+    r = torch.exp(mu)
+    with replicate_plate:
+        with pyro.plate("guide_plate2", data.n_guides):
+            q_0 = pyro.sample(
+                "initial_guide_abundance",
+                dist.Dirichlet(torch.ones((data.n_reps, data.n_guides))),
+            )
+        with time_plate as t:
+            time = data.timepoints[t]
+            assert time.shape == (data.n_condits,)
+            with guide_plate:
+                alleles_p_time = r.unsqueeze(0).expand(
+                    (data.n_condits, -1, -1)
+                ) ** time.unsqueeze(-1).unsqueeze(-1).expand((-1, data.n_guides, 1))
+                assert alleles_p_time.shape == (data.n_condits, data.n_guides, 1)
+
+        expected_allele_p = alleles_p_time.unsqueeze(0).expand(
+            data.n_reps, -1, -1, -1
+        ) * q_0.unsqueeze(1).unsqueeze(-1).expand((-1, data.n_condits, -1, -1))
+        expected_guide_p = expected_allele_p.sum(axis=-1)
+        assert expected_guide_p.shape == (
+            data.n_reps,
+            data.n_condits,
+            data.n_guides,
+        ), expected_guide_p.shape
+
+    with replicate_plate2:
+        with pyro.plate("guide_plate3", data.n_guides, dim=-1):
+            a = get_alpha(expected_guide_p, data.size_factor, data.sample_mask, data.a0)
+
+            assert (
+                data.X.shape
+                == data.X_bcmatch.shape
+                == (
+                    data.n_reps,
+                    data.n_condits,
+                    data.n_guides,
+                )
+            )
+            with poutine.mask(
+                mask=torch.logical_and(
+                    data.X_masked.permute(0, 2, 1).sum(axis=-1) > mask_thres,
+                    data.repguide_mask,
+                )
+            ):
+                pyro.sample(
+                    "guide_counts",
+                    dist.DirichletMultinomial(a, validate_args=False),
+                    obs=data.X_masked.permute(0, 2, 1),
+                )
+            if use_bcmatch:
+                a_bcmatch = get_alpha(
+                    expected_guide_p,
+                    data.size_factor_bcmatch,
+                    data.sample_mask,
+                    data.a0_bcmatch,
+                )
+                with poutine.mask(
+                    mask=torch.logical_and(
+                        data.X_bcmatch_masked.permute(0, 2, 1).sum(axis=-1)
+                        > mask_thres,
+                        data.repguide_mask,
+                    )
+                ):
+                    pyro.sample(
+                        "guide_bcmatch_counts",
+                        dist.DirichletMultinomial(a_bcmatch, validate_args=False),
+                        obs=data.X_bcmatch_masked.permute(0, 2, 1),
+                    )
+
+    return alleles_p_time
+
+
+def MixtureNormalModel(
+    data: VariantSurvivalReporterScreenData,
+    alpha_prior: float = 1,
+    use_bcmatch: bool = True,
+    sd_scale: float = 0.01,
+    scale_by_accessibility: bool = False,
+    fit_noise: bool = False,
+):
+    """Model on guide and reporter counts, mixing edited and unedited alleles.
+
+    Args:
+        data: input data
+        alpha_prior: concentration scale for the allele-fraction Dirichlet
+        use_bcmatch: whether to also fit barcode-matched guide counts
+        sd_scale: retained for signature compatibility; unused in this model
+        scale_by_accessibility: if True, pi fitted from reporter data is
+            scaled by accessibility
+        fit_noise: if True, fit the noise term of the accessibility scaling
+    """
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    replicate_plate2 = pyro.plate("rep_plate2", data.n_reps, dim=-2)
+    time_plate = pyro.plate("time_plate", data.n_condits, dim=-2)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for phenotype means
+    with pyro.plate("guide_plate0", 1):
+        with pyro.plate("guide_plate1", data.n_targets):
+            mu_alleles = pyro.sample("mu_alleles", dist.Laplace(0, 1))
+    # Column 0 is the unedited (zero-effect) component; column 1 the edited one.
+    mu_center = torch.cat([torch.zeros((data.n_targets, 1)), mu_alleles], axis=-1)
+    mu = torch.repeat_interleave(mu_center, data.target_lengths, dim=0)
+    assert mu.shape == (data.n_guides, 2)
+    r = torch.exp(mu)
+
+    with pyro.plate("replicate_plate0", data.n_reps, dim=-1):
+        q_0 = pyro.sample(
+            "initial_guide_abundance",
+            dist.Dirichlet(torch.ones((data.n_reps, data.n_guides))),
+        )
+    # The pi should be Dirichlet distributed instead of independent betas
+    alpha_pi = pyro.param(
+        "alpha_pi",
+        torch.ones(
+            (
+                data.n_guides,
+                2,
+            )
+        )
+        * alpha_prior,
+        constraint=constraints.positive,
+    )
+    pi_a_scaled = alpha_pi / alpha_pi.sum(axis=-1)[:, None] * data.pi_a0[:, None]
+    assert alpha_pi.shape == (
+        data.n_guides,
+        2,
+    ), alpha_pi.shape
+    with replicate_plate:
+        with guide_plate, poutine.mask(mask=data.repguide_mask.unsqueeze(1)):
+            # Accounting for sample-specific overall edit rate across all guides:
+            # P(allele | guide, bin=bulk)
+            pi = pyro.sample(
+                "pi",
+                dist.Dirichlet(
+                    pi_a_scaled.unsqueeze(0).unsqueeze(0).expand(data.n_reps, 1, -1, -1)
+                ),
+            )
+            assert pi.shape == (
+                data.n_reps,
+                1,
+                data.n_guides,
+                2,
+            ), pi.shape
+            pyro.sample(
+                "bulk_allele_count",
+                dist.Multinomial(probs=pi, validate_args=False),
+                obs=data.allele_counts_control,
+            )
+    if scale_by_accessibility:
+        # Endogenous target site editing rate may differ from the reporter's
+        pi = scale_pi_by_accessibility(
+            pi, data.guide_accessibility, fit_noise=fit_noise
+        )
+    with replicate_plate:
+        with time_plate as t:
+            time = data.timepoints[t]
+            assert time.shape == (data.n_condits,)
+
+            with guide_plate:
+                alleles_p_time = r.unsqueeze(0).expand(
+                    (data.n_condits, -1, -1)
+                ) ** time.unsqueeze(-1).unsqueeze(-1).expand((-1, data.n_guides, 1))
+                assert alleles_p_time.shape == (data.n_condits, data.n_guides, 2)
+
+        expected_allele_p = (
+            pi.expand(data.n_reps, data.n_condits, -1, -1)
+            * alleles_p_time[None, :, :, :]
+            * q_0.unsqueeze(1).unsqueeze(-1).expand((-1, data.n_condits, -1, -1))
+        )
+        expected_guide_p = expected_allele_p.sum(axis=-1)
+        assert expected_guide_p.shape == (
+            data.n_reps,
+            data.n_condits,
+            data.n_guides,
+        ), expected_guide_p.shape
+
+    with replicate_plate2:
+        with pyro.plate("guide_plate3", data.n_guides, dim=-1):
+            a = get_alpha(expected_guide_p, data.size_factor, data.sample_mask, data.a0)
+            a_bcmatch = get_alpha(
+                expected_guide_p,
+                data.size_factor_bcmatch,
+                data.sample_mask,
+                data.a0_bcmatch,
+            )
+            assert (
+                data.X.shape
+                == data.X_bcmatch.shape
+                == (
+                    data.n_reps,
+                    data.n_condits,
+                    data.n_guides,
+                )
+            )
+            with poutine.mask(
+                mask=torch.logical_and(
+                    data.X_masked.permute(0, 2, 1).sum(axis=-1) > 10, data.repguide_mask
+                )
+            ):
+                pyro.sample(
+                    "guide_counts",
+                    dist.DirichletMultinomial(a, validate_args=False),
+                    obs=data.X_masked.permute(0, 2, 1),
+                )
+            if use_bcmatch:
+                with poutine.mask(
+                    mask=torch.logical_and(
+                        data.X_bcmatch_masked.permute(0, 2, 1).sum(axis=-1) > 10,
+                        data.repguide_mask,
+                    )
+                ):
+                    pyro.sample(
+                        "guide_bcmatch_counts",
+                        dist.DirichletMultinomial(a_bcmatch, validate_args=False),
+                        obs=data.X_bcmatch_masked.permute(0, 2, 1),
+                    )
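+
+
+# Shape sketch for the two-component mixture above (illustrative): for each
+# guide, column 0 of `mu`/`r` is the unedited state (effect fixed at 0) and
+# column 1 the edited state, so the expected guide abundance at time t is
+#   q_0 * (pi_unedited * 1**t + pi_edited * exp(mu)**t),
+# i.e. a pi-weighted mixture of exponential growth curves.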
+
+
+def NormalGuide(data):
+    """Mean-field guide for NormalModel."""
+    with pyro.plate("guide_plate0", 1):
+        with pyro.plate("guide_plate1", data.n_targets):
+            mu_loc = pyro.param("mu_loc", torch.zeros((data.n_targets, 1)))
+            mu_scale = pyro.param(
+                "mu_scale",
+                torch.ones((data.n_targets, 1)),
+                constraint=constraints.positive,
+            )
+            pyro.sample("mu_alleles", dist.Normal(mu_loc, mu_scale))
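+
+
+# Example (sketch): after SVI with this guide, variational posterior summaries
+# can be read back from Pyro's parameter store under the names registered above:
+#
+#   mu_hat = pyro.param("mu_loc").detach()     # posterior mean of per-target effects
+#   sd_hat = pyro.param("mu_scale").detach()   # posterior scale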
+
+
+def MixtureNormalGuide(
+    data,
+    alpha_prior: float = 1,
+    use_bcmatch: bool = True,
+    scale_by_accessibility: bool = False,
+    fit_noise: bool = False,
+):
+    """Mean-field guide for MixtureNormalModel."""
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for phenotype means
+    mu_loc = pyro.param("mu_loc", torch.zeros((data.n_targets, 1)))
+    mu_scale = pyro.param(
+        "mu_scale", torch.ones((data.n_targets, 1)), constraint=constraints.positive
+    )
+    with pyro.plate("guide_plate0", 1):
+        with pyro.plate("guide_plate1", data.n_targets):
+            mu_alleles = pyro.sample("mu_alleles", dist.Normal(mu_loc, mu_scale))
+    mu_center = torch.cat([torch.zeros((data.n_targets, 1)), mu_alleles], axis=-1)
+    mu = torch.repeat_interleave(mu_center, data.target_lengths, dim=0)
+    assert mu.shape == (data.n_guides, 2)
+
+    # The pi should be Dirichlet distributed instead of independent betas
+    alpha_pi = pyro.param(
+        "alpha_pi",
+        torch.ones(
+            (
+                data.n_guides,
+                2,
+            )
+        )
+        * alpha_prior,
+        constraint=constraints.positive,
+    )
+    assert alpha_pi.shape == (
+        data.n_guides,
+        2,
+    ), alpha_pi.shape
+    pi_a_scaled = alpha_pi / alpha_pi.sum(axis=-1)[:, None] * data.pi_a0[:, None]
+
+    with replicate_plate:
+        with guide_plate:
+            pi = pyro.sample(
+                "pi",
+                dist.Dirichlet(
+                    pi_a_scaled.unsqueeze(0)
+                    .unsqueeze(0)
+                    .expand(data.n_reps, 1, -1, -1)
+                    .clamp(1e-5)
+                ),
+            )
+            assert pi.shape == (
+                data.n_reps,
+                1,
+                data.n_guides,
+                2,
+            ), pi.shape
+    if scale_by_accessibility:
+        # Endogenous target site editing rate may differ from the reporter's
+        pi = scale_pi_by_accessibility(
+            pi, data.guide_accessibility, fit_noise=fit_noise
+        )
+
+
+def ControlNormalGuide(data, mask_thres=10, use_bcmatch=True):
+    """Mean-field guide for ControlNormalModel: fits a single shared mean."""
+    mu_loc = pyro.param("mu_loc", torch.tensor(0.0))
+    mu_scale = pyro.param(
+        "mu_scale", torch.tensor(1.0), constraint=constraints.positive
+    )
+    pyro.sample("mu_alleles", dist.Normal(mu_loc, mu_scale))
+
+
+def MultiMixtureNormalModel(
+    data: TilingSurvivalReporterScreenData,
+    alpha_prior=1,
+    use_bcmatch=True,
+    sd_scale=0.01,
+    norm_pi=False,
+    scale_by_accessibility=False,
+    epsilon=1e-5,
+    fit_noise: bool = False,
+):
+    """Tiling version of MixtureNormalModel: per-allele effects are sums of per-edit effects."""
+
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    replicate_plate2 = pyro.plate("rep_plate2", data.n_reps, dim=-2)
+    time_plate = pyro.plate("time_plate", data.n_condits, dim=-2)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for per-edit phenotype means and scales
+    with pyro.plate("guide_plate1", data.n_edits):
+        mu_edits = pyro.sample("mu_alleles", dist.Laplace(0, 1))
+        sd_edits = pyro.sample(
+            "sd_alleles",
+            dist.LogNormal(
+                torch.zeros((data.n_edits,)),
+                torch.ones(
+                    data.n_edits,
+                )
+                * sd_scale,
+            ),
+        )
+    assert mu_edits.shape == sd_edits.shape == (data.n_edits,)
+    assert data.allele_to_edit.shape == (
+        data.n_guides,
+        data.n_max_alleles - 1,
+        data.n_edits,
+    )
+    # Each allele's phenotype is the sum of its constituent edits' effects.
+    mu_alleles = torch.matmul(data.allele_to_edit, mu_edits)
+    assert mu_alleles.shape == (data.n_guides, data.n_max_alleles - 1)
+
+    mu = torch.cat([torch.zeros((data.n_guides, 1)), mu_alleles], axis=-1)
+    r = torch.exp(mu)
+    # The pi should be Dirichlet distributed instead of independent betas
+    alpha_pi0 = (
+        torch.ones(
+            (
+                data.n_guides,
+                data.n_max_alleles,
+            )
+        )
+        * alpha_prior
+    )
+    # Effectively remove alphas for non-existing alleles
+    assert data.allele_mask.shape == (data.n_guides, data.n_max_alleles)
+    alpha_pi0[~data.allele_mask] = epsilon
+    alpha_pi = pyro.param("alpha_pi", alpha_pi0, constraint=constraints.positive)
+    alpha_pi[~data.allele_mask] = epsilon
+    pi_a_scaled = (
+        (alpha_pi + epsilon / alpha_pi.shape[-1])
+        / (alpha_pi.sum(axis=-1)[:, None] + epsilon)
+        * data.pi_a0[:, None]
+    )
+    pi_a_scaled[pi_a_scaled < epsilon] = epsilon
+    if torch.isnan(pi_a_scaled).any():
+        raise ValueError(
+            f"NaN in pi_a_scaled: alpha_pi is NaN at "
+            f"{torch.where(alpha_pi.isnan())} and negative at "
+            f"{torch.where(alpha_pi < 0)}."
+        )
+    if (pi_a_scaled <= 0).any():
+        raise ValueError(
+            f"Non-positive Dirichlet concentration in pi_a_scaled: "
+            f"alpha_pi is negative at {torch.where(alpha_pi < 0)}."
+        )
+    with replicate_plate:
+        with guide_plate, poutine.mask(mask=data.repguide_mask.unsqueeze(1)):
+            pi = pyro.sample(
+                "pi",
+                dist.Dirichlet(
+                    pi_a_scaled.unsqueeze(0).unsqueeze(0).expand(data.n_reps, 1, -1, -1)
+                ),
+            )
+            pyro.sample(
+                "bulk_allele_count",
+                dist.Multinomial(probs=pi, validate_args=False),
+                obs=data.allele_counts_control,
+            )
+    if scale_by_accessibility:
+        # Endogenous target site editing rate may differ from the reporter's
+        pi = scale_pi_by_accessibility(
+            pi, data.guide_accessibility, fit_noise=fit_noise
+        )
+
+    with replicate_plate:
+        with pyro.plate("guide_plate2", data.n_guides):
+            q_0 = pyro.sample(
+                "initial_guide_abundance",
+                dist.Dirichlet(torch.ones((data.n_reps, data.n_guides))),
+            )
+        with time_plate as t:
+            time = data.timepoints[t]
+            assert time.shape == (data.n_condits,)
+            with guide_plate, poutine.mask(mask=data.repguide_mask.unsqueeze(1)):
+                alleles_p_time = r.unsqueeze(0).expand(
+                    (data.n_condits, -1, -1)
+                ) ** time.unsqueeze(-1).unsqueeze(-1).expand((-1, data.n_guides, 1))
+                mask = data.allele_mask.unsqueeze(0).expand((data.n_condits, -1, -1))
+                alleles_p_time[~mask] = 0.0
+
+            assert alleles_p_time.shape == (
+                data.n_condits,
+                data.n_guides,
+                data.n_max_alleles,
+            )
+        expected_allele_p = (
+            pi.expand(data.n_reps, data.n_condits, -1, -1)
+            * alleles_p_time[None, :, :, :]
+            * q_0.unsqueeze(1).unsqueeze(-1).expand((-1, data.n_condits, -1, -1))
+        )
+        expected_guide_p = expected_allele_p.sum(axis=-1)
+        assert expected_guide_p.shape == (
+            data.n_reps,
+            data.n_condits,
+            data.n_guides,
+        ), expected_guide_p.shape
+
+    with replicate_plate2:
+        with pyro.plate("guide_plate3", data.n_guides, dim=-1):
+            a = get_alpha(expected_guide_p, data.size_factor, data.sample_mask, data.a0)
+            a_bcmatch = get_alpha(
+                expected_guide_p,
+                data.size_factor_bcmatch,
+                data.sample_mask,
+                data.a0_bcmatch,
+            )
+            assert (
+                data.X.shape
+                == data.X_bcmatch.shape
+                == (
+                    data.n_reps,
+                    data.n_condits,
+                    data.n_guides,
+                )
+            )
+            with poutine.mask(
+                mask=torch.logical_and(
+                    data.X_masked.permute(0, 2, 1).sum(axis=-1) > 10, data.repguide_mask
+                )
+            ):
+                pyro.sample(
+                    "guide_counts",
+                    dist.DirichletMultinomial(a, validate_args=False),
+                    obs=data.X_masked.permute(0, 2, 1),
+                )
+            if use_bcmatch:
+                with poutine.mask(
+                    mask=torch.logical_and(
+                        data.X_bcmatch_masked.permute(0, 2, 1).sum(axis=-1) > 10,
+                        data.repguide_mask,
+                    )
+                ):
+                    pyro.sample(
+                        "guide_bcmatch_counts",
+                        dist.DirichletMultinomial(a_bcmatch, validate_args=False),
+                        obs=data.X_bcmatch_masked.permute(0, 2, 1),
+                    )
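+
+
+# Worked shape example for the allele-to-edit decomposition used above
+# (illustrative numbers): with n_guides=2, n_max_alleles-1=3, and n_edits=4,
+# `data.allele_to_edit` is a (2, 3, 4) indicator-like tensor and `mu_edits` a
+# (4,) vector, so torch.matmul yields a (2, 3) tensor of per-allele effects,
+# each allele's phenotype being the sum of its constituent edits' effects.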
+
+
+def MultiMixtureNormalGuide(
+    data,
+    alpha_prior=1,
+    use_bcmatch=True,
+    epsilon=1e-5,
+    scale_by_accessibility: bool = False,
+    fit_noise: bool = False,
+):
+    """Mean-field guide for MultiMixtureNormalModel."""
+    replicate_plate = pyro.plate("rep_plate", data.n_reps, dim=-3)
+    guide_plate = pyro.plate("guide_plate", data.n_guides, dim=-1)
+
+    # Set the prior for per-edit phenotype means
+    mu_loc = pyro.param("mu_loc", torch.zeros((data.n_edits,)))
+    mu_scale = pyro.param(
+        "mu_scale", torch.ones((data.n_edits,)), constraint=constraints.positive
+    )
+    with pyro.plate("guide_plate1", data.n_edits):
+        mu_edits = pyro.sample("mu_alleles", dist.Normal(mu_loc, mu_scale))
+    mu_alleles = torch.matmul(data.allele_to_edit, mu_edits)
+    assert mu_alleles.shape == (data.n_guides, data.n_max_alleles - 1), (
+        mu_alleles.shape,
+        data.n_max_alleles,
+        data.n_edits,
+    )
+
+    mu = torch.cat([torch.zeros((data.n_guides, 1)), mu_alleles], axis=-1)
+    assert mu.shape == (data.n_guides, data.n_max_alleles), (mu.shape,)
+    # The pi should be Dirichlet distributed instead of independent betas
+    alpha_pi0 = (
+        torch.ones(
+            (
+                data.n_guides,
+                data.n_max_alleles,
+            )
+        )
+        * alpha_prior
+    )
+    # Effectively remove alphas for non-existing alleles
+    assert data.allele_mask.shape == (data.n_guides, data.n_max_alleles)
+    alpha_pi0[~data.allele_mask] = epsilon
+    alpha_pi = pyro.param("alpha_pi", alpha_pi0, constraint=constraints.positive)
+    alpha_pi[~data.allele_mask] = epsilon
+    pi_a_scaled = alpha_pi / alpha_pi.sum(axis=-1)[:, None] * data.pi_a0[:, None]
+
+    with replicate_plate:
+        with guide_plate, poutine.mask(mask=data.repguide_mask.unsqueeze(1)):
+            pi = pyro.sample(
+                "pi",
+                dist.Dirichlet(
+                    pi_a_scaled.unsqueeze(0)
+                    .unsqueeze(0)
+                    .expand(data.n_reps, 1, -1, -1)
+                ),
+            )
+            assert pi.shape == (
+                data.n_reps,
+                1,
+                data.n_guides,
+                data.n_max_alleles,
+            ), pi.shape
+    if scale_by_accessibility:
+        # Endogenous target site editing rate may differ from the reporter's
+        pi = scale_pi_by_accessibility(
+            pi, data.guide_accessibility, fit_noise=fit_noise
+        )
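+
+
+# End-to-end sketch (assumptions flagged): fitting the tiling model mirrors the
+# variant case; `data` is a preprocessed TilingSurvivalReporterScreenData, and
+# the learning rate and step count are placeholders, not tuned values.
+#
+#   pyro.clear_param_store()
+#   svi = pyro.infer.SVI(
+#       MultiMixtureNormalModel, MultiMixtureNormalGuide,
+#       pyro.optim.Adam({"lr": 0.01}), loss=pyro.infer.Trace_ELBO(),
+#   )
+#   for _ in range(2000):
+#       loss = svi.step(data)
+#   mu_edits_hat = pyro.param("mu_loc").detach()  # per-edit effect estimates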