diff --git a/bean/cli/profile.py b/bean/cli/profile.py index c780ccc..adff198 100755 --- a/bean/cli/profile.py +++ b/bean/cli/profile.py @@ -33,6 +33,8 @@ def main(args): max_editing_window_length=args.window_length, pam_col=args.pam_col, save_fig=args.save_fig, + reporter_length=args.reporter_length, + reporter_right_flank_length=args.reporter_right_flank_length, ), kernel_name="bean_python3", ) diff --git a/bean/notebooks/profile_editing_preference.ipynb b/bean/notebooks/profile_editing_preference.ipynb index 6358949..8d0da5c 100755 --- a/bean/notebooks/profile_editing_preference.ipynb +++ b/bean/notebooks/profile_editing_preference.ipynb @@ -58,7 +58,9 @@ "output_prefix = \"editing pattern\"\n", "max_editing_window_length = 6\n", "pam_col=\"5-nt PAM\"\n", - "save_fig=False" + "save_fig=False\n", + "reporter_length=32\n", + "reporter_right_flank_length=6" ] }, { @@ -126,7 +128,7 @@ } ], "source": [ - "cedit_rates_df = bean.plotting.editing_patterns.get_edit_rates(cdata_bulk)" + "cedit_rates_df = bean.plotting.editing_patterns.get_edit_rates(cdata_bulk, reporter_length=reporter_length, reporter_right_flank_length=reporter_right_flank_length)" ] }, { diff --git a/bean/plotting/editing_patterns.py b/bean/plotting/editing_patterns.py index f22f766..594756d 100755 --- a/bean/plotting/editing_patterns.py +++ b/bean/plotting/editing_patterns.py @@ -67,6 +67,8 @@ def get_edit_rates( edit_count_key="edit_counts", adjust_spacer_pos: bool = True, reporter_column: str = "reporter", + reporter_length=32, + reporter_right_flank_length=6, ): """ Obtain position- and context-wise editing rate (context: base preceding the target base position). @@ -98,7 +100,9 @@ def get_edit_rates( if "guide_len" not in bdata.guides.columns: bdata.guides["guide_len"] = bdata.guides.sequence.map(len) start_pad = ( - 32 - 6 - bdata.guides.guide_len[edit_rates_agg.guide].reset_index(drop=True) + reporter_length + - reporter_right_flank_length + - bdata.guides.guide_len[edit_rates_agg.guide].reset_index(drop=True) ) edit_rates_agg["spacer_pos"] = (edit_rates_agg.rel_pos - start_pad).astype(int) edit_rates_agg = edit_rates_agg[ @@ -529,7 +533,7 @@ def plot_context_specificity( save_tbl["base_change"] = target_base_change save_tbls.append(save_tbl) ic_tbl.index = [-1, 0, 1] - logomaker.Logo(ic_tbl.astype(float), ax=axes[j]) + logomaker.Logo(ic_tbl.astype(float).fillna(0), ax=axes[j]) axes[j].set_ylabel("Relative frequency") axes[j].set_title(target_base_change) if save_fig: diff --git a/bean/plotting/parser.py b/bean/plotting/parser.py index 6850a35..a968fd5 100644 --- a/bean/plotting/parser.py +++ b/bean/plotting/parser.py @@ -1,5 +1,6 @@ import argparse + def parse_args(parser=None): if parser is None: parser = argparse.ArgumentParser() @@ -48,5 +49,16 @@ def parse_args(parser=None): action="store_true", help="Save .pdf of the figures included in the report.", ) - - return parser \ No newline at end of file + parser.add_argument( + "--reporter-length", + type=int, + default=32, + help="Length of reporter sequence in the construct.", + ) + parser.add_argument( + "--reporter-right-flank-length", + type=int, + default=6, + help="Length of the right-flanking nucleotides of protospacer in the reporter.", + ) + return parser diff --git a/bean/plotting/utils.py b/bean/plotting/utils.py index 897d0c8..6cbfbec 100755 --- a/bean/plotting/utils.py +++ b/bean/plotting/utils.py @@ -2,24 +2,38 @@ import os import bean as be + def check_args(args): if args.output_prefix is None: sample_id = os.path.splitext(os.path.basename(args.bdata_path))[0] - args.output_prefix = ( - f"{os.path.dirname(args.bdata_path)}/bean_profile.{sample_id}/{sample_id}" - ) + sample_path = os.path.dirname(args.bdata_path) + if not sample_path: + sample_path = "." + args.output_prefix = f"{sample_path}/bean_profile.{sample_id}/{sample_id}" os.makedirs(args.output_prefix, exist_ok=True) if args.window_length < 1: raise ValueError(f"window_length {args.window_length} is too small.") cdata = be.read_h5ad(args.bdata_path) cdata.samples["replicate"] = cdata.samples[args.replicate_col] - cdata_bulk = cdata[:,cdata.samples[args.condition_col] == args.control_condition] + cdata_bulk = cdata[:, cdata.samples[args.condition_col] == args.control_condition] if len(cdata_bulk) == 0: raise ValueError( f"No samples with bdata.samples['{args.condition_col}'] == {args.control_condition}. Please check your input arguments --condition-col & --control-condition." ) - if args.pam_col is not None and args.pam_col not in cdata.guides.columns: + if args.pam_col is not None and args.pam_col not in cdata.guides.columns: raise ValueError( f"Specified --pam-col `{args.pam_col}` does not exist in ReporterScreen.guides.columns ({cdata.guides.columns}). Please check your input. If you don't want PAM output, please do not provide --pam-col argument.s" ) + if args.replicate_col not in cdata.samples.columns: + raise ValueError( + f"Specified --replicate-col `{args.replicate_col}` does not exist in ReporterScreen.samples.columns ({cdata.samples.columns}). Please check your input." + ) + if args.condition_col not in cdata.samples.columns: + raise ValueError( + f"Specified --condition-col `{args.condition_col}` does not exist in ReporterScreen.samples.columns ({cdata.samples.columns}). Please check your input." + ) + if args.control_condition not in cdata.samples[args.condition_col].tolist(): + raise ValueError( + f"Specified --control-condition `{args.control_condition}` does not exist in ReporterScreen.samples[{args.condition_col}] ({cdata.samples[args.condition_col]}). Please check your input." + ) return args