Merge pull request #13 from pinellolab/dev
Incorporating updates into main branch
jykr authored Oct 31, 2023
2 parents 7a4ef31 + bf071a8 commit c0d8ba3
Showing 100 changed files with 286,607 additions and 29,938 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/CI.yml
@@ -0,0 +1,36 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: CI

on: [push]

permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.8'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install torch==1.12.1+cpu torchvision==0.13.1+cpu torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cpu
        pip install -r requirements.txt
        pip install -e .
    - name: Test with pytest
      run: |
        pip install pytest
        pytest --sparse-ordering
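
Note on the test step above: pytest is simply run against whatever test modules the repository ships, with the --sparse-ordering flag assumed to come from a pytest ordering plugin pulled in via requirements.txt. A minimal sketch of a test module that this step would discover (not part of this commit; the path tests/test_import_smoke.py and the package name bean are assumptions based on the file layout in this diff):

# tests/test_import_smoke.py - hypothetical smoke test discovered by the CI's pytest step
def test_package_imports():
    # assumes the editable install ("pip install -e .") exposes the package as "bean",
    # matching the bean/ directory seen elsewhere in this diff
    import bean

    assert bean is not None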
209 changes: 209 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

443 changes: 362 additions & 81 deletions README.md

Large diffs are not rendered by default.

30 changes: 20 additions & 10 deletions bean/annotate/filter_alleles.py
@@ -9,6 +9,7 @@
from ..framework.Edit import Allele
from ..framework.AminoAcidEdit import CodingNoncodingAllele
from ._supporting_fn import map_alleles_to_filtered
+from .utils import fast_concat


def sum_column_groups(mat, column_index_list):
@@ -389,6 +390,7 @@ def filter_allele_prop(
    adata,
    allele_uns_key: str,
    allele_prop_thres: float = 0.05,
+    allele_count_thres: int = 0,
    sample_prop_thres: float = 0.1,
    map_to_filtered=True,
    retain_max=True,
@@ -403,6 +405,7 @@
    Arguments
    adata: ReporterScreen object (should be from bulk)
    -- allele_prop_thres: Proportion of allele among the barcode matched guide counts to filter for
+    -- allele_count_thres: Read count of allele to filter for
    -- sample_prop_thres: Proportion of samples where allele proportion exceeds the allele_prop_thres.
    -- map_to_filtered: If True, map the allele counts that are filtered out to the closest and most abundant allele that is retained after filtering. If False, discard the read counts that are filtered out.
    -- retain_max: If True, in case there is no allele that is retained during the filtering step, retain one allele with maximum median allele proportion across samples.
@@ -413,9 +416,13 @@
        ["guide", allele_col]
    )
    aa_prop_filtered = aa_prop.loc[
-        (np.nan_to_num(aa_prop.loc[:, adata.samples.index]) > allele_prop_thres).mean(
-            axis=1
-        )
+        (
+            (np.nan_to_num(aa_prop.loc[:, adata.samples.index]) > allele_prop_thres)
+            & (
+                alleles.set_index(["guide", allele_col]).loc[:, adata.samples.index]
+                >= allele_count_thres
+            )
+        ).mean(axis=1)
        >= sample_prop_thres,
        :,
    ]
@@ -429,7 +436,9 @@
            guide_df = aa_prop.loc[
                aa_prop.index.get_level_values("guide") == guide,
            ]
-            max_frequency_idx = np.argmax(guide_df.mean(axis=1))
+            max_frequency_idx = np.nanargmax(
+                guide_df.mean(axis=1, numeric_only=True).fillna(0)
+            )
            append_rows.append(guide_df.iloc[[max_frequency_idx], :])
        aa_prop_filtered = pd.concat([aa_prop_filtered] + append_rows, axis=0)
@@ -477,7 +486,6 @@ def _map_alleles_to_filtered(
        raw_allele_counts.groupby("guide"),
        desc="Mapping alleles to closest filtered alleles",
    ):
-
        guide_filtered_allele_counts = filtered_allele_counts.loc[
            filtered_allele_counts.guide == guide, :
        ].set_index(allele_col)
@@ -487,8 +495,8 @@
        merge_priority = guide_filtered_allele_counts.mean(
            axis=1, numeric_only=True
        )
+        merge_priority = merge_priority.fillna(0)
        if is_cn_allele:
-
            guide_raw_counts["allele_mapped"] = guide_raw_counts[allele_col].map(
                lambda allele: allele.map_to_closest(
                    guide_filtered_alleles,
@@ -510,10 +518,10 @@
            .rename(columns={"allele_mapped": allele_col})
            .groupby(["guide", allele_col])
            .sum()
-        )
+        ).reset_index()

        mapped_allele_counts.append(guide_raw_counts)
-    res = pd.concat(mapped_allele_counts).reset_index()
+    res = fast_concat(mapped_allele_counts).reset_index(drop=True)
    res = res.loc[res[allele_col].map(bool), :]
    return res

@@ -572,8 +580,10 @@ def _distribute_alleles_to_filtered(
            res,
            index=guide_filtered_counts.index,
            columns=guide_filtered_counts.columns,
-        )
+        ).reset_index()
        mapped_allele_counts.append(added_counts)
-    res = pd.concat(mapped_allele_counts).reset_index()
+    # @TODO: these lines are taking very long?
+    print("Contatenating allele counts...")
+    res = fast_concat(mapped_allele_counts).reset_index(drop=True)
    res = res.loc[res[allele_col].map(bool), :]
    return res
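
For context on the filter_allele_prop change above: an allele is now retained only if, in at least sample_prop_thres of the samples, its proportion among the guide's barcode-matched reads exceeds allele_prop_thres and its read count reaches the new allele_count_thres; the retain_max fallback also switches from argmax to nanargmax so that NaN proportions no longer break it. A minimal usage sketch, not taken from the repository's documentation; the import path, the allele_uns_key value, and the pre-loaded adata object are assumptions based only on this diff:

from bean.annotate.filter_alleles import filter_allele_prop  # module path as shown in this diff

# adata: a bulk ReporterScreen object loaded elsewhere; "allele_counts" is an
# assumed .uns key holding the per-guide allele count table.
filtered = filter_allele_prop(
    adata,
    allele_uns_key="allele_counts",
    allele_prop_thres=0.05,  # allele must exceed 5% of the guide's matched reads...
    allele_count_thres=5,    # ...and have at least 5 supporting reads (new in this commit)
    sample_prop_thres=0.3,   # ...in at least 30% of samples to be retained
    map_to_filtered=True,    # remap filtered-out counts onto the closest retained allele
    retain_max=True,         # keep the top allele for guides where nothing survives filtering
)
# What filter_allele_prop returns (or whether it modifies adata in place) is not
# shown in this diff; treat "filtered" as a placeholder.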