Skip to content

Commit

Permalink
Debug barcode start site
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed Mar 26, 2024
1 parent 94954c6 commit 5217e84
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 8 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ See full detail in [Parameters](#parameters).
File should contain following columns.
* `name`: gRNA ID column
* `sequence`: gRNA sequence
* `barcode`: R2 barcode to help match reporter to gRNA
* `barcode`: R2 barcode to help match reporter to gRNA, written in the sense direction (as in R1)
* In order to use accessibility in the [variant effect quantification](#bean-run-quantify-variant-effects), provide accessibility information in one of two options. (For non-targeting guides, provide NA values (empty cell).)
* Option 1: `chrom` & `genomic_pos`: Chromosome (ex. `chr19`) and genomic position of guide sequence. You will have to provide the path to the bigwig file with matching reference version in `bean-run`.
* Option 2: `accessibility_signal`: ATAC-seq signal value of the target loci of each guide.
Expand Down Expand Up @@ -153,6 +153,7 @@ Read structure
* `--barcode-start-seq` (default: ''): Barcode + reporter starts after this sequence in R2, denoted as the sense direction (the same sequence direction as R1).
* `--guide-start-seqs-file` (default: `None`): CSV file path with per-sample `guide_start_seq` to be used, if provided. Formatted as `sample_id, guide_start_seq`
* `--guide-end-seqs-file` (default: `None`): CSV file path with per-sample `guide_end_seq` to be used, if provided. Formatted as `sample_id,guide_end_seq`
* `--guide-start-seqs-file` (default: `None`): CSV file path with per-sample `barcode_start_seq` to be used, if provided. Formatted as `sample_id, guide_start_seq`
* `-l`, `--reporter-length` (default: `32`): Length of the reporter sequence.
* `--gstart-reporter` (default: `6`): Start position of the guide sequence in the reporter
* `--guide-bc` (default: `True`): Construct has guide barcode
Expand Down
13 changes: 6 additions & 7 deletions bean/mapping/GuideEditCounter.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class NoReadsAfterQualityFiltering(Exception):


strand_str_to_int = {"neg": -1, "pos": 1, "-": -1, "+": 1}

base_revcomp = {"A":"T", "T":"A", "G":"C", "C":"G"}

def _get_stranded_guide_offset(strand: int, start_pos: int, guide_len: int) -> int:
if strand == -1:
Expand Down Expand Up @@ -652,16 +652,15 @@ def get_reporter_seq_qual(self, R2_record: SeqIO.SeqRecord, R2_start = 0):

def get_barcode(self, R1_seq, R2_seq):
if self.barcode_start_seq != "":
barcode_start_idx = revcomp(R2_seq).replace(
self.base_edited_from, self.base_edited_to
).find(
self.barcode_start_seq.replace(self.base_edited_from, self.base_edited_to)
)
barcode_start_idx = R2_seq.replace(
base_revcomp[self.base_edited_from], base_revcomp[self.base_edited_to]
).find(revcomp(self.barcode_start_seq).replace(base_revcomp[self.base_edited_from], base_revcomp[self.base_edited_to]))
if barcode_start_idx == -1:
return -1, ""
barcode_start_idx += len(self.barcode_start_seq)
else:
barcode_start_idx = 0
return barcode_start_idx, revcomp(R2_seq[barcode_start_idx: self.guide_bc_len])
return barcode_start_idx, revcomp(R2_seq[barcode_start_idx: (barcode_start_idx + self.guide_bc_len)])

def _match_read_to_sgRNA_bcmatch_semimatch(self, R1_seq: str, R2_seq: str):
# This should be adjusted for each experimental recipes.
Expand Down
12 changes: 12 additions & 0 deletions tests/test_count.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import pytest
import subprocess
from bean.mapping import GuideEditCounter
from bean.mapping.utils import _get_input_parser
from bean.mapping._supporting_fn import revcomp


@pytest.mark.order(2)
Expand Down Expand Up @@ -52,6 +55,15 @@ def test_count_samples_bcstart():
except subprocess.CalledProcessError as exc:
raise exc

def test_barcode_start_idx():
args = vars(_get_input_parser().parse_args(["-b", "A", "-f", "tests/data/test_guide_info.csv",]))
args["barcode_start_seq"] = "gtttgaattcgctagctaggtcttg"
args["R1"] = "tests/data/test_R1.fastq"
args["R2"] = "tests/data/test_R2.fastq"
gc = GuideEditCounter(**args)
sidx, bc = gc.get_barcode("", revcomp("agtggcaccgagtcggtgcttttttTAACAGTGTAATCTGGCGAGCCACTGTTCTTTGTACCAGAAgtttgaattcgctagctaggtcttgctgg".upper()))
assert sidx == 29
assert bc == "AGAA"

@pytest.mark.order(6)
def test_count_samples_tiling():
Expand Down

0 comments on commit 5217e84

Please sign in to comment.