Skip to content

Commit

Permalink
Add automated barcode position detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Ranum committed Dec 16, 2020
1 parent 9beaf3e commit 3987e93
Showing 1 changed file with 57 additions and 5 deletions.
62 changes: 57 additions & 5 deletions InDevOptimizations/DemultiplexUsingBarcodes_New_V1.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#####
# Set consistent parameters here
# Static sequences that are searched for (with str.find) in the sequence lines
# of the position-learner fastq; the barcode and UMI slice positions are then
# derived as fixed offsets from where these sequences are found.
# NOTE(review): Round2_barcode_staticSeq is assigned twice below, so the second
# value ("ATCCAC") is the one in effect; the first assignment looks like the
# superseded pre-change line and can probably be dropped -- confirm.
Round1_barcode_staticSeq = "CATTCG"
Round2_barcode_staticSeq = "AATCCA"
Round2_barcode_staticSeq = "ATCCAC"
Round3_barcode_staticSeq = "GTGGCC"
#####

Expand Down Expand Up @@ -146,6 +146,54 @@ def return_fastq(self):
Total_barcodes_detected = []
Total_barcodes_passing_minReadThreshold = []

######
# Learn barcode positions from input fastqR
######
# Purpose: locate the three static sequences in the reads of
# "position_learner_fastqr.fastq", take the most common start offset of each,
# and derive the UMI / barcode slice boundaries from those offsets.
# Outputs (module-level names used by the demultiplexing step):
#   umi_start/umi_end, barcode1_start/end, barcode2_start/end, barcode3_start/end
# Raises: whatever open() raises if the learner fastq cannot be opened.
from collections import Counter  # local import: the file's import block is not part of this section

print("Learning barcode positions...")

# Default positions, kept whenever a static sequence cannot be located reliably.
# They correspond to the fixed layout UMI 0:10, barcode3 10:18, barcode2 48:56,
# barcode1 86:94. (Previously barcode3 was assigned twice here and the barcode1
# defaults were never defined.)
umi_start = 0
umi_end = 10
barcode3_start = 10
barcode3_end = 18
barcode2_start = 48
barcode2_end = 56
barcode1_start = 86
barcode1_end = 94

# Code for automated barcode position extractor based on static sequences
learner_bc1_list = []
learner_bc2_list = []
learner_bc3_list = []
with open("position_learner_fastqr.fastq", "r") as infile:
    for learner_line_no, line in enumerate(infile):
        # Only the second line of every 4-line fastq record is inspected.
        if learner_line_no % 4 == 1:
            # str.find returns -1 when the static sequence is absent from the read.
            learner_bc1_list.append(line.find(Round1_barcode_staticSeq))
            learner_bc2_list.append(line.find(Round2_barcode_staticSeq))
            learner_bc3_list.append(line.find(Round3_barcode_staticSeq))

# Most frequent offset per static sequence. Counter does this in one pass
# (the previous max(set(...), key=list.count) rescanned the list for every
# distinct offset). An empty input yields -1 ("not found") instead of raising.
learner_modes = []
for learner_positions in (learner_bc1_list, learner_bc2_list, learner_bc3_list):
    learner_top = Counter(learner_positions).most_common(1)
    learner_modes.append(learner_top[0][0] if learner_top else -1)
foundPosition_Round1_barcode, foundPosition_Round2_barcode, foundPosition_Round3_barcode = learner_modes

print("Extracted position1 = " + str(foundPosition_Round1_barcode))
print("Extracted position2 = " + str(foundPosition_Round2_barcode))
print("Extracted position3 = " + str(foundPosition_Round3_barcode))

# Use extracted static sequence positions to infer barcode positions.
# Each group is only overridden when the offset leaves room for the bases in
# front of it; a miss (-1) or a too-small offset would otherwise produce
# negative slice indices and silently extract the wrong bases.

# The UMI is the 10 bases ending 8 before the Round3 static sequence, and
# barcode3 is the 8 bases directly in front of that static sequence.
if foundPosition_Round3_barcode >= 18:
    umi_start = foundPosition_Round3_barcode - 18
    umi_end = foundPosition_Round3_barcode - 8
    print("UMI position has been extracted as " + str(umi_start) + ":" + str(umi_end))
    barcode3_start = foundPosition_Round3_barcode - 8
    barcode3_end = foundPosition_Round3_barcode
    print("Barcode3 position has been extracted as " + str(barcode3_start) + ":" + str(barcode3_end))
else:
    print("WARNING: Round3 static sequence not located reliably; using default UMI position "
          + str(umi_start) + ":" + str(umi_end) + " and default Barcode3 position "
          + str(barcode3_start) + ":" + str(barcode3_end))

# Barcode2 is the 8 bases directly in front of the Round2 static sequence.
if foundPosition_Round2_barcode >= 8:
    barcode2_start = foundPosition_Round2_barcode - 8
    barcode2_end = foundPosition_Round2_barcode
    print("Barcode2 position has been extracted as " + str(barcode2_start) + ":" + str(barcode2_end))
else:
    print("WARNING: Round2 static sequence not located reliably; using default Barcode2 position "
          + str(barcode2_start) + ":" + str(barcode2_end))

# Barcode1 is the 8 bases starting 6 positions after the start of the Round1
# static sequence.
if foundPosition_Round1_barcode >= 0:
    barcode1_start = foundPosition_Round1_barcode + 6
    barcode1_end = foundPosition_Round1_barcode + 14
    print("Barcode1 position has been extracted as " + str(barcode1_start) + ":" + str(barcode1_end))
else:
    print("WARNING: Round1 static sequence not located reliably; using default Barcode1 position "
          + str(barcode1_start) + ":" + str(barcode1_end))



######
# Step2: Iterate through input fastqs in bins.
Expand Down Expand Up @@ -228,10 +276,14 @@ def return_fastq(self):
completeReadCounter += 1
if (line_ct1 % 4 == 1):
lineRead=str(line[0:].rstrip())
lineReadUMI = lineRead[0:10]
lineReadBarcode3 = lineRead[10:18]
lineReadBarcode2 = lineRead[48:int(48+8)]
lineReadBarcode1 = lineRead[86:int(86+8)]
#lineReadUMI = lineRead[0:10]
lineReadUMI = lineRead[umi_start:umi_end]
#lineReadBarcode3 = lineRead[10:18]
lineReadBarcode3 = lineRead[barcode3_start:barcode3_end]
#lineReadBarcode2 = lineRead[48:int(48+8)]
lineReadBarcode2 = lineRead[barcode2_start:barcode2_end]
#lineReadBarcode1 = lineRead[86:int(86+8)]
lineReadBarcode1 = lineRead[barcode1_start:barcode1_end]
filteredBarcode1 = [s for s in Eight_BP_barcode if hamming(s, lineReadBarcode1) <= int(args.errorThreshold)] # Match each extracted barcode to a greenlist of possible barcodes. If a match within hamming distance of 1 is found move forward with that match (not the extracted sequence).
filteredBarcode2 = [s for s in Eight_BP_barcode if hamming(s, lineReadBarcode2) <= int(args.errorThreshold)]
filteredBarcode3 = [s for s in Eight_BP_barcode if hamming(s, lineReadBarcode3) <= int(args.errorThreshold)]
Expand Down

0 comments on commit 3987e93

Please sign in to comment.