Skip to content

Commit

Permalink
Merge pull request #36 from oxfordmmm/fix/allow-vcf-row-overlap
Browse files Browse the repository at this point in the history
Fix/allow vcf row overlap
  • Loading branch information
JeremyWesthead authored Jul 8, 2024
2 parents 7160454 + 4e6b95c commit f3116b5
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 210 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
1.3.3
1.3.4

15 changes: 10 additions & 5 deletions bin/to_piezo_catalogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@ def mutations_to_catalogue(mutations, filename, ref_genome, catalogue):
ref_genome (str): Name of the genome
catalogue (str): Name of the catalogue
"""
header = "GENBANK_REFERENCE,CATALOGUE_NAME,CATALOGUE_VERSION,CATALOGUE_GRAMMAR,PREDICTION_VALUES,DRUG,MUTATION,PREDICTION,SOURCE,EVIDENCE,OTHER\n"
header = (
"GENBANK_REFERENCE,CATALOGUE_NAME,CATALOGUE_VERSION,CATALOGUE_GRAMMAR,"
"PREDICTION_VALUES,DRUG,MUTATION,PREDICTION,SOURCE,EVIDENCE,OTHER\n"
)
common_all = f"{ref_genome},{catalogue},1.0,GARC1,RUS,NAN,"

# Get all genes within the mutations
genes = sorted(list(set([mutation.split("@")[0] for mutation in mutations])))
with open(filename, "w") as f:
f.write(header)
# Piezo requires all prediction values are in the catalogue, and all valid `PREDICTION_VALUES` include `R`
# Piezo requires all prediction values are in the catalogue,
# and all valid `PREDICTION_VALUES` include `R`
# So add a dummy record with R as S and U are used elsewhere
f.write(common_all + "NOTAGENE@A1!,R,{},{},{}\n")
# Add general rules for genes
Expand All @@ -42,9 +46,10 @@ def mutations_to_catalogue(mutations, filename, ref_genome, catalogue):


if __name__ == "__main__":
assert (
len(sys.argv) == 4
), "Incorrect usage. Try: python to_piezo_catalogue.py <genome_path> <vcf_path> <output_filename>"
assert len(sys.argv) == 4, (
"Incorrect usage. Try: python to_piezo_catalogue.py "
"<genome_path> <vcf_path> <output_filename>"
)
genome_path = sys.argv[1]
vcf_path = sys.argv[2]
output_filename = sys.argv[3]
Expand Down
53 changes: 25 additions & 28 deletions gumpy/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,34 +999,31 @@ def __add__(self, vcf: VCFFile):

# use the calls dict to change the nucleotide indicies in the copy of the genome
for idx, type_ in tqdm(vcf.calls.keys(), disable=(not self.show_progress_bar)):
genome.vcf_evidence[genome.nucleotide_index[idx - 1]] = vcf.calls[
(idx, type_)
]["original_vcf_row"]

# deal with changes at a single nucleotide site
if type_ in ["snp", "null", "het"]:
# only set values if the idx is to a single nucleotide
genome.nucleotide_sequence[idx - 1] = vcf.calls[(idx, type_)]["call"]

# deal with insertions and deletions
elif type_ == "indel":
genome.is_indel[idx - 1] = True
genome.indel_nucleotides[idx - 1] = vcf.calls[(idx, type_)]["call"][1]

if vcf.calls[(idx, type_)]["call"][0] == "ins":
genome.indel_length[idx - 1] = len(
vcf.calls[(idx, type_)]["call"][1]
)
else:
genome.indel_length[idx - 1] = -1 * len(
vcf.calls[(idx, type_)]["call"][1]
)

elif type_ == "ref":
# These only exist due to reference calls
# They only made it this far as they are required to pull out minors at
# these positions
pass
for item in vcf.calls[(idx, type_)]:
genome.vcf_evidence[genome.nucleotide_index[idx - 1]] = item[
"original_vcf_row"
]

# deal with changes at a single nucleotide site
if type_ in ["snp", "null", "het"]:
# only set values if the idx is to a single nucleotide
genome.nucleotide_sequence[idx - 1] = item["call"]

# deal with insertions and deletions
elif type_ == "indel":
genome.is_indel[idx - 1] = True
genome.indel_nucleotides[idx - 1] = item["call"][1]

if item["call"][0] == "ins":
genome.indel_length[idx - 1] = len(item["call"][1])
else:
genome.indel_length[idx - 1] = -1 * len(item["call"][1])

elif type_ == "ref":
# These only exist due to reference calls
# They only made it this far as they are required to pull
# out minors at these positions
pass

genome.minor_populations = []

Expand Down
Loading

0 comments on commit f3116b5

Please sign in to comment.