Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V.0.5.3 #34

Merged
merged 5 commits into from
Sep 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions bin/charger
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#!/usr/bin/env python
# CharGer - Characterization of Germline variants
# author: Adam D Scott ([email protected])
# author: Kuan-lin Huang ([email protected])
# author: Amila Weerasinghe ([email protected])
# author: R Jay Mashl ([email protected])
# version: v0.5.2 - 2018*01
# author:
# - Adam D Scott ([email protected])
# - Fernanda Martins Rodrigues ([email protected])
# - Jay R. Mashl ([email protected])
# - Kuan-lin Huang ([email protected])
# version: v0.5.3 - September, 2019

import sys
import getopt
Expand All @@ -13,7 +14,7 @@ import time
import argparse

def parseArgs( argv ):
helpText = "\nCharGer - v0.5.2\n\n"
helpText = "\nCharGer - v0.5.3\n\n"
helpText += "Usage: "
helpText += "charger <input file> [options]\n\n"
helpText += "Accepted input data files:\n"
Expand Down
9 changes: 7 additions & 2 deletions charger/autovivification.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
#!/usr/bin/env python
# autovivification - extends dict
# author: Kuan-lin Huang ([email protected]) & Adam D Scott ([email protected])
# version: v0.0 - 2016*01*12
# CharGer - Characterization of Germline variants
# author:
# - Adam D Scott ([email protected])
# - Fernanda Martins Rodrigues ([email protected])
# - Jay R. Mashl ([email protected])
# - Kuan-lin Huang ([email protected])
# version: v0.5.3 - September, 2019

class autovivification(dict):
'''Implementation of perl's autovivification feature.'''
Expand Down
70 changes: 52 additions & 18 deletions charger/charger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# - Fernanda Martins Rodrigues ([email protected])
# - Jay R. Mashl ([email protected])
# - Kuan-lin Huang ([email protected])
# version: v0.5.2
# version: v0.5.3 - September, 2019

import os
import sys
Expand Down Expand Up @@ -904,26 +904,46 @@ def getMacClinVarTSV( self , tsvfile ):
for line in macFile:
fields = ( line.rstrip( ) ).split( "\t" )
[ description , status ] = self.parseMacPathogenicity( header, fields ) # no need to specify which fields here anymore; parseMacPathogenicity now knows which specific columns to look for
# normalize ClinVar coordinates and alleles: indels drop the shared leading base and use '-' for the empty allele (refer to pull request #19)
pos = int(fields[header.index("pos")])
ref = fields[header.index("ref")]
alt = fields[header.index("alt")]
if len(ref) == 1 and len(alt) > 1: # insertion
ref = '-'
alt = alt[1:]
start = pos
stop = pos + 1
elif len(ref) > 1 and len(alt) == 1: # deletion
ref = ref[1:]
alt = '-'
start = pos + 1
stop = pos + len(ref)
else: # snv
start = pos
stop = pos

if len(header) > 27: # if yes, file is in the new format
var = clinvarvariant( chromosome = fields[header.index("chrom")] , \
start = fields[header.index("pos")] , \
reference = fields[header.index("ref")] , \
alternate = fields[header.index("alt")] , \
start = start , \
stop = stop , \
reference = ref , \
alternate = alt , \
uid = fields[header.index("variation_id")], \
gene = fields[header.index("symbol")] , \
clinical = { "description" : description , "review_status" : status } , \
trait = { fields[header.index("xrefs")] : fields[header.index("all_traits")] } )
else: # file in the old format
var = clinvarvariant( chromosome = fields[header.index("chrom")] , \
start = fields[header.index("pos")] , \
reference = fields[header.index("ref")] , \
alternate = fields[header.index("alt")] , \
start = start , \
stop = stop , \
reference = ref , \
alternate = alt , \
uid = fields[header.index("measureset_id")], \
gene = fields[header.index("symbol")] , \
clinical = { "description" : description , "review_status" : status } , \
trait = { fields[-1] : fields[header.index("all_traits")] } )
var.setStopFromReferenceAndAlternate( )
var.splitHGVSc( fields[header.index("hgvs_c")] , override = True )

var.splitHGVSc( fields[header.index("hgvs_c")] , override = False ) # refer to pull request #19
var.splitHGVSp( fields[header.index("hgvs_p")] )
#var.printVariant( "," )
#print( var.proteogenomicVar( ) )
Expand Down Expand Up @@ -974,21 +994,35 @@ def parseMacPathogenicity( header, fields ): # added header argument, so can re
else:
splitChar="/" # new macarthur format

if isBenign == 1:
for desc in named.split( splitChar ):
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign:
# fixed parsing of conflicting ClinVar classification
if isBenign == 1 and isPathogenic == 1 and int(isConflicted) == 0:
for desc in named.split(splitChar):
if re.match("likely", desc.lower() ) and desc != chargervariant.benign:
desc = chargervariant.likelyBenign
elif re.match( "benign", desc.lower( ) ):
elif re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
desc = chargervariant.likelyPathogenic
elif re.match( "benign", desc.lower() ):
desc = chargervariant.benign
break

if isPathogenic == 1:
for desc in named.split( splitChar ):
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
desc = chargervariant.likelyPathogenic
elif re.match( "pathog", desc.lower( ) ):
desc = chargervariant.pathogenic
break
else:
if isBenign == 1:
for desc in named.split( splitChar ):
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign:
desc = chargervariant.likelyBenign
elif re.match( "benign", desc.lower( ) ):
desc = chargervariant.benign
break

if isPathogenic == 1:
for desc in named.split( splitChar ):
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
desc = chargervariant.likelyPathogenic
elif re.match( "pathog", desc.lower( ) ):
desc = chargervariant.pathogenic
break
return [ desc , status ]

def getMacClinVarVCF( self , vcffile ):
Expand Down
10 changes: 7 additions & 3 deletions charger/chargervariant.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#!/usr/bin/env python
# chargervariant - CharGer annotated variants
# author: Adam D Scott ([email protected]) & Kuan-lin Huang ([email protected])
# version: v0.0 - 2016*01*13
# CharGer - Characterization of Germline variants
# author:
# - Adam D Scott ([email protected])
# - Fernanda Martins Rodrigues ([email protected])
# - Jay R. Mashl ([email protected])
# - Kuan-lin Huang ([email protected])
# version: v0.5.3 - September, 2019

import pdb
from biomine.variant.clinvarvariant import clinvarvariant
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#https://docs.python.org/2/distutils/examples.html
from distutils.core import setup
version = "0.5.2"
version = "0.5.3"
setup( \
name = 'CharGer' ,
version = version ,
Expand Down