diff --git a/bin/charger b/bin/charger index 3167a3f..a8be3fd 100755 --- a/bin/charger +++ b/bin/charger @@ -1,10 +1,11 @@ #!/usr/bin/env python # CharGer - Characterization of Germline variants -# author: Adam D Scott (adamscott@wustl.edu) -# author: Kuan-lin Huang (kuan-lin.huang@wustl.edu) -# author: Amila Weerasinghe (amila@wustl.edu) -# author: R Jay Mashl (rmashl@wustl.edu) -# version: v0.5.2 - 2018*01 +# author: +# - Adam D Scott (ascott@genome.wustl.edu) +# - Fernanda Martins Rodrigues (fernanda@wustl.edu) +# - Jay R. Mashl (rmashl@wustl.edu) +# - Kuan-lin Huang (khuang@genome.wustl.edu) +# version: v0.5.3 - September, 2019 import sys import getopt @@ -13,7 +14,7 @@ import time import argparse def parseArgs( argv ): - helpText = "\nCharGer - v0.5.2\n\n" + helpText = "\nCharGer - v0.5.3\n\n" helpText += "Usage: " helpText += "charger [options]\n\n" helpText += "Accepted input data files:\n" diff --git a/charger/autovivification.py b/charger/autovivification.py index 141914d..d5a4545 100644 --- a/charger/autovivification.py +++ b/charger/autovivification.py @@ -1,7 +1,12 @@ #!/usr/bin/env python # autovivification - extends dict -# author: Kuan-lin Huang (khuang@genome.wustl.edu) & Adam D Scott (ascott@genome.wustl.edu) -# version: v0.0 - 2016*01*12 +# CharGer - Characterization of Germline variants +# author: +# - Adam D Scott (ascott@genome.wustl.edu) +# - Fernanda Martins Rodrigues (fernanda@wustl.edu) +# - Jay R. Mashl (rmashl@wustl.edu) +# - Kuan-lin Huang (khuang@genome.wustl.edu) +# version: v0.5.3 - September, 2019 class autovivification(dict): '''Implementation of perl's autovivification feature.''' diff --git a/charger/charger.py b/charger/charger.py index 76ad519..41de33a 100644 --- a/charger/charger.py +++ b/charger/charger.py @@ -5,7 +5,7 @@ # - Fernanda Martins Rodrigues (fernanda@wustl.edu) # - Jay R. Mashl (rmashl@wustl.edu) # - Kuan-lin Huang (khuang@genome.wustl.edu) -# version: v0.5.2 +# version: v0.5.3 - September, 2019 import os import sys @@ -904,26 +904,46 @@ def getMacClinVarTSV( self , tsvfile ): for line in macFile: fields = ( line.rstrip( ) ).split( "\t" ) [ description , status ] = self.parseMacPathogenicity( header, fields ) # no need to specify which fields here anymore; parseMacPathogenicity now knows which specific columns to look for + # fixed coordinates for clinvar file (refer to pull request #19) + pos = int(fields[header.index("pos")]) + ref = fields[header.index("ref")] + alt = fields[header.index("alt")] + if len(ref) == 1 and len(alt) > 1: # insertion + ref = '-' + alt = alt[1:] + start = pos + stop = pos + 1 + elif len(ref) > 1 and len(alt) == 1: # deletion + ref = ref[1:] + alt = '-' + start = pos + 1 + stop = pos + len(ref) + else: # snv + start = pos + stop = pos + if len(header) > 27: # if yes, file is in the new format var = clinvarvariant( chromosome = fields[header.index("chrom")] , \ - start = fields[header.index("pos")] , \ - reference = fields[header.index("ref")] , \ - alternate = fields[header.index("alt")] , \ + start = start , \ + stop = stop , \ + reference = ref , \ + alternate = alt , \ uid = fields[header.index("variation_id")], \ gene = fields[header.index("symbol")] , \ clinical = { "description" : description , "review_status" : status } , \ trait = { fields[header.index("xrefs")] : fields[header.index("all_traits")] } ) else: # file in the old format var = clinvarvariant( chromosome = fields[header.index("chrom")] , \ - start = fields[header.index("pos")] , \ - reference = fields[header.index("ref")] , \ - alternate = fields[header.index("alt")] , \ + start = start , \ + stop = stop , \ + reference = ref , \ + alternate = alt , \ uid = fields[header.index("measureset_id")], \ gene = fields[header.index("symbol")] , \ clinical = { "description" : description , "review_status" : status } , \ trait = { fields[-1] : fields[header.index("all_traits")] } ) - var.setStopFromReferenceAndAlternate( ) - var.splitHGVSc( fields[header.index("hgvs_c")] , override = True ) + + var.splitHGVSc( fields[header.index("hgvs_c")] , override = False ) # refer to pull request #19 var.splitHGVSp( fields[header.index("hgvs_p")] ) #var.printVariant( "," ) #print( var.proteogenomicVar( ) ) @@ -974,21 +994,35 @@ def parseMacPathogenicity( header, fields ): # addded header argument, so can re else: splitChar="/" # new macarthur format - if isBenign == 1: - for desc in named.split( splitChar ): - if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign: + # fixed parsing of conflicting ClinVar classification + if isBenign == 1 and isPathogenic == 1 and int(isConflicted) == 0: + for desc in named.split(splitChar): + if re.match("likely", desc.lower() ) and desc != chargervariant.benign: desc = chargervariant.likelyBenign - elif re.match( "benign", desc.lower( ) ): + elif re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: + desc = chargervariant.likelyPathogenic + elif re.match( "benign", desc.lower() ): desc = chargervariant.benign break - - if isPathogenic == 1: - for desc in named.split( splitChar ): - if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: - desc = chargervariant.likelyPathogenic elif re.match( "pathog", desc.lower( ) ): desc = chargervariant.pathogenic break + else: + if isBenign == 1: + for desc in named.split( splitChar ): + if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign: + desc = chargervariant.likelyBenign + elif re.match( "benign", desc.lower( ) ): + desc = chargervariant.benign + break + + if isPathogenic == 1: + for desc in named.split( splitChar ): + if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: + desc = chargervariant.likelyPathogenic + elif re.match( "pathog", desc.lower( ) ): + desc = chargervariant.pathogenic + break return [ desc , status ] def getMacClinVarVCF( self , vcffile ): diff --git a/charger/chargervariant.py b/charger/chargervariant.py index 3c78c83..cc7fd65 100644 --- a/charger/chargervariant.py +++ b/charger/chargervariant.py @@ -1,7 +1,11 @@ #!/usr/bin/env python -# chargervariant - CharGer annotated variants -# author: Adam D Scott (ascott@genome.wustl.edu) & Kuan-lin Huang (khuang@genome.wustl.edu) -# version: v0.0 - 2016*01*13 +# CharGer - Characterization of Germline variants +# author: +# - Adam D Scott (ascott@genome.wustl.edu) +# - Fernanda Martins Rodrigues (fernanda@wustl.edu) +# - Jay R. Mashl (rmashl@wustl.edu) +# - Kuan-lin Huang (khuang@genome.wustl.edu) +# version: v0.5.3 - September, 2019 import pdb from biomine.variant.clinvarvariant import clinvarvariant diff --git a/setup.py b/setup.py index 747d93e..208a3e7 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #https://docs.python.org/2/distutils/examples.html from distutils.core import setup -version = "0.5.2" +version = "0.5.3" setup( \ name = 'CharGer' , version = version ,