ding-lab · fernanda-rodrigues · Sep 30, 2019 · Sep 30, 2019 · Sep 30, 2019 · Sep 30, 2019
diff --git a/bin/charger b/bin/charger
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 # CharGer - Characterization of Germline variants
-# author: Adam D Scott ([email protected])
-# author: Kuan-lin Huang ([email protected])
-# author: Amila Weerasinghe ([email protected])
-# author: R Jay Mashl ([email protected])
-# version: v0.5.2 - 2018*01
+# author: 
+#	- Adam D Scott ([email protected])
+#	- Fernanda Martins Rodrigues ([email protected])
+#	- Jay R. Mashl ([email protected])
+#	- Kuan-lin Huang ([email protected])
+# version: v0.5.3 - September, 2019
 
 import sys
 import getopt
@@ -13,7 +14,7 @@ import time
 import argparse
 
 def parseArgs( argv ):
-	helpText = "\nCharGer - v0.5.2\n\n"
+	helpText = "\nCharGer - v0.5.3\n\n"
 	helpText += "Usage: "
 	helpText += "charger <input file> [options]\n\n"
 	helpText += "Accepted input data files:\n"

diff --git a/charger/autovivification.py b/charger/autovivification.py
@@ -1,7 +1,12 @@
 #!/usr/bin/env python
 # autovivification - extends dict
-# author: Kuan-lin Huang ([email protected]) & Adam D Scott ([email protected])
-# version: v0.0 - 2016*01*12
+# CharGer - Characterization of Germline variants
+# author: 
+#	- Adam D Scott ([email protected])
+#	- Fernanda Martins Rodrigues ([email protected])
+#	- Jay R. Mashl ([email protected])
+#	- Kuan-lin Huang ([email protected])
+# version: v0.5.3 - September, 2019
 
 class autovivification(dict):
 	'''Implementation of perl's autovivification feature.'''

diff --git a/charger/charger.py b/charger/charger.py
@@ -5,7 +5,7 @@
 #	- Fernanda Martins Rodrigues ([email protected])
 #	- Jay R. Mashl ([email protected])
 #	- Kuan-lin Huang ([email protected])
-# version: v0.5.2
+# version: v0.5.3 - September, 2019
 
 import os
 import sys
@@ -904,26 +904,46 @@ def getMacClinVarTSV( self , tsvfile ):
 			for line in macFile:
 				fields = ( line.rstrip( ) ).split( "\t" )
 				[ description , status ] = self.parseMacPathogenicity( header, fields ) # no need to specify which fields here anymore; parseMacPathogenicity now knows which specific columns to look for
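+				# normalize ClinVar pos/ref/alt into start/stop coordinates (refer to pull request #19): for an insertion or deletion the first base of the longer allele is dropped and the other allele becomes '-'; every other case (including variants where both alleles are longer than one base) keeps start = stop = pos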
+				pos = int(fields[header.index("pos")])
+				ref = fields[header.index("ref")]
+				alt = fields[header.index("alt")]
+				if len(ref) == 1 and len(alt) > 1: # insertion
+					ref = '-'
+					alt = alt[1:]
+					start = pos
+					stop = pos + 1
+				elif len(ref) > 1 and len(alt) == 1: # deletion
+					ref = ref[1:]
+					alt = '-'
+					start = pos + 1
+					stop = pos + len(ref)
+				else: # snv
+					start = pos
+					stop = pos
+
 				if len(header) > 27: # if yes, file is in the new format
 					var = clinvarvariant( chromosome = fields[header.index("chrom")] , \
-										  start = fields[header.index("pos")] , \
-										  reference = fields[header.index("ref")] , \
-										  alternate = fields[header.index("alt")] , \
+										  start = start , \
+										  stop = stop , \
+										  reference = ref , \
+										  alternate = alt , \
 										  uid = fields[header.index("variation_id")], \
 										  gene = fields[header.index("symbol")] , \
 										  clinical = { "description" : description , "review_status" : status } , \
 										  trait = { fields[header.index("xrefs")] : fields[header.index("all_traits")] } )
 				else: # file in the old format
 					var = clinvarvariant( chromosome = fields[header.index("chrom")] , \
-										  start = fields[header.index("pos")] , \
-										  reference = fields[header.index("ref")] , \
-										  alternate = fields[header.index("alt")] , \
+										  start = start , \
+										  stop = stop , \
+										  reference = ref , \
+										  alternate = alt , \
 										  uid = fields[header.index("measureset_id")], \
 										  gene = fields[header.index("symbol")] , \
 										  clinical = { "description" : description , "review_status" : status } , \
 										  trait = { fields[-1] : fields[header.index("all_traits")] } )
-				var.setStopFromReferenceAndAlternate( )
-				var.splitHGVSc( fields[header.index("hgvs_c")] , override = True )
+
+				var.splitHGVSc( fields[header.index("hgvs_c")] , override = False ) # refer to pull request #19
 				var.splitHGVSp( fields[header.index("hgvs_p")] )
 				#var.printVariant( "," )
 				#print( var.proteogenomicVar( ) )
@@ -974,21 +994,35 @@ def parseMacPathogenicity( header, fields ): # addded header argument, so can re
 		else:
 			splitChar="/" # new macarthur format
 
-		if isBenign == 1:
-			for desc in named.split( splitChar ):
-				if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign:
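+		# fixed parsing of conflicting ClinVar classification: records flagged both benign and pathogenic but not marked as conflicted are resolved in a single pass over the named significances -- NOTE(review): the second "likely" test below is only reached when desc equals chargervariant.benign, so "likely pathogenic" entries appear to be mapped to likelyBenign here; confirm this is intended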
+		if isBenign == 1 and isPathogenic == 1 and int(isConflicted) == 0:
+			for desc in named.split(splitChar):
+				if re.match("likely", desc.lower() ) and desc != chargervariant.benign:
 					desc = chargervariant.likelyBenign
-				elif re.match( "benign", desc.lower( ) ):
+				elif re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
+					desc = chargervariant.likelyPathogenic
+				elif re.match( "benign", desc.lower() ):
 					desc = chargervariant.benign
 					break
-
-		if isPathogenic == 1:
-			for desc in named.split( splitChar ):
-				if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
-					desc = chargervariant.likelyPathogenic
 				elif re.match( "pathog", desc.lower( ) ):
 					desc = chargervariant.pathogenic
 					break
+		else:
+			if isBenign == 1:
+				for desc in named.split( splitChar ):
+					if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign:
+						desc = chargervariant.likelyBenign
+					elif re.match( "benign", desc.lower( ) ):
+						desc = chargervariant.benign
+						break
+
+			if isPathogenic == 1:
+				for desc in named.split( splitChar ):
+					if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic:
+						desc = chargervariant.likelyPathogenic
+					elif re.match( "pathog", desc.lower( ) ):
+						desc = chargervariant.pathogenic
+						break
 		return [ desc , status ]
 
 	def getMacClinVarVCF( self , vcffile ):

diff --git a/charger/chargervariant.py b/charger/chargervariant.py
@@ -1,7 +1,11 @@
 #!/usr/bin/env python
-# chargervariant - CharGer annotated variants
-# author: Adam D Scott ([email protected]) & Kuan-lin Huang ([email protected])
-# version: v0.0 - 2016*01*13
+# CharGer - Characterization of Germline variants
+# author: 
+#	- Adam D Scott ([email protected])
+#	- Fernanda Martins Rodrigues ([email protected])
+#	- Jay R. Mashl ([email protected])
+#	- Kuan-lin Huang ([email protected])
+# version: v0.5.3 - September, 2019
 
 import pdb
 from biomine.variant.clinvarvariant import clinvarvariant

diff --git a/setup.py b/setup.py
@@ -1,6 +1,6 @@
 #https://docs.python.org/2/distutils/examples.html
 from distutils.core import setup
-version = "0.5.2"
+version = "0.5.3"
 setup( \
 	name = 'CharGer' , 
 	version = version ,