-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #34 from ding-lab/v.0.5.3
V.0.5.3
- Loading branch information
Showing 5 changed files with 74 additions and 30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
#!/usr/bin/env python | ||
# CharGer - Characterization of Germline variants | ||
# author: Adam D Scott ([email protected]) | ||
# author: Kuan-lin Huang ([email protected]) | ||
# author: Amila Weerasinghe ([email protected]) | ||
# author: R Jay Mashl ([email protected]) | ||
# version: v0.5.2 - 2018*01 | ||
# author: | ||
# - Adam D Scott ([email protected]) | ||
# - Fernanda Martins Rodrigues ([email protected]) | ||
# - Jay R. Mashl ([email protected]) | ||
# - Kuan-lin Huang ([email protected]) | ||
# version: v0.5.3 - September, 2019 | ||
|
||
import sys | ||
import getopt | ||
|
@@ -13,7 +14,7 @@ import time | |
import argparse | ||
|
||
def parseArgs( argv ): | ||
helpText = "\nCharGer - v0.5.2\n\n" | ||
helpText = "\nCharGer - v0.5.3\n\n" | ||
helpText += "Usage: " | ||
helpText += "charger <input file> [options]\n\n" | ||
helpText += "Accepted input data files:\n" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,12 @@ | ||
#!/usr/bin/env python | ||
# autovivification - extends dict | ||
# author: Kuan-lin Huang ([email protected]) & Adam D Scott ([email protected]) | ||
# version: v0.0 - 2016*01*12 | ||
# CharGer - Characterization of Germline variants | ||
# author: | ||
# - Adam D Scott ([email protected]) | ||
# - Fernanda Martins Rodrigues ([email protected]) | ||
# - Jay R. Mashl ([email protected]) | ||
# - Kuan-lin Huang ([email protected]) | ||
# version: v0.5.3 - September, 2019 | ||
|
||
class autovivification(dict): | ||
'''Implementation of perl's autovivification feature.''' | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
# - Fernanda Martins Rodrigues ([email protected]) | ||
# - Jay R. Mashl ([email protected]) | ||
# - Kuan-lin Huang ([email protected]) | ||
# version: v0.5.2 | ||
# version: v0.5.3 - September, 2019 | ||
|
||
import os | ||
import sys | ||
|
@@ -904,26 +904,46 @@ def getMacClinVarTSV( self , tsvfile ): | |
for line in macFile: | ||
fields = ( line.rstrip( ) ).split( "\t" ) | ||
[ description , status ] = self.parseMacPathogenicity( header, fields ) # no need to specify which fields here anymore; parseMacPathogenicity now knows which specific columns to look for | ||
# fixed coordinates for clinvar file (refer to pull request #19) | ||
pos = int(fields[header.index("pos")]) | ||
ref = fields[header.index("ref")] | ||
alt = fields[header.index("alt")] | ||
if len(ref) == 1 and len(alt) > 1: # insertion | ||
ref = '-' | ||
alt = alt[1:] | ||
start = pos | ||
stop = pos + 1 | ||
elif len(ref) > 1 and len(alt) == 1: # deletion | ||
ref = ref[1:] | ||
alt = '-' | ||
start = pos + 1 | ||
stop = pos + len(ref) | ||
else: # snv | ||
start = pos | ||
stop = pos | ||
|
||
if len(header) > 27: # if yes, file is in the new format | ||
var = clinvarvariant( chromosome = fields[header.index("chrom")] , \ | ||
start = fields[header.index("pos")] , \ | ||
reference = fields[header.index("ref")] , \ | ||
alternate = fields[header.index("alt")] , \ | ||
start = start , \ | ||
stop = stop , \ | ||
reference = ref , \ | ||
alternate = alt , \ | ||
uid = fields[header.index("variation_id")], \ | ||
gene = fields[header.index("symbol")] , \ | ||
clinical = { "description" : description , "review_status" : status } , \ | ||
trait = { fields[header.index("xrefs")] : fields[header.index("all_traits")] } ) | ||
else: # file in the old format | ||
var = clinvarvariant( chromosome = fields[header.index("chrom")] , \ | ||
start = fields[header.index("pos")] , \ | ||
reference = fields[header.index("ref")] , \ | ||
alternate = fields[header.index("alt")] , \ | ||
start = start , \ | ||
stop = stop , \ | ||
reference = ref , \ | ||
alternate = alt , \ | ||
uid = fields[header.index("measureset_id")], \ | ||
gene = fields[header.index("symbol")] , \ | ||
clinical = { "description" : description , "review_status" : status } , \ | ||
trait = { fields[-1] : fields[header.index("all_traits")] } ) | ||
var.setStopFromReferenceAndAlternate( ) | ||
var.splitHGVSc( fields[header.index("hgvs_c")] , override = True ) | ||
|
||
var.splitHGVSc( fields[header.index("hgvs_c")] , override = False ) # refer to pull request #19 | ||
var.splitHGVSp( fields[header.index("hgvs_p")] ) | ||
#var.printVariant( "," ) | ||
#print( var.proteogenomicVar( ) ) | ||
|
@@ -974,21 +994,35 @@ def parseMacPathogenicity( header, fields ): # added header argument, so can re | |
else: | ||
splitChar="/" # new macarthur format | ||
|
||
if isBenign == 1: | ||
for desc in named.split( splitChar ): | ||
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign: | ||
# fixed parsing of conflicting ClinVar classification | ||
if isBenign == 1 and isPathogenic == 1 and int(isConflicted) == 0: | ||
for desc in named.split(splitChar): | ||
if re.match("likely", desc.lower() ) and desc != chargervariant.benign: | ||
desc = chargervariant.likelyBenign | ||
elif re.match( "benign", desc.lower( ) ): | ||
elif re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: | ||
desc = chargervariant.likelyPathogenic | ||
elif re.match( "benign", desc.lower() ): | ||
desc = chargervariant.benign | ||
break | ||
|
||
if isPathogenic == 1: | ||
for desc in named.split( splitChar ): | ||
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: | ||
desc = chargervariant.likelyPathogenic | ||
elif re.match( "pathog", desc.lower( ) ): | ||
desc = chargervariant.pathogenic | ||
break | ||
else: | ||
if isBenign == 1: | ||
for desc in named.split( splitChar ): | ||
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.benign: | ||
desc = chargervariant.likelyBenign | ||
elif re.match( "benign", desc.lower( ) ): | ||
desc = chargervariant.benign | ||
break | ||
|
||
if isPathogenic == 1: | ||
for desc in named.split( splitChar ): | ||
if re.match( "likely", desc.lower( ) ) and desc != chargervariant.pathogenic: | ||
desc = chargervariant.likelyPathogenic | ||
elif re.match( "pathog", desc.lower( ) ): | ||
desc = chargervariant.pathogenic | ||
break | ||
return [ desc , status ] | ||
|
||
def getMacClinVarVCF( self , vcffile ): | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
#!/usr/bin/env python | ||
# chargervariant - CharGer annotated variants | ||
# author: Adam D Scott ([email protected]) & Kuan-lin Huang ([email protected]) | ||
# version: v0.0 - 2016*01*13 | ||
# CharGer - Characterization of Germline variants | ||
# author: | ||
# - Adam D Scott ([email protected]) | ||
# - Fernanda Martins Rodrigues ([email protected]) | ||
# - Jay R. Mashl ([email protected]) | ||
# - Kuan-lin Huang ([email protected]) | ||
# version: v0.5.3 - September, 2019 | ||
|
||
import pdb | ||
from biomine.variant.clinvarvariant import clinvarvariant | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters