Skip to content

Commit

Permalink
update to v2.1.7
Browse files Browse the repository at this point in the history
  • Loading branch information
YuSugihara committed Apr 7, 2020
1 parent dab6614 commit 1598551
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 56 deletions.
26 changes: 17 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ $ mutmap -h
usage: mutmap -r <FASTA> -c <BAM|FASTQ> -b <BAM|FASTQ>
-n <INT> -o <OUT_DIR> [-T] [-e <DATABASE>]
MutMap version 2.1.6
MutMap version 2.1.7
optional arguments:
-h, --help show this help message and exit
Expand All @@ -101,7 +101,7 @@ optional arguments:
-d , --min-depth Minimum depth of variants which will be used.
This cutoff will be applied in both of cultivar
and bulk. [8]
-N , --N-rep Number of replicates for simulation to make
-N , --N-rep Number of replicates for simulation to make
null distribution. [5000]
-T, --trim Trim fastq using trimmomatic.
-a , --adapter FASTA of adapter sequences. This will be used
Expand All @@ -123,6 +123,11 @@ optional arguments:
-Q , --min-BQ Minimum base quality in mpileup. [18]
-C , --adjust-MQ "adjust-MQ" in mpileup. Default parameter
is suited for BWA. [50]
--species Consider multiple test correction derived by
Huang et al. (2019). Please spesify a species name.
With this option. QTL-seq produces a theoretical threshold.
Currently, Arabidopsis, Cucumber, Maize, Rapeseed,
Rice, Tobacco, Tomato, Wheat, and Yeast are supported.
-v, --version show program's version number and exit
```

Expand Down Expand Up @@ -216,9 +221,9 @@ $ mutplot -h
usage: mutplot -v <VCF> -o <OUT_DIR> -n <INT> [-w <INT>] [-s <INT>]
[-D <INT>] [-d <INT>] [-N <INT>] [-m <FLOAT>]
[-S <INT>] [-e <DATABASE>] [--igv] [--corr] [--indel]
[-S <INT>] [-e <DATABASE>] [--igv] [--indel]
MutPlot version 2.1.6
MutPlot version 2.1.7
optional arguments:
-h, --help show this help message and exit
Expand All @@ -234,7 +239,7 @@ optional arguments:
-d , --min-depth Minimum depth of variants which will be used.
This cutoff will be applied in both of cultivar
and bulk. [8]
-N , --N-rep Number of replicates for simulation to make
-N , --N-rep Number of replicates for simulation to make
null distribution. [5000]
-m , --min-SNPindex Cutoff of minimum SNP-index for clear results. [0.3]
-S , --strand-bias Filter spurious homo genotypes in cultivar using
Expand All @@ -245,10 +250,11 @@ optional arguments:
-e , --snpEff Predict causal variant using SnpEff. Please
check available databases in SnpEff.
--igv Output IGV format file to check results on IGV.
--corr Use the corrected threshold in Huang et al. (2019).
Please spesify mu_alpha_2 in Huang et al. (2019).
When you specify this option, p99 and p95 has the
same value.
--species Consider multiple test correction derived by
Huang et al. (2019). Please spesify a species name.
With this option. QTL-seq produces a theoretical threshold.
Currently, Arabidopsis, Cucumber, Maize, Rapeseed,
Rice, Tobacco, Tomato, Wheat, and Yeast are supported.
--indel Plot SNP-index with INDEL.
--fig-width Width allocated in chromosome figure. [7.5]
--fig-height Height allocated in chromosome figure. [4.0]
Expand Down Expand Up @@ -294,6 +300,8 @@ Inside of `OUT_DIR` is like below.
| `-- mutmap.vcf.gz.tbi
|-- 40_mutmap
| |-- snp_index.tsv
│ ├── snp_index.p95.tsv
│ ├── snp_index.p99.tsv
| |-- sliding_window.tsv
│ ├── sliding_window.p95.tsv
│ ├── sliding_window.p99.tsv
Expand Down
2 changes: 1 addition & 1 deletion mutmap/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.6"
__version__ = "2.1.7"
52 changes: 20 additions & 32 deletions mutmap/mutmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,38 +107,26 @@ def mpileup(self):
mp.run()

def mutplot(self):
if args.snpEff is None:
cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \
-n {1} \
-w {2} \
-s {3} \
-N {4} \
-D {5} \
-d {6} \
-o {0}/40_mutmap'.format(self.out,
self.args.N_bulk,
self.args.window,
self.args.step,
self.args.N_rep,
self.args.max_depth,
self.args.min_depth)
else:
cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \
-n {1} \
-w {2} \
-s {3} \
-N {4} \
-D {5} \
-d {6} \
-o {0}/40_mutmap \
-e {7}'.format(self.out,
self.args.N_bulk,
self.args.window,
self.args.step,
self.args.N_rep,
self.args.max_depth,
self.args.min_depth,
self.args.snpEff)
cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \
-n {1} \
-w {2} \
-s {3} \
-N {4} \
-D {5} \
-d {6} \
-o {0}/40_mutmap'.format(self.out,
self.args.N_bulk,
self.args.window,
self.args.step,
self.args.N_rep,
self.args.max_depth,
self.args.min_depth)

if self.args.snpEff is not None:
cmd = cmd + ' -e {}'.format(self.args.snpEff)

if self.args.species is not None:
cmd = cmd + ' --species {}'.format(self.args.species)

cmd = clean_cmd(cmd)
p = sbp.Popen(cmd,
Expand Down
34 changes: 34 additions & 0 deletions mutmap/mutplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,39 @@ def run_snpEff(self):

self.args.vcf = '{}/mutmap.snpEff.vcf'.format(self.out)

def get_outlier_SNPindex(self):
if self.snpEff is None:
snp_index = pd.read_csv('{}/snp_index.tsv'.format(self.out),
sep='\t',
names=['CHROM',
'POSI',
'variant',
'depth',
'p99',
'p95',
'SNPindex'])
else:
snp_index = pd.read_csv('{}/snp_index.tsv'.format(self.out),
sep='\t',
names=['CHROM',
'POSI',
'variant',
'impact',
'depth',
'p99',
'p95',
'SNPindex'])

snp_index[abs(snp_index['p99']) <= \
abs(snp_index['SNPindex'])].to_csv('{}/snp_index.p99.tsv'.format(self.out),
sep='\t',
index=False)

snp_index[abs(snp_index['p95']) <= \
abs(snp_index['SNPindex'])].to_csv('{}/snp_index.p95.tsv'.format(self.out),
sep='\t',
index=False)

def get_outlier_windows(self):
sliding_window = pd.read_csv('{}/sliding_window.tsv'.format(self.out),
sep='\t',
Expand Down Expand Up @@ -143,6 +176,7 @@ def run(self):
pt = Plot(self.args)
pt.run()

self.get_outlier_SNPindex()
self.get_outlier_windows()

if self.args.igv:
Expand Down
41 changes: 34 additions & 7 deletions mutmap/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,24 @@ def mutmap_options(self):
'is suited for BWA. [50]'),
metavar='')

parser.add_argument('--species',
action='store',
choices=['Arabidopsis',
'Cucumber',
'Maize',
'Rapeseed',
'Rice',
'Tobacco',
'Tomato',
'Wheat',
'Yeast'],
help=('Consider multiple test correction derived by\n'
'Huang et al. (2019). Please spesify a species name.\n'
'With this option. QTL-seq produces a theoretical threshold.\n'
'Currently, Arabidopsis, Cucumber, Maize, Rapeseed,\n'
'Rice, Tobacco, Tomato, Wheat, and Yeast are supported.'),
metavar='')

# set version
parser.add_argument('-v',
'--version',
Expand All @@ -215,7 +233,7 @@ def mutplot_options(self):
formatter_class=argparse.RawTextHelpFormatter)
parser.usage = ('mutplot -v <VCF> -o <OUT_DIR> -n <INT> [-w <INT>] [-s <INT>]\n'
' [-D <INT>] [-d <INT>] [-N <INT>] [-m <FLOAT>]\n'
' [-S <INT>] [-e <DATABASE>] [--igv] [--corr] [--indel]')
' [-S <INT>] [-e <DATABASE>] [--igv] [--indel]\n')

# set options
parser.add_argument('-v',
Expand Down Expand Up @@ -321,13 +339,22 @@ def mutplot_options(self):
default=False,
help='Output IGV format file to check results on IGV.')

parser.add_argument('--corr',
parser.add_argument('--species',
action='store',
type=float,
help=('Use the corrected threshold in Huang et al. (2019).\n'
'Please spesify mu_alpha_2 in Huang et al. (2019).\n'
'When you specify this option, p99 and p95 has the\n'
'same value.'),
choices=['Arabidopsis',
'Cucumber',
'Maize',
'Rapeseed',
'Rice',
'Tobacco',
'Tomato',
'Wheat',
'Yeast'],
help=('Consider multiple test correction derived by\n'
'Huang et al. (2019). Please spesify a species name.\n'
'With this option. QTL-seq produces a theoretical threshold.\n'
'Currently, Arabidopsis, Cucumber, Maize, Rapeseed,\n'
'Rice, Tobacco, Tomato, Wheat, and Yeast are supported.'),
metavar='')

parser.add_argument('--indel',
Expand Down
57 changes: 50 additions & 7 deletions mutmap/vcf2index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self, args):
self.out = args.out
self.vcf = args.vcf
self.snpEff = args.snpEff
self.corr = args.corr
self.species = args.species
self.N_bulk = args.N_bulk
self.N_replicates = args.N_rep
self.min_SNPindex = args.min_SNPindex
Expand All @@ -25,9 +25,51 @@ def __init__(self, args):
if self.snpEff is not None:
self.ANN_re = re.compile(';ANN=(.*);*')

if self.corr is not None:
var_fix = 0.25/(2*self.N_bulk)
self.threshold = self.corr*(var_fix**(1/2)) + 0.5
if self.species is not None:
self.u99, self.u95 = self.correct_threshold()

def correct_threshold(self):
if self.species == 'Arabidopsis':
u99 = 3.82
u95 = 3.41

elif self.species == 'Cucumber':
u99 = 4.02
u95 = 3.62

elif self.species == 'Maize':
u99 = 4.11
u95 = 3.72

elif self.species == 'Rapeseed':
u99 = 4.16
u95 = 3.78

elif self.species == 'Rice':
u99 = 4.05
u95 = 3.65

elif self.species == 'Tobacco':
u99 = 4.22
u95 = 3.84

elif self.species == 'Tomato':
u99 = 4.04
u95 = 3.65

elif self.species == 'Wheat':
u99 = 4.21
u95 = 3.83

elif self.species == 'Yeast':
u99 = 4.31
u95 = 3.93

else:
print('You specified not supported species.', file=sys.stderr)
sys.exit(1)

return u99, u95

def get_field(self):
root, ext = os.path.splitext(self.vcf)
Expand Down Expand Up @@ -142,10 +184,11 @@ def calc_SNPindex(self, field_pos):
if record['type'] == 'keep':
variant = self.check_variant_type(REF, ALT)

if self.corr is not None:
p99, p95 = self.threshold, self.threshold
else:
if self.species is None:
p99, p95 = self.F2_simulation(record['bulk_depth'])
else:
p99 = self.u99*0.25/record['bulk_depth'] + 0.5
p95 = self.u95*0.25/record['bulk_depth'] + 0.5

if self.snpEff is None:
snp_index.write(('{}\t{}\t{}\t{}\t'
Expand Down

0 comments on commit 1598551

Please sign in to comment.