From 1598551f4ee84fbdeed6433ea76089a0d33603c3 Mon Sep 17 00:00:00 2001 From: yu57th Date: Tue, 7 Apr 2020 22:17:34 +0900 Subject: [PATCH] update to v2.1.7 --- README.md | 26 ++++++++++++++------- mutmap/__init__.py | 2 +- mutmap/mutmap.py | 52 ++++++++++++++++------------------------- mutmap/mutplot.py | 34 +++++++++++++++++++++++++++ mutmap/params.py | 41 ++++++++++++++++++++++++++------ mutmap/vcf2index.py | 57 +++++++++++++++++++++++++++++++++++++++------ 6 files changed, 156 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 2ed2c09..ffcfe2d 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ $ mutmap -h usage: mutmap -r -c -b -n -o [-T] [-e ] -MutMap version 2.1.6 +MutMap version 2.1.7 optional arguments: -h, --help show this help message and exit @@ -101,7 +101,7 @@ optional arguments: -d , --min-depth Minimum depth of variants which will be used. This cutoff will be applied in both of cultivar and bulk. [8] - -N , --N-rep Number of replicates for simulation to make + -N , --N-rep Number of replicates for simulation to make null distribution. [5000] -T, --trim Trim fastq using trimmomatic. -a , --adapter FASTA of adapter sequences. This will be used @@ -123,6 +123,11 @@ optional arguments: -Q , --min-BQ Minimum base quality in mpileup. [18] -C , --adjust-MQ "adjust-MQ" in mpileup. Default parameter is suited for BWA. [50] + --species Consider multiple test correction derived by + Huang et al. (2019). Please spesify a species name. + With this option. QTL-seq produces a theoretical threshold. + Currently, Arabidopsis, Cucumber, Maize, Rapeseed, + Rice, Tobacco, Tomato, Wheat, and Yeast are supported. -v, --version show program's version number and exit ``` @@ -216,9 +221,9 @@ $ mutplot -h usage: mutplot -v -o -n [-w ] [-s ] [-D ] [-d ] [-N ] [-m ] - [-S ] [-e ] [--igv] [--corr] [--indel] + [-S ] [-e ] [--igv] [--indel] -MutPlot version 2.1.6 +MutPlot version 2.1.7 optional arguments: -h, --help show this help message and exit @@ -234,7 +239,7 @@ optional arguments: -d , --min-depth Minimum depth of variants which will be used. This cutoff will be applied in both of cultivar and bulk. [8] - -N , --N-rep Number of replicates for simulation to make + -N , --N-rep Number of replicates for simulation to make null distribution. [5000] -m , --min-SNPindex Cutoff of minimum SNP-index for clear results. [0.3] -S , --strand-bias Filter spurious homo genotypes in cultivar using @@ -245,10 +250,11 @@ optional arguments: -e , --snpEff Predict causal variant using SnpEff. Please check available databases in SnpEff. --igv Output IGV format file to check results on IGV. - --corr Use the corrected threshold in Huang et al. (2019). - Please spesify mu_alpha_2 in Huang et al. (2019). - When you specify this option, p99 and p95 has the - same value. + --species Consider multiple test correction derived by + Huang et al. (2019). Please spesify a species name. + With this option. QTL-seq produces a theoretical threshold. + Currently, Arabidopsis, Cucumber, Maize, Rapeseed, + Rice, Tobacco, Tomato, Wheat, and Yeast are supported. --indel Plot SNP-index with INDEL. --fig-width Width allocated in chromosome figure. [7.5] --fig-height Height allocated in chromosome figure. [4.0] @@ -294,6 +300,8 @@ Inside of `OUT_DIR` is like below. | `-- mutmap.vcf.gz.tbi |-- 40_mutmap | |-- snp_index.tsv +│ ├── snp_index.p95.tsv +│ ├── snp_index.p99.tsv | |-- sliding_window.tsv │ ├── sliding_window.p95.tsv │ ├── sliding_window.p99.tsv diff --git a/mutmap/__init__.py b/mutmap/__init__.py index edc60b3..b6d2a0c 100644 --- a/mutmap/__init__.py +++ b/mutmap/__init__.py @@ -1 +1 @@ -__version__ = "2.1.6" +__version__ = "2.1.7" diff --git a/mutmap/mutmap.py b/mutmap/mutmap.py index 69a049d..ac0674f 100755 --- a/mutmap/mutmap.py +++ b/mutmap/mutmap.py @@ -107,38 +107,26 @@ def mpileup(self): mp.run() def mutplot(self): - if args.snpEff is None: - cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \ - -n {1} \ - -w {2} \ - -s {3} \ - -N {4} \ - -D {5} \ - -d {6} \ - -o {0}/40_mutmap'.format(self.out, - self.args.N_bulk, - self.args.window, - self.args.step, - self.args.N_rep, - self.args.max_depth, - self.args.min_depth) - else: - cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \ - -n {1} \ - -w {2} \ - -s {3} \ - -N {4} \ - -D {5} \ - -d {6} \ - -o {0}/40_mutmap \ - -e {7}'.format(self.out, - self.args.N_bulk, - self.args.window, - self.args.step, - self.args.N_rep, - self.args.max_depth, - self.args.min_depth, - self.args.snpEff) + cmd = 'mutplot -v {0}/30_vcf/mutmap.vcf.gz \ + -n {1} \ + -w {2} \ + -s {3} \ + -N {4} \ + -D {5} \ + -d {6} \ + -o {0}/40_mutmap'.format(self.out, + self.args.N_bulk, + self.args.window, + self.args.step, + self.args.N_rep, + self.args.max_depth, + self.args.min_depth) + + if self.args.snpEff is not None: + cmd = cmd + ' -e {}'.format(self.args.snpEff) + + if self.args.species is not None: + cmd = cmd + ' --species {}'.format(self.args.species) cmd = clean_cmd(cmd) p = sbp.Popen(cmd, diff --git a/mutmap/mutplot.py b/mutmap/mutplot.py index 61cbe25..16df72b 100755 --- a/mutmap/mutplot.py +++ b/mutmap/mutplot.py @@ -45,6 +45,39 @@ def run_snpEff(self): self.args.vcf = '{}/mutmap.snpEff.vcf'.format(self.out) + def get_outlier_SNPindex(self): + if self.snpEff is None: + snp_index = pd.read_csv('{}/snp_index.tsv'.format(self.out), + sep='\t', + names=['CHROM', + 'POSI', + 'variant', + 'depth', + 'p99', + 'p95', + 'SNPindex']) + else: + snp_index = pd.read_csv('{}/snp_index.tsv'.format(self.out), + sep='\t', + names=['CHROM', + 'POSI', + 'variant', + 'impact', + 'depth', + 'p99', + 'p95', + 'SNPindex']) + + snp_index[abs(snp_index['p99']) <= \ + abs(snp_index['SNPindex'])].to_csv('{}/snp_index.p99.tsv'.format(self.out), + sep='\t', + index=False) + + snp_index[abs(snp_index['p95']) <= \ + abs(snp_index['SNPindex'])].to_csv('{}/snp_index.p95.tsv'.format(self.out), + sep='\t', + index=False) + def get_outlier_windows(self): sliding_window = pd.read_csv('{}/sliding_window.tsv'.format(self.out), sep='\t', @@ -143,6 +176,7 @@ def run(self): pt = Plot(self.args) pt.run() + self.get_outlier_SNPindex() self.get_outlier_windows() if self.args.igv: diff --git a/mutmap/params.py b/mutmap/params.py index 7aeb334..8da987b 100755 --- a/mutmap/params.py +++ b/mutmap/params.py @@ -202,6 +202,24 @@ def mutmap_options(self): 'is suited for BWA. [50]'), metavar='') + parser.add_argument('--species', + action='store', + choices=['Arabidopsis', + 'Cucumber', + 'Maize', + 'Rapeseed', + 'Rice', + 'Tobacco', + 'Tomato', + 'Wheat', + 'Yeast'], + help=('Consider multiple test correction derived by\n' + 'Huang et al. (2019). Please spesify a species name.\n' + 'With this option. QTL-seq produces a theoretical threshold.\n' + 'Currently, Arabidopsis, Cucumber, Maize, Rapeseed,\n' + 'Rice, Tobacco, Tomato, Wheat, and Yeast are supported.'), + metavar='') + # set version parser.add_argument('-v', '--version', @@ -215,7 +233,7 @@ def mutplot_options(self): formatter_class=argparse.RawTextHelpFormatter) parser.usage = ('mutplot -v -o -n [-w ] [-s ]\n' ' [-D ] [-d ] [-N ] [-m ]\n' - ' [-S ] [-e ] [--igv] [--corr] [--indel]') + ' [-S ] [-e ] [--igv] [--indel]\n') # set options parser.add_argument('-v', @@ -321,13 +339,22 @@ def mutplot_options(self): default=False, help='Output IGV format file to check results on IGV.') - parser.add_argument('--corr', + parser.add_argument('--species', action='store', - type=float, - help=('Use the corrected threshold in Huang et al. (2019).\n' - 'Please spesify mu_alpha_2 in Huang et al. (2019).\n' - 'When you specify this option, p99 and p95 has the\n' - 'same value.'), + choices=['Arabidopsis', + 'Cucumber', + 'Maize', + 'Rapeseed', + 'Rice', + 'Tobacco', + 'Tomato', + 'Wheat', + 'Yeast'], + help=('Consider multiple test correction derived by\n' + 'Huang et al. (2019). Please spesify a species name.\n' + 'With this option. QTL-seq produces a theoretical threshold.\n' + 'Currently, Arabidopsis, Cucumber, Maize, Rapeseed,\n' + 'Rice, Tobacco, Tomato, Wheat, and Yeast are supported.'), metavar='') parser.add_argument('--indel', diff --git a/mutmap/vcf2index.py b/mutmap/vcf2index.py index 40fb8a9..d16f6bd 100755 --- a/mutmap/vcf2index.py +++ b/mutmap/vcf2index.py @@ -16,7 +16,7 @@ def __init__(self, args): self.out = args.out self.vcf = args.vcf self.snpEff = args.snpEff - self.corr = args.corr + self.species = args.species self.N_bulk = args.N_bulk self.N_replicates = args.N_rep self.min_SNPindex = args.min_SNPindex @@ -25,9 +25,51 @@ def __init__(self, args): if self.snpEff is not None: self.ANN_re = re.compile(';ANN=(.*);*') - if self.corr is not None: - var_fix = 0.25/(2*self.N_bulk) - self.threshold = self.corr*(var_fix**(1/2)) + 0.5 + if self.species is not None: + self.u99, self.u95 = self.correct_threshold() + + def correct_threshold(self): + if self.species == 'Arabidopsis': + u99 = 3.82 + u95 = 3.41 + + elif self.species == 'Cucumber': + u99 = 4.02 + u95 = 3.62 + + elif self.species == 'Maize': + u99 = 4.11 + u95 = 3.72 + + elif self.species == 'Rapeseed': + u99 = 4.16 + u95 = 3.78 + + elif self.species == 'Rice': + u99 = 4.05 + u95 = 3.65 + + elif self.species == 'Tobacco': + u99 = 4.22 + u95 = 3.84 + + elif self.species == 'Tomato': + u99 = 4.04 + u95 = 3.65 + + elif self.species == 'Wheat': + u99 = 4.21 + u95 = 3.83 + + elif self.species == 'Yeast': + u99 = 4.31 + u95 = 3.93 + + else: + print('You specified not supported species.', file=sys.stderr) + sys.exit(1) + + return u99, u95 def get_field(self): root, ext = os.path.splitext(self.vcf) @@ -142,10 +184,11 @@ def calc_SNPindex(self, field_pos): if record['type'] == 'keep': variant = self.check_variant_type(REF, ALT) - if self.corr is not None: - p99, p95 = self.threshold, self.threshold - else: + if self.species is None: p99, p95 = self.F2_simulation(record['bulk_depth']) + else: + p99 = self.u99*0.25/record['bulk_depth'] + 0.5 + p95 = self.u95*0.25/record['bulk_depth'] + 0.5 if self.snpEff is None: snp_index.write(('{}\t{}\t{}\t{}\t'