Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gzip support #35

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ taxonomy ID. Additional options are specified below.
Optional:
* `-o2, --output2 OUTPUT.FASTA.............`second output FASTA/Q file with extracted seqs (for paired reads)
* `--fastq-output..........................`Instead of producing FASTA files, print FASTQ files (requires FASTQ input)
* `--gzip-output...........................`Compress the output FASTA/Q file(s) with gzip
* `--exclude...............................`Instead of finding reads matching specified taxids, finds reads NOT matching specified taxids.
* `-r, --report MYFILE.KREPORT.............`Kraken report file (required if specifying --include-children or --include-parents)
* `--include-children......................`include reads classified at more specific levels than specified taxonomy ID levels.
Expand Down
8 changes: 6 additions & 2 deletions combine_kreports.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
# - main
# - process_kraken_report
####################################################################
import gzip
import os, sys, argparse
import operator
from time import gmtime
Expand Down Expand Up @@ -197,8 +198,11 @@ def main():
sys.stdout.write("\r\t%i/%i samples processed" % (count_samples, num_samples))
sys.stdout.flush()
id2files[count_samples] = r_file
#Open File
curr_file = open(r_file,'r')
#Open File
if r_file.endswith(".gz"):
curr_file = gzip.open(r_file,'rt')
else:
curr_file = open(r_file, 'r')
for line in curr_file:
report_vals = process_kraken_report(line)
if len(report_vals) < 5:
Expand Down
25 changes: 19 additions & 6 deletions extract_kraken_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ def main():
parser.add_argument('--fastq-output', dest='fastq_out', required=False,
action='store_true',default=False,
help='Print output FASTQ reads [requires input FASTQ, default: output is FASTA]')
parser.add_argument('--gzip-output', dest='gzip_output', required=False,
action='store_true', default=False)
parser.set_defaults(append=False)

args=parser.parse_args()
Expand Down Expand Up @@ -343,13 +345,24 @@ def main():
sys.stdout.flush()
#Open output file
if (args.append):
o_file = open(args.output_file, 'a')
if args.output_file2 != '':
o_file2 = open(args.output_file2, 'a')
if not args.gzip_output:
o_file = open(args.output_file, 'a')
if args.output_file2 != '':
o_file2 = open(args.output_file2, 'a')
else:
o_file = gzip.open(args.output_file, 'at')
if args.output_file2 != '':
o_file2 = gzip.open(args.output_file2, 'at')
else:
o_file = open(args.output_file, 'w')
if args.output_file2 != '':
o_file2 = open(args.output_file2, 'w')
if not args.gzip_output:
o_file = open(args.output_file, 'w')
if args.output_file2 != '':
o_file2 = open(args.output_file2, 'w')
else:
o_file = gzip.open(args.output_file, 'wt')
if args.output_file2 != '':
o_file2 = gzip.open(args.output_file2, 'wt')

#Process SEQUENCE 1 file
count_seqs = 0
count_output = 0
Expand Down