-
Notifications
You must be signed in to change notification settings - Fork 1
/
merge_bwt.cpp
117 lines (95 loc) · 2.9 KB
/
merge_bwt.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*
* merge_bwt.cpp
*
* Created on: Nov 26, 2018
* Author: nico
*/
#include <iostream>
#include <fstream>
#include "internal/dna_bwt.hpp"
#include "internal/bwt.hpp"
#include "internal/bwt_merger.hpp"
using namespace std;
string input_bwt1;
string input_bwt2;
string output_file;
bool out_da = false;
uint8_t lcp_size = 0;
void help(){
cout << "merge_bwt [options]" << endl <<
"Merges eBWTs of two read sets. Only A,C,G,T, and terminator # are allowed in the input eBWTs." <<
"Options:" << endl <<
"-h Print this help" << endl <<
"-1 <arg> Input BWT index 1 (REQUIRED)" << endl <<
"-2 <arg> Input BWT index 2 (REQUIRED)" << endl <<
"-o <arg> Output prefix (REQUIRED)" << endl <<
"-d Output document array as an ASCII file of 0/1. Default: do not output." << endl <<
"-l <arg> Output LCP of the merged BWT using <arg>=0,1,2,4,8 Bytes" << endl <<
" per integer. If arg=0, LCP is not computed (faster). Default: 0." << endl;
exit(0);
}
int main(int argc, char** argv){
if(argc < 4) help();
int opt;
while ((opt = getopt(argc, argv, "h1:2:o:l:d")) != -1){
switch (opt){
case 'h':
help();
break;
case '1':
input_bwt1 = string(optarg);
break;
case '2':
input_bwt2 = string(optarg);
break;
case 'o':
output_file = string(optarg);
break;
case 'l':
lcp_size = atoi(optarg);
break;
case 'd':
out_da = true;
break;
default:
help();
return -1;
}
}
if(input_bwt1.size()==0) help();
if(input_bwt2.size()==0) help();
if(output_file.size()==0) help();
cout << "Input bwt index file 1: " << input_bwt1 << endl;
cout << "Input bwt index file 2: " << input_bwt2 << endl;
cout << "Output prefix: " << output_file << endl;
cout << "Loading BWTs ... " << endl;
dna_bwt_t BWT1;
dna_bwt_t BWT2;
BWT1.load_from_file(input_bwt1);
BWT2.load_from_file(input_bwt2);
cout << "Done. Size of BWTs: " << BWT1.size() << " and " << BWT2.size() << endl;
switch(lcp_size){
case 0: { bwt_merger<dna_bwt_t,dna_bwt_t, uint8_t> M0(&BWT1, &BWT2, false, out_da);
cout << "Storing output to file ... " << endl;
M0.save_to_file(output_file);
break; }
case 1: { bwt_merger<dna_bwt_t,dna_bwt_t, uint8_t> M1(&BWT1, &BWT2, true, out_da);
cout << "Storing output to file ... " << endl;
M1.save_to_file(output_file);
break;}
case 2: {bwt_merger<dna_bwt_t,dna_bwt_t, uint16_t> M2(&BWT1, &BWT2, true, out_da);
cout << "Storing output to file ... " << endl;
M2.save_to_file(output_file);
break;}
case 4: {bwt_merger<dna_bwt_t,dna_bwt_t, uint32_t> M4(&BWT1, &BWT2, true, out_da);
cout << "Storing output to file ... " << endl;
M4.save_to_file(output_file);
break;}
case 8: {bwt_merger<dna_bwt_t,dna_bwt_t, uint64_t> M8(&BWT1, &BWT2, true, out_da);
cout << "Storing output to file ... " << endl;
M8.save_to_file(output_file);
break;}
default:break;
}
cout << "Done. " << endl;
}