-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtab_to_plink_binary.cpp
111 lines (98 loc) · 2.76 KB
/
tab_to_plink_binary.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#include "plink_binary.h"
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <vector>
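/*
 * Construct a PLINK binary ped file from tab-delimited genotype data read
 * from standard input in a matrix format: the first line lists the
 * individual names, and each following line holds a SNP name followed by
 * that SNP's genotypes.
 */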
using namespace std;
void line_split(char *s, vector<string> &toks);
char *getline_unlimited(FILE *f, int ignore_cr, int strip_nl);
/*
 * Program entry point.
 *
 * Reads a tab-delimited genotype matrix from standard input and writes it
 * through plink_binary to the output named by the first non-option argument.
 *
 * Options:
 *   -c CHR   chromosome assigned to every SNP (default "0")
 *   -m CHAR  character stored as plink_binary::missing_genotype (default 'N')
 *
 * Input layout:
 *   line 1   individual names, one per tab-separated field
 *   line 2+  SNP name followed by genotype fields; only the first two
 *            characters of each genotype field are used
 *
 * Returns 0 on success and 1 on a usage error or a missing/empty header line.
 */
int main(int argc, char *argv[])
{
    // getopt() returns an int. Keeping it in a char makes the "!= -1" test
    // always true on platforms where plain char is unsigned.
    int c;
    std::string chromosome = "0";
    char missing = 'N';
    while ((c = getopt(argc, argv, "c:m:")) != -1) {
        switch (c) {
        case 'c':
            chromosome = optarg;
            break;
        case 'm':
            missing = *optarg;
            break;
        }
    }
    if (argc <= optind) {
        std::cout << "Usage: " << argv[0] << " [ options ] PLINK_BINARY" << std::endl;
        std::cout << "Options: -c chromosome number" << std::endl;
        std::cout << " -m missing genotype character (default " << missing << ")" << std::endl;
        return 1;
    }

    // Header line: every field is an individual name. Guard against a
    // missing or empty line before handing the buffer to line_split().
    std::vector<std::string> data;
    char *buffer = getline_unlimited(stdin, 1, 1);
    if (buffer != NULL && buffer[0] != '\0')
        line_split(buffer, data);
    if (data.empty()) {
        std::cerr << argv[0] << ": no individual names found on the first input line" << std::endl;
        return 1;
    }

    plink_binary *pb = new plink_binary();
    for (std::vector<std::string>::size_type i = 0; i < data.size(); i++) {
        gftools::individual ind;
        ind.name = data[i];
        pb->individuals.push_back(ind);
    }
    pb->open(argv[optind], 1);
    pb->missing_genotype = missing;

    // Remaining lines: one SNP per line.
    while ((buffer = getline_unlimited(stdin, 1, 1)) != NULL) {
        if (buffer[0] == '\0')
            continue;               // blank line: nothing to split
        line_split(buffer, data);
        if (data.empty())
            continue;               // line held only separators, no SNP name
        gftools::snp snp;
        snp.name = data[0];
        snp.chromosome = chromosome;
        std::vector<std::string> genotypes;
        genotypes.reserve(data.size() - 1);
        for (std::vector<std::string>::size_type i = 1; i < data.size(); i++)
            genotypes.push_back(data[i].substr(0, 2));
        pb->write_snp(snp, genotypes);
    }
    pb->close();
    delete pb;
    return 0;
}
/*
 * Split a line into its tab-separated fields.
 *
 * s     NUL-terminated line. A single trailing '\n', if present, is removed
 *       in place. May be NULL or empty, in which case toks is left empty.
 * toks  receives the fields; any previous contents are discarded.
 *
 * Runs of consecutive tabs count as one separator, and leading or trailing
 * tabs are ignored, so empty fields are never produced.
 */
void line_split(char *s, std::vector<std::string> &toks)
{
    toks.clear();
    if (s == NULL)
        return;

    // Check the length first: indexing s[strlen(s) - 1] on an empty string
    // would read before the start of the buffer.
    size_t n = strlen(s);
    if (n > 0 && s[n - 1] == '\n')
        s[n - 1] = '\0';

    const char *p = s;
    for (;;) {
        p += strspn(p, "\t");           // skip separator run
        if (*p == '\0')
            break;
        size_t width = strcspn(p, "\t"); // length of the next field
        toks.push_back(std::string(p, width));
        p += width;
    }
}
// Grow a line buffer by BUFSIZ bytes and return the new block. Prints a
// message and exits with status 5 if the allocation fails.
static char *grow_line_buffer(char *buf, size_t *buflen)
{
    char *grown = static_cast<char *>(realloc(buf, *buflen + BUFSIZ));
    if (grown == NULL) {
        fprintf(stderr, "Memory alloc problem reading\n");
        exit(5);
    }
    *buflen += BUFSIZ;
    return grown;
}

// Read an arbitrarily long line
//
// f          stream to read from
// ignore_cr  when non-zero, '\r' characters are dropped from the line
// strip_nl   kept for interface compatibility; the terminating '\n' is never
//            stored, so the returned line never contains it either way
//
// Returns a pointer to a NUL-terminated line held in a buffer that is reused
// by every call: the contents are only valid until the next call and the
// caller must not free it. Returns NULL once no more data can be read. An
// empty line yields "" (not NULL), and a final line without a trailing
// newline is still returned.
char *getline_unlimited(FILE *f, int ignore_cr, int strip_nl)
{
    static char *buf = NULL;
    static size_t buflen = 0;
    size_t len = 0;
    int c;

    (void)strip_nl;

    while ((c = fgetc(f)) != EOF && c != '\n') {
        if (ignore_cr && c == '\r')
            continue;
        // Always keep one spare byte so the terminator below fits even when
        // the line length is an exact multiple of BUFSIZ.
        if (len + 1 >= buflen)
            buf = grow_line_buffer(buf, &buflen);
        buf[len++] = static_cast<char>(c);
    }

    // End of input (or a read error) with nothing collected: no more lines.
    if (c == EOF && len == 0)
        return NULL;

    // An empty line on the very first call reaches here with no buffer yet.
    if (len >= buflen)
        buf = grow_line_buffer(buf, &buflen);
    buf[len] = '\0';
    return buf;
}