This repository has been archived by the owner on Mar 16, 2022. It is now read-only.
forked from thegenemyers/DAZZ_DB
-
Notifications
You must be signed in to change notification settings - Fork 9
/
rangen.c
187 lines (165 loc) · 5.22 KB
/
rangen.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*******************************************************************************************
*
* Synthetic DNA shotgun sequence generator
* Generate a fake genome of size genlen*1Mb long, that has an AT-bias of -b.
* The -r parameter seeds the random number generator for the generation of the genome
* so that one can reproducbile produce the same underlying genome to sample from. If
* missing, then the job id of the invocation seeds the generator. The sequence is
* sent to the standard output in .fasta format.
*
* Author: Gene Myers
* Date : April 2016
*
********************************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
static char *Usage = "<genlen:double> [-U] [-b<double(.5)>] [-w<int(80)>] [-r<int>]";
static int GENOME; // -g option * 1Mbp
static double BIAS; // -b option
static int HASR = 0; // -r option is set?
static int SEED; // -r option
static int WIDTH; // -w option
static int UPPER; // -U option
static char *Prog_Name;
// Generate a random DNA sequence of length *len* with an AT-bias of BIAS.
// Uppercase letters if UPPER is set, lowercase otherwise.
static char *random_genome(int len)
{ static char *seq = NULL;
static double x, PRA, PRC, PRG;
int i;
if (seq == NULL)
{ PRA = BIAS/2.;
PRC = (1.-BIAS)/2. + PRA;
PRG = (1.-BIAS)/2. + PRC;
if ((seq = (char *) malloc(WIDTH+1)) == NULL)
{ fprintf(stderr,"%s: Allocating genome sequence\n",Prog_Name);
exit (1);
}
}
if (UPPER)
for (i = 0; i < len; i++)
{ x = drand48();
if (x < PRC)
if (x < PRA)
seq[i] = 'A';
else
seq[i] = 'C';
else
if (x < PRG)
seq[i] = 'G';
else
seq[i] = 'T';
}
else
for (i = 0; i < len; i++)
{ x = drand48();
if (x < PRC)
if (x < PRA)
seq[i] = 'a';
else
seq[i] = 'c';
else
if (x < PRG)
seq[i] = 'g';
else
seq[i] = 't';
}
seq[len] = '\0';
return (seq);
}
int main(int argc, char *argv[])
{ int i, j;
char *eptr;
double glen;
// Process command arguments
//
// Usage: <GenomeLen:double> [-b<double(.5)>] [-r<int>]
Prog_Name = strdup("rangen");
WIDTH = 80;
BIAS = .5;
HASR = 0;
UPPER = 0;
j = 1;
for (i = 1; i < argc; i++)
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
fprintf(stderr,"%s: %s is an illegal option\n",Prog_Name,argv[i]);
exit (1);
case 'U':
if (argv[i][2] != '\0')
{ fprintf(stderr,"%s: %s is an illegal option\n",Prog_Name,argv[i]);
exit (1);
}
UPPER = 1;
break;
case 'b':
BIAS = strtod(argv[i]+2,&eptr);
if (*eptr != '\0' || argv[i][2] == '\0')
{ fprintf(stderr,"%s: -%c '%s' argument is not a real number\n",
Prog_Name,argv[i][1],argv[i]+2);
exit (1);
}
if (BIAS < 0. || BIAS > 1.)
{ fprintf(stderr,"%s: AT-bias must be in [0,1] (%g)\n",Prog_Name,BIAS);
exit (1);
}
break;
case 'r':
SEED = strtol(argv[i]+2,&eptr,10);
HASR = 1;
if (*eptr != '\0' || argv[i][2] == '\0')
{ fprintf(stderr,"%s: -r argument is not an integer\n",Prog_Name);
exit (1);
}
break;
case 'w':
WIDTH = strtol(argv[i]+2,&eptr,10);
if (*eptr != '\0' || argv[i][2] == '\0')
{ fprintf(stderr,"%s: -w '%s' argument is not an integer\n",Prog_Name,argv[i]+2);
exit (1);
}
if (WIDTH < 0)
{ fprintf(stderr,"%s: Line width must be non-negative (%d)\n",Prog_Name,WIDTH);
exit (1);
}
break;
}
else
argv[j++] = argv[i];
argc = j;
if (argc != 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
fprintf(stderr," -r: Random number generator seed (default is process id).\n");
fprintf(stderr," -b: AT vs GC ratio or bias.\n");
fprintf(stderr," -w: Print -w bp per line (default is 80).\n");
fprintf(stderr," -U: Output sequence in upper case (default is lower case).\n");
exit (1);
}
glen = strtod(argv[1],&eptr);
if (*eptr != '\0')
{ fprintf(stderr,"%s: genome length is not a real number\n",Prog_Name);
exit (1);
}
if (glen < 0.)
{ fprintf(stderr,"%s: Genome length must be positive (%g)\n",Prog_Name,glen);
exit (1);
}
GENOME = (int) (glen*1000000.);
// Set up random number generator
if (HASR)
srand48(SEED);
else
srand48(getpid());
// Generate the sequence line at a time where all lines have width WDITH, save the last.
fprintf(stdout,">random len=%d bias=%g\n",GENOME,BIAS);
for (j = 0; j+WIDTH < GENOME; j += WIDTH)
fprintf(stdout,"%s\n",random_genome(WIDTH));
if (j < GENOME)
fprintf(stdout,"%s\n",random_genome(GENOME-j));
exit (0);
}