-
Notifications
You must be signed in to change notification settings - Fork 18
/
regex.c
253 lines (207 loc) · 5.06 KB
/
regex.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*
* regex.c: String and regex operations for odt2txt
*
* Copyright (c) 2006-2009 Dennis Stosberg <[email protected]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License,
* version 2 as published by the Free Software Foundation
*/
#include "mem.h"
#include "regex.h"
/* Size in bytes of the buffer that print_regexp_err() hands to regerror(). */
#define BUF_SZ 4096
/*
 * Forward declarations of file-local helpers that are defined further
 * down in this file.
 */
static char *headline(char line, const char *buf, regmatch_t matches[],
size_t nmatch, size_t off);
static size_t charlen_utf8(const char *s);
/*
 * Print the message belonging to a regex library error on stderr.
 *
 * reg_errno  error code as returned by the failing regex call
 * rx         pattern object that call operated on
 *
 * The text is produced by regerror() in a temporary buffer of BUF_SZ
 * bytes and written followed by a newline.
 */
static void print_regexp_err(int reg_errno, const regex_t *rx)
{
	char *msg;

	msg = ymalloc(BUF_SZ);
	(void)regerror(reg_errno, rx, msg, BUF_SZ);
	fprintf(stderr, "%s\n", msg);
	yfree(msg);
}
/*
 * Replace matches of a POSIX extended regular expression inside buf.
 *
 * buf     string buffer that is edited in place
 * regex   pattern source, compiled with REG_EXTENDED
 * regopt  bit mask: with _REG_GLOBAL the search continues after each
 *         replacement, otherwise at most one pass is made; with
 *         _REG_EXEC subst is a callback instead of plain text
 * subst   replacement string, or (with _REG_EXEC) a function
 *         char *(*)(const char *buf, regmatch_t matches[],
 *                   size_t nmatch, size_t off)
 *         whose return value is used as replacement and afterwards
 *         released with yfree()
 *
 * Returns the number of replacements made. If the pattern does not
 * compile, the error is printed and the program exits with
 * EXIT_FAILURE.
 */
int regex_subst(STRBUF *buf,
		const char *regex, int regopt,
		const void *subst)
{
	typedef char *(*subst_fn)(const char *buf, regmatch_t matches[],
				  size_t nmatch, size_t off);

	regmatch_t groups[10];
	const size_t ngroups = sizeof groups / sizeof groups[0];
	regex_t compiled;
	size_t pos = 0;		/* offset in buf where the next search starts */
	int replaced = 0;
	int rc;

	rc = regcomp(&compiled, regex, REG_EXTENDED);
	if (rc != 0) {
		print_regexp_err(rc, &compiled);
		exit(EXIT_FAILURE);
	}

	for (;;) {
		const char *tail;
		int eflags = 0;

		if (pos > strbuf_len(buf))
			break;
		tail = strbuf_get(buf) + pos;

#ifdef REG_STARTEND
		/* hand the remaining length to regexec() explicitly */
		groups[0].rm_so = 0;
		groups[0].rm_eo = strbuf_len(buf) - pos;
		eflags = REG_STARTEND;
#endif
		if (regexec(&compiled, tail, ngroups, groups, eflags) != 0)
			break;

		if (groups[0].rm_so != -1) {
			const int use_callback = regopt & _REG_EXEC;
			char *repl;
			int repl_len;

			if (use_callback)
				repl = ((subst_fn)subst)(strbuf_get(buf), groups,
							 ngroups, pos);
			else
				repl = (char *)subst;

			/* match offsets are relative to tail, buf wants absolute ones */
			repl_len = strbuf_subst(buf,
						groups[0].rm_so + pos,
						groups[0].rm_eo + pos,
						repl);
			replaced++;

			if (use_callback)
				yfree(repl);

			/*
			 * Resume behind the replaced region.
			 * NOTE(review): the additional +1 presumably follows
			 * from what strbuf_subst() returns -- confirm there.
			 */
			pos += groups[0].rm_so;
			if (repl_len >= 0)
				pos += repl_len + 1;
		}

		if (!(regopt & _REG_GLOBAL))
			break;
	}

	regfree(&compiled);
	return replaced;
}
/*
 * Remove text matching regex from buf.
 *
 * Shorthand for regex_subst() with an empty replacement string; regopt
 * is passed through unchanged. Returns whatever regex_subst() returns,
 * i.e. the number of replacements made.
 */
int regex_rm(STRBUF *buf,
	     const char *regex, int regopt)
{
	const char *nothing = "";

	return regex_subst(buf, regex, regopt, nothing);
}
/*
 * Build a text headline: str on one line, a rule made of linechar on
 * the next line, followed by an empty line.
 *
 * linechar  character the rule consists of
 * str       NUL-terminated headline text
 *
 * The rule is as many characters long as charlen_utf8() reports for
 * str. For an empty str a freshly allocated empty string is returned;
 * otherwise the result is what strbuf_spit() yields for the assembled
 * buffer (presumably its character data, owned by the caller --
 * confirm in strbuf).
 */
char *underline(char linechar, const char *str)
{
	const size_t width = charlen_utf8(str);
	STRBUF *out;
	char *rule;
	char *p;

	if (*str == '\0') {
		char *empty = ymalloc(1);

		*empty = '\0';
		return empty;
	}

	out = strbuf_new();
	strbuf_append(out, str);
	strbuf_append(out, "\n");

	/* the rule is not NUL-terminated, it is appended by length */
	rule = ymalloc(width);
	for (p = rule; p != rule + width; p++)
		*p = linechar;
	strbuf_append_n(out, rule, width);
	yfree(rule);

	strbuf_append(out, "\n\n");
	return strbuf_spit(out);
}
/*
 * Turn the text of match group 1 into an underlined headline.
 *
 * line     character used for the rule below the text
 * buf      start of the buffer the match offsets refer to
 * matches  match array filled in by regexec(); only entry 1 is read
 * nmatch   number of entries in matches (not used here)
 * off      offset added to the match positions to index into buf
 *
 * Returns the string produced by underline() for a temporary copy of
 * the group's text.
 */
static char *headline(char line, const char *buf, regmatch_t matches[],
		      size_t nmatch, size_t off)
{
	const regmatch_t *title = &matches[1];
	const size_t title_len = title->rm_eo - title->rm_so;
	char *copy = ymalloc(title_len + 1);
	char *decorated;

	memcpy(copy, buf + title->rm_so + off, title_len);
	copy[title_len] = '\0';

	decorated = underline(line, copy);
	yfree(copy);
	return decorated;
}
/*
 * Format match group 1 as a headline ruled with '='.
 * All arguments are forwarded unchanged to headline(), whose result is
 * returned.
 */
char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const char rule = '=';

	return headline(rule, buf, matches, nmatch, off);
}
/*
 * Format match group 1 as a headline ruled with '-'.
 * All arguments are forwarded unchanged to headline(), whose result is
 * returned.
 */
char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const char rule = '-';

	return headline(rule, buf, matches, nmatch, off);
}
/*
 * Build the placeholder text "[-- Image: <name> --]" for an image.
 *
 * buf      start of the buffer the match offsets refer to
 * matches  match array filled in by regexec(); only entry 1 is read
 *          and its text is used as <name>
 * nmatch   number of entries in matches (not used here)
 * off      offset added to the match positions to index into buf
 *
 * Returns a NUL-terminated string obtained from ymalloc().
 */
char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	const int i = 1;
	const char *prefix = "[-- Image: ";
	const char *postfix = " --]";
	size_t pr_len, po_len, len;
	char *match;

	(void)nmatch;

	pr_len = strlen(prefix);
	len = matches[i].rm_eo - matches[i].rm_so;
	/*
	 * Must be the length of the postfix: measuring the prefix here
	 * made the memcpy() below read past the end of the shorter
	 * postfix literal and over-sized the allocation.
	 */
	po_len = strlen(postfix);

	match = ymalloc(pr_len + len + po_len + 1);
	memcpy(match, prefix, pr_len);
	memcpy(match + pr_len, buf + matches[i].rm_so + off, len);
	memcpy(match + pr_len + len, postfix, po_len);
	match[pr_len + len + po_len] = '\0';

	return match;
}
/*
 * Count the characters of a NUL-terminated string, treating a
 * multi-byte sequence as one character.
 *
 * For a byte above 0x80 the utf8_length[] table (indexed by byte value
 * minus 0x80) gives the number of additional bytes that belong to the
 * same character; every other byte counts as one character.
 *
 * The additional bytes are skipped one at a time so that a sequence cut
 * short by the end of the string cannot carry the scan past the
 * terminating NUL.
 */
static size_t charlen_utf8(const char *s)
{
	const unsigned char *t = (const unsigned char *)s;
	size_t count = 0;

	while (*t != '\0') {
		size_t extra = 0;

		if (*t > 0x80)
			extra = (size_t)utf8_length[*t - 0x80];

		t++;
		/* stop early if the string ends inside the sequence */
		while (extra > 0 && *t != '\0') {
			t++;
			extra--;
		}
		count++;
	}

	return count;
}
/*
 * wrap: build a line-wrapped copy of buf in a new STRBUF.
 *
 * buf    text to wrap; it is only accessed through strbuf_get() and
 *        strbuf_len()
 * width  line length limit; -1 turns wrapping off and yields a plain
 *        copy of buf
 *
 * Returns the newly created STRBUF. With wrapping enabled the output
 * starts with one line feed and ends with one additional line feed.
 *
 * NOTE(review): input is flushed to the output only when a '\n' is seen
 * or when a line is broken at a blank, so text after the final '\n'
 * that is never broken is not copied -- presumably callers pass
 * newline-terminated text; confirm.
 */
STRBUF *wrap(STRBUF *buf, int width)
{
const char *lf = "\n";
const size_t lflen = strlen(lf);
/* bufp: read position; last: start of the text not yet written to out */
const char *bufp;
const char *last;
/* most recent blank on the current line, or NULL if none is known */
const char *lastspace = 0;
/* number of loop steps taken on the current output line */
size_t linelen = 0;
STRBUF *out = strbuf_new();
bufp = strbuf_get(buf);
last = bufp;
/* no wrapping requested: return a verbatim copy */
if (width == -1) {
strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
return out;
}
/* wrapped output always begins with a line feed */
strbuf_append_n(out, lf, lflen);
/* walk the input until bufp is strbuf_len(buf) bytes past its start */
while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
/* a blank is a candidate position for a later line break */
if (*bufp == ' ')
lastspace = bufp;
else if (*bufp == '\n') {
/* line break in the input: write out the pending text first */
strbuf_append_n(out, last, (size_t)(bufp - last));
/* then one line feed for every consecutive '\n' */
do {
strbuf_append_n(out, lf, lflen);
} while (*++bufp == '\n');
lastspace = NULL;
/* blanks at the start of the following line are skipped */
while(*bufp == ' ') {
bufp++;
}
/* a new line starts here */
last = bufp;
linelen = 0;
}
/* over the limit and a break position is known: break at that blank */
if (NULL != lastspace && (int)linelen > width) {
strbuf_append_n(out, last, (size_t)(lastspace - last));
strbuf_append_n(out, lf, lflen);
last = lastspace;
lastspace = NULL;
/* what was already read behind the break belongs to the new line */
linelen = (size_t)(bufp - last);
/* do not start the new line with blanks */
while(*last == ' ') {
last++;
}
/* if skipping blanks moved last beyond bufp, let bufp catch up */
if(last > bufp)
bufp = last;
}
/* advance by one byte and count one column */
bufp++;
linelen++;
/*
 * If the byte now under bufp is above 0x80, jump ahead by its
 * utf8_length[] entry without increasing linelen.
 * NOTE(review): this jump is not checked against the end of buf here;
 * only the loop condition above is -- confirm input is well-formed.
 */
if ((unsigned char)*bufp > 0x80)
bufp += utf8_length[(unsigned char)*bufp - 0x80];
}
/* terminate the output with one more line feed */
strbuf_append_n(out, "\n", 1);
return out;
}