forked from bakagirl/Arachne-WWW-browser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ENTITY.C
291 lines (270 loc) · 7.22 KB
/
ENTITY.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
// ===============================================================================
// HTML/2.0 entities (defined in <fontpath>/fontinfo.bin)
// ===============================================================================
#include "arachne.h"
// Translates one HTML character reference into a single output byte.
//
// name   - the reference text without the leading '&' and without the
//          trailing ';' (e.g. "amp", "#169", "#xA9"); must be a valid,
//          NUL-terminated string.
// return - the byte to display for that reference.
//
// Lookup order:
//   1. a few built-in names (lt, gt, amp, quot, bul/bull/bullet, trade),
//      compared case-insensitively;
//   2. the ENTITYcfg configuration, keyed by the name as given.
//      glennmcc (May 27, 2007) documented the expected file format as:
//          [Entity conversions]
//          lsquo `
//          rsquo '
//          mdash -
//   3. numeric references ("#NNN" decimal, "#xHH" / "#XHH" hexadecimal);
//   4. the 128-entry name table finf->entity[], entry i -> byte 128+i;
//   5. a handful of fallback names, and finally the first character of
//      the name itself ("Aacute" -> 'A').
unsigned char HTMLentity(char *name)
{
    if (!strcmpi(name, "lt"))
        return '<';
    if (!strcmpi(name, "gt"))
        return '>';
    if (!strcmpi(name, "amp"))
        return '&';
    if (!strcmpi(name, "quot"))
        return '\"';
    //!!glennmcc: Jan 04, 2005 -- 'bullet'
    if (!strcmpi(name, "bul") || !strcmpi(name, "bull") || !strcmpi(name, "bullet"))
        return 183;
    //!!glennmcc: MAY 30, 2005 -- 'trade' == small 'TM' Trade Mark symbol
    if (!strcmpi(name, "trade"))
        return 153;

    // User-configurable conversions; the first character of the configured
    // value is the replacement byte.
    if (configvariable(&ENTITYcfg, name, NULL))
        return *configvariable(&ENTITYcfg, name, NULL);

    if (*name == '#')
    {
        int value;
        // Output buffer for itoa(): large enough for any decimal int plus
        // sign and terminator. (The previous code handed itoa() a string
        // literal, which is not writable storage and was far too small.)
        char number[sizeof(int) * 3 + 2];

        if (name[1] == 'x' || name[1] == 'X')
            value = (int)strtoul(&name[2], NULL, 16); // HEX format
        else
            value = (int)strtoul(&name[1], NULL, 10); // DEC format

        // Numeric conversions can be configured in ENTITYcfg as well,
        // keyed by the decimal code (only consulted for codes above 127).
        if (value > 127)
        {
            itoa(value, number, 10);
            if (configvariable(&ENTITYcfg, number, NULL))
                return *configvariable(&ENTITYcfg, number, NULL);
        }

        //!!glennmcc: May 30, 2007 -- en space, em space, thin space
        if (value == 8194 || value == 8195 || value == 8201)
            return 32;  // 32 == space
        //!!glennmcc: Mar 26, 2008
        if (value == 710)
            return 94;  // 94 == caret
        if (value == 732)
            return 126; // 126 == tilde

        // Anything that does not fit in one byte gets the placeholder 127.
        // Zero and negative results (unparsable digits, "&#0;", or a large
        // number wrapped by the int conversion) are treated the same way:
        // returning 0 would plant a NUL in the caller's string and cut the
        // decoded text short.
        if (value < 1 || value > 255)
            return 127;
        return (unsigned char)value;
    }
    else //------------------------- ISO Latin entities
    {
        int i;
        int l = strlen(name);

        // NOTE(review): only the first strlen(name) characters are compared,
        // so a name that is merely a prefix of a table entry (including the
        // empty name) also matches -- confirm this is intended.
        for (i = 0; i < 128; i++)
        {
            if (!strncmp(finf->entity[i], name, l))
                return (unsigned char)(128 + i);
        }
    }

    // Fallbacks for names the table did not provide.
    if (!strcmpi(name, "copy"))
        return 'c';
    if (!strcmpi(name, "reg"))
        return 'r';
    //!!glennmcc: Jun 12, 2005 -- middot is 183 (was 127)
    if (!strcmpi(name, "middot"))
        return 183;
    //!!glennmcc: May 29, 2007 -- added 'ensp, emsp and thinsp'
    if (!strcmpi(name, "sp") || !strcmpi(name, "ensp")
        || !strcmpi(name, "emsp") || !strcmpi(name, "thinsp"))
        return ' ';
    if (!strcmpi(name, "nbsp"))
        return 160;

    return name[0]; // "Aacute" -> 'A'
}
// Decodes HTML character references in str, in place.
//
// str - writable, NUL-terminated string. Each recognised "&...;" sequence
//       is replaced by the single byte returned by HTMLentity(); the result
//       is never longer than the input. A decoded byte 160 is stored as a
//       plain space.
//
// An '&' is treated as the start of a reference only if it is not followed
// by a space and a ';' occurs 2 to 6 characters after it. This is
// glennmcc's Mar 06, 2005 fix so that a bare '&' inside a URL is kept
// literally (http://www.cisnet.com/glennmcc/testing_&_symbol/&-amp-bug.htm).
// NOTE(review): because of that window, references whose name is six or
// more characters long (e.g. "&thinsp;") are left undecoded here -- confirm
// whether that is intended.
void entity2str(char *str)
{
    int i = 0, j = 0, l = strlen(str);

    while (i < l)
    {
        int isentity = 0;

        if (str[i] == '&' && str[i + 1] != ' ')
        {
            int k;

            // Look for the closing ';' at offsets 2..6, but never read
            // beyond the terminating NUL: the old unconditional reads of
            // str[i+2]..str[i+6] ran off the end of the buffer whenever
            // '&' was near the end of the string.
            for (k = 1; k <= 6 && str[i + k] != '\0'; k++)
            {
                if (k >= 2 && str[i + k] == ';')
                {
                    isentity = 1;
                    break;
                }
            }
        }

        if (isentity)
        {
            char *ptr = &str[++i];      // reference text after the '&'

            while (i < l && str[i] != ';')
                i++;
            str[i++] = '\0';            // terminate the text for HTMLentity()
            str[j] = HTMLentity(ptr);
            if ((unsigned char)str[j] == 160) // &nbsp;
                str[j] = ' ';
        }
        else
            str[j] = str[i++];
        j++;
    } // loop
    str[j] = '\0';
}
/*
if(!strcmpi(name,"reg"))
return 174; //(R)
else
if(!strcmp(name,"aacute"))
return ' ';
else
if(!strcmp(name,"Aacute"))
return '';
else
if(!strcmp(name,"auml"))
return 228;
else
if(!strcmp(name,"Auml"))
return 196;
else
if(!strcmp(name,"Eacute"))
return '';
else
if(!strcmp(name,"eacute"))
return '‚';
else
if(!strcmp(name,"iacute"))
return '¡';
else
if(!strcmp(name,"Iacute"))
return '‹';
else
if(!strcmp(name,"ntilde"))
return '¤';
else
if(!strcmp(name,"Ntilde"))
return '¥';
else
if(!strcmp(name,"oacute"))
return '¢';
else
if(!strcmp(name,"Oacute"))
return '•';
else
if(!strcmp(name,"ouml"))
return 246;
else
if(!strcmp(name,"Ouml"))
return 214;
else
if(!strcmp(name,"szlig"))
return 223;
else
if(!strcmp(name,"uuml"))
return 252;
else
if(!strcmp(name,"Uuml"))
return 220;
else
if(!strcmp(name,"uacute"))
return '£';
else
if(!strcmp(name,"Uacute"))
return '—';
else
if(!strcmp(name,"uacute"))
return '£';
else
if(!strcmp(name,"Uacute"))
return '—';
else
if(!strcmp(name,"yacute"))
return '˜';
else
if(!strcmp(name,"Yacute"))
return '';
*/