-
Notifications
You must be signed in to change notification settings - Fork 1
/
tccexsymtab.c
2410 lines (1996 loc) · 79.9 KB
/
tccexsymtab.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* TCC - Tiny C Compiler
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "tcc.h"
/* tccgen: asm_label cleanup */
//#define BEFORE_hash_opt
#define FUNC_STR func_str.str
/*****************************************************************************/
/* exsymtab_token_hash */
/*****************************************************************************/
/* Hashes a NUL-terminated string with the djb2 algorithm (see
 * http://www.cse.yorku.ca/~oz/hash.html) and reduces the result to a bucket
 * index by masking with N_buckets - 1. The mask only selects a valid bucket
 * when N_buckets is a power of two; the table in this file starts with four
 * buckets and is only ever doubled. */
unsigned long _token_hash_hash_string(token_string_hash * tsh, const char *str)
{
    unsigned long acc = 5381;
    const char *cursor;
    for (cursor = str; *cursor != '\0'; cursor++) {
        int ch = *cursor;
        acc = acc * 33 + ch;
    }
    return acc & (tsh->N_buckets - 1);
}
/* Creates an empty token string hash: a zero-initialized header plus a
 * zero-initialized array of four bucket pointers. Release the result with
 * token_string_hash_free. */
token_string_hash * token_string_hash_new()
{
    enum { INITIAL_BUCKETS = 4 };
    token_string_hash * tsh = tcc_mallocz(sizeof(token_string_hash));
    tsh->N_buckets = INITIAL_BUCKETS;
    tsh->buckets = tcc_mallocz(INITIAL_BUCKETS * sizeof(void*));
    return tsh;
}
/* _token_string_hash_get_ll_ref: internal function. Returns the address of
 * the link that refers to the entry called `name`; that link is either a
 * bucket's head slot or the `next` field of another entry. If no entry has
 * that name, the address of the chain's terminating (null) link is returned,
 * so dereferencing the result yields a null pointer. The link itself always
 * exists, which lets the caller store a newly allocated entry through it. */
token_string_hash_linked_list** _token_string_hash_get_ll_ref(
    token_string_hash * tsh, const char * name
) {
    token_string_hash_linked_list ** link;
    /* Begin at the head slot of the bucket chosen by the name's hash and
     * step from link to link until the name matches or the chain ends. */
    for (link = tsh->buckets + _token_hash_hash_string(tsh, name);
         *link != NULL;
         link = &((*link)->next))
    {
        if (strcmp((*link)->name, name) == 0) break;
    }
    return link;
}
/* Internal: doubles the number of buckets and redistributes every existing
 * entry over the new bucket array. Entries are relinked rather than
 * reallocated, so pointers to the entries themselves stay valid. */
void _token_string_hash_extend(token_string_hash * tsh)
{
    token_string_hash_linked_list ** prev_buckets = tsh->buckets;
    int prev_count = tsh->N_buckets;
    int b;

    /* Install a zeroed bucket array twice as large as the old one */
    tsh->N_buckets <<= 1;
    tsh->buckets = tcc_mallocz(tsh->N_buckets * sizeof(void*));

    /* Walk every old chain and move its entries into the new buckets */
    for (b = 0; b < prev_count; b++) {
        token_string_hash_linked_list * entry;
        token_string_hash_linked_list * following;
        for (entry = prev_buckets[b]; entry != NULL; entry = following) {
            /* Remember the rest of the old chain, then detach this entry
             * from it: its old successor probably lands in a different
             * bucket after rehashing. */
            following = entry->next;
            entry->next = NULL;
            /* Hook the entry onto the end of its new chain */
            *(_token_string_hash_get_ll_ref(tsh, entry->name)) = entry;
        }
    }
    tcc_free(prev_buckets);
}
/* token_string_hash_get_ref: returns the address of the data slot belonging
 * to `name`. If the hash has no entry for `name`, one is created with a
 * zeroed data slot and the entry counter N is incremented; when N then
 * exceeds the number of buckets, the table is extended. */
void ** token_string_hash_get_ref(token_string_hash * tsh, const char * name)
{
    token_string_hash_linked_list ** slot = _token_string_hash_get_ll_ref(tsh, name);
    token_string_hash_linked_list * entry = *slot;

    /* Existing entry: hand back its data slot */
    if (entry != NULL) return &(entry->data);

    /* New entry. The allocation is the struct size plus strlen(name);
     * presumably the struct's name member already provides the byte for the
     * terminating NUL -- confirm against the struct declaration. */
    entry = tcc_mallocz(sizeof(token_string_hash_linked_list) + strlen(name));
    strcpy(entry->name, name);
    *slot = entry;

    /* Extending relinks entries without reallocating them, so `entry`
     * remains valid afterwards. */
    tsh->N++;
    if (tsh->N > tsh->N_buckets) _token_string_hash_extend(tsh);
    return &(entry->data);
}
/* token_string_hash_count: returns the hash's entry counter N, which
 * token_string_hash_get_ref increments each time it creates an entry. */
int token_string_hash_count(token_string_hash * tsh)
{
    return tsh->N;
}
/* token_string_hash_free: releases every entry, the bucket array and the
 * hash header. The data pointers stored in the entries are not freed.
 * Passing NULL is allowed and does nothing. */
void token_string_hash_free(token_string_hash * tsh)
{
    int b;
    if (!tsh) return;
    for (b = 0; b < tsh->N_buckets; b++) {
        token_string_hash_linked_list * entry = tsh->buckets[b];
        while (entry) {
            /* Step past the entry before releasing it */
            token_string_hash_linked_list * doomed = entry;
            entry = entry->next;
            tcc_free(doomed);
        }
    }
    tcc_free(tsh->buckets);
    tcc_free(tsh);
}
/****************************************************************************/
/* ram hash */
/****************************************************************************/
/* This provides a mechanism for mapping a set of old pointers to a set of
* new pointers. Assuming that a collection of data structures are being
* copied, this basically provides an interface to say, "What is the new
* address for this old address?"
*
* The current hashing function is pretty basic, taken from this discussion:
* http://stackoverflow.com/questions/20953390/what-is-the-fastest-hash-function-for-pointers
* However, it performs pretty well. With perl.h, the maximum bucket depth
* is 6, and on average each bucket has about 1.3 entries.
*
* As currently implemented, you should create a new ram_hash with
* ram_hash_new and free the memory associated with your ram_hash with
* ram_hash_free:
*
* ram_hash * my_ram_hash = ram_hash_new();
*
*/
ram_hash_linked_list * ram_hash_get(ram_hash * rh, void * key);
ram_hash * ram_hash_new()
{
ram_hash * to_return = tcc_mallocz(sizeof(ram_hash));
to_return->N_buckets = 4;
to_return->buckets = tcc_mallocz(4*sizeof(ram_hash_linked_list));
/* Add entry for null pointer */
ram_hash_get(to_return, NULL)->value = NULL;
return to_return;
}
/* ram_hash_hash_ptr: internal function. Maps a pointer to a bucket index by
 * discarding its five lowest bits and masking the remainder with
 * N_buckets - 1. */
uintptr_t ram_hash_hash_ptr(ram_hash * rh, void * old)
{
    uintptr_t mask = rh->N_buckets - 1;
    return ((uintptr_t)old >> 5) & mask;
}
/* ram_hash_find: internal function. Searches the bucket selected by the
 * pointer's hash, head element included, and returns the element whose key
 * equals `old`. Returns NULL when no element in that chain has the key. */
ram_hash_linked_list* ram_hash_find(ram_hash * rh, void * old)
{
    ram_hash_linked_list * node;
    for (node = rh->buckets + ram_hash_hash_ptr(rh, old);
         node != NULL;
         node = node->next)
    {
        if (node->key == old) return node;
    }
    return NULL;
}
/* ram_hash_get: internal function. Returns the element for `key`, creating
 * it if necessary. A head element whose key is NULL is treated as unused and
 * is claimed for `key`; otherwise the chain is searched and, failing that, a
 * zeroed element is appended. This neither updates the element count nor
 * checks whether the table needs rehashing, which is why it is internal. */
ram_hash_linked_list * ram_hash_get(ram_hash * rh, void * key)
{
    ram_hash_linked_list * head = rh->buckets + ram_hash_hash_ptr(rh, key);
    ram_hash_linked_list * tail = head;
    ram_hash_linked_list * scan;
    ram_hash_linked_list * fresh;

    /* The head lives in the bucket array itself: use it when it already
     * holds this key or is still unclaimed. */
    if (head->key == key || head->key == NULL) {
        head->key = key;
        return head;
    }

    /* Search the rest of the chain, keeping track of its last element */
    for (scan = head->next; scan != NULL; scan = scan->next) {
        if (scan->key == key) return scan;
        tail = scan;
    }

    /* Not present: append a new element after the last one */
    fresh = tcc_mallocz(sizeof(ram_hash_linked_list));
    fresh->key = key;
    tail->next = fresh;
    return fresh;
}
/* ram_hash_rehash: internal function. Doubles the number of buckets and
 * re-inserts every key/value pair into the new bucket array. The old bucket
 * array and all old chained elements are freed, so addresses of elements
 * (and of their value slots) obtained before the call are no longer valid. */
void ram_hash_rehash(ram_hash * rh)
{
    ram_hash_linked_list * prev_buckets = rh->buckets;
    int prev_count = rh->N_buckets;
    int b;

    /* Install a zeroed bucket array twice as large as the old one */
    rh->N_buckets <<= 1;
    rh->buckets
        = tcc_mallocz(rh->N_buckets * sizeof(ram_hash_linked_list));

    for (b = 0; b < prev_count; b++) {
        ram_hash_linked_list * head = prev_buckets + b;
        ram_hash_linked_list * node;

        /* A NULL key in the head marks an unused bucket */
        if (head->key == NULL) continue;

        /* The head is part of the bucket array, so it is copied but not
         * freed individually. */
        ram_hash_get(rh, head->key)->value = head->value;

        /* Chained elements were allocated one by one: copy, then free */
        node = head->next;
        while (node != NULL) {
            ram_hash_linked_list * following = node->next;
            ram_hash_get(rh, node->key)->value = node->value;
            tcc_free(node);
            node = following;
        }
    }
    tcc_free(prev_buckets);
}
/* ram_hash_get_ref: returns a *reference* to the value slot associated with
 * the pointer `old`, creating the slot if necessary. A reference is returned,
 * rather than the stored pointer, so that the result can be used as an
 * lvalue:
 *
 * void ** p_data = ram_hash_get_ref(my_ram_hash, old_ptr);
 * if (*p_data == NULL) {
 * *p_data = create_new_data();
 * }
 *
 * The reference points into the hash's own storage. A later call that adds a
 * key may trigger ram_hash_rehash, which replaces that storage, so use the
 * reference before inserting anything else.
 */
void ** ram_hash_get_ref(ram_hash * rh, void * old)
{
    ram_hash_linked_list * entry = ram_hash_find(rh, old);
    if (entry == NULL) {
        /* New key: grow the table first if the count has reached
         * N_buckets - 1, then account for and insert the element. */
        if (rh->N == rh->N_buckets - 1) ram_hash_rehash(rh);
        rh->N++;
        entry = ram_hash_get(rh, old);
    }
    return &(entry->value);
}
/* ram_hash_describe: semi-internal. Prints table statistics to stdout: the
 * bucket and element counts, how many buckets are in use, the average number
 * of elements per used bucket, and the longest chain. A bucket counts as used
 * when its head element has a non-NULL key. */
void ram_hash_describe(ram_hash * rh)
{
    int used_buckets = 0;
    int deepest = 0;
    int b;

    printf("Ram tree has %d buckets for %d elements\n", rh->N_buckets, rh->N);

    for (b = 0; b < rh->N_buckets; b++) {
        ram_hash_linked_list * node;
        int depth = 0;
        if (rh->buckets[b].key == NULL) continue;
        used_buckets++;
        /* Count the head and every element chained behind it */
        for (node = rh->buckets + b; node != NULL; node = node->next) depth++;
        if (deepest < depth) deepest = depth;
    }

    printf("%d buckets are filled, with an average occupancy of %f\n",
        used_buckets, (float)rh->N / (float)used_buckets);
    printf("Maximum occupancy is %d\n", deepest);
}
/* ram_hash_count: returns the hash's element counter N, which
 * ram_hash_get_ref increments each time it adds a key. */
int ram_hash_count(ram_hash * rh)
{
    return rh->N;
}
/* void ** ram_hash_iterate(void * ram_hash, void ** iter_data)
* Iterates through the ram_hash data, returning a reference to new leaf
* with each call. The void ** iter_data is a reference to a void pointer
* that is used by the iterator to store state between calls. You should
* call this function like so:
*
* void * iter_data = NULL;
* do {
* void ** data_ref = ram_hash_iterate(ram_hash, &iter_data);
* ...
* } while (iter_data != NULL);
*
* For example, to count the number of entries, you could do this
*
* void * iter_data = NULL;
* int count = 0;
* do {
* count++;
* ram_hash_iterate(ram_hash, &iter_data);
* } while (iter_data != NULL);
*
* To free data referenced by all leaf pointers, use this
*
* void * iter_data = NULL;
* void ** ptr_ref;
* do {
* ptr_ref = ram_hash_iterate(ram_hash, &iter_data);
* free(*ptr_ref);
* } while (iter_data != NULL);
*
* State is allocated on the heap. For the moment, the only way to free
* the state information is to iterate through all of the data.
*/
/* Iterator state kept between calls to ram_hash_iterate. It always refers to
 * the element that the *next* call will return. */
typedef struct {
    int bucket;                  /* index of the bucket `next` belongs to */
    ram_hash_linked_list * next; /* element to return on the next call */
} rt_next_data;

/* ram_hash_iterate: returns a reference to the value slot of the next
 * element and advances the iterator. `*p_next_data` must be NULL on the first
 * call; the function then allocates its state and stores it there. After the
 * last element has been returned the state is freed and `*p_next_data` is
 * reset to NULL. Returns NULL without touching `*p_next_data` when `rh` is
 * NULL or its element count is zero. */
void ** ram_hash_iterate(ram_hash * rh, void ** p_next_data)
{
    rt_next_data * state;
    ram_hash_linked_list * current;
    int b;

    if (rh == NULL || rh->N == 0) return NULL;

    /* First call: allocate the state and aim it at the head of the first
     * bucket that is in use (non-NULL head key). */
    state = *p_next_data;
    if (state == NULL) {
        state = tcc_mallocz(sizeof(rt_next_data));
        *p_next_data = state;
        b = 0;
        while (b < rh->N_buckets && rh->buckets[b].key == NULL) b++;
        if (b < rh->N_buckets) {
            state->bucket = b;
            state->next = rh->buckets + b;
        }
    }

    /* This is the element whose value slot gets returned */
    current = state->next;

    /* Advance within the current chain if it has more elements */
    if (current->next != NULL) {
        state->next = current->next;
        return &(current->value);
    }

    /* Otherwise advance to the head of the next bucket that is in use */
    for (b = state->bucket + 1; b < rh->N_buckets; b++) {
        if (rh->buckets[b].key == NULL) continue;
        state->bucket = b;
        state->next = rh->buckets + b;
        return &(current->value);
    }

    /* Nothing left after this element: release the state */
    tcc_free(state);
    *p_next_data = NULL;
    return &(current->value);
}
/* ram_hash_free(ram_hash * rh)
 * Frees the memory owned by a ram_hash: the chained elements, the bucket
 * array and the header. It does nothing with the stored values; use
 * ram_hash_iterate beforehand to release whatever they point to. Passing
 * NULL is allowed and does nothing.
 */
void ram_hash_free(ram_hash * rh)
{
    int b;
    if (!rh) return;
    for (b = 0; b < rh->N_buckets; b++) {
        /* Bucket heads are part of the bucket array; only the elements
         * chained behind them were allocated individually. */
        ram_hash_linked_list * node = rh->buckets[b].next;
        while (node != NULL) {
            ram_hash_linked_list * following = node->next;
            tcc_free(node);
            node = following;
        }
    }
    tcc_free(rh->buckets);
    tcc_free(rh);
}
/******************************************************************************/
/* compiled symbol lookup */
/******************************************************************************/
/* dump_sym_names: debugging aid. Prints the name, index, section index and
 * value of each entry of the state's ELF symbol table, starting at index 2
 * and stopping at the entry named "_etext" (which is not printed). The loop
 * has no other stopping condition, so it relies on that entry existing. */
void dump_sym_names(TCCState *state)
{
    Section * symtab = state->symtab;
    int idx;
    for (idx = 2; ; idx++) {
        ElfW(Sym) * sym = &((ElfW(Sym) *)symtab->data)[idx];
        /* Names are offsets into the linked string section's data */
        const char * name = (char *) symtab->link->data + sym->st_name;
        if (strcmp("_etext", name) == 0) break;
        printf("%s: sym_index = %d, st_shndx is %x, address is %p\n", name, idx, sym->st_shndx, (void*)sym->st_value);
    }
}
/* Printable names for basic types. tcc_dump_identifier_names indexes this
 * table with (type.t & VT_BTYPE) to label each identifier it writes out. */
char * type_lookup_table[16] = {
"int", "char", "short", "void",
"pointer", "enum", "func", "struct",
"float", "double", "long double", "bool",
"long long", "long", "qlong", "qfloat"
};
/* tcc_dump_identifier_names: writes one line per identifier to `outfile`.
 * Only identifiers whose type has VT_EXTERN set and VT_STATIC clear, and
 * which are not typedefs, are listed. Each line holds the name, the word
 * "constant" if VT_CONSTANT is set, and the basic type name from
 * type_lookup_table. Returns silently if the file cannot be opened. */
void tcc_dump_identifier_names(extended_symtab * symtab, char * outfile)
{
    TokenSym ** cursor;
    FILE * out_fh = fopen(outfile, "w");
    /* report error? I think a lack of file will probably be sufficient */
    if (out_fh == NULL) return;

    for (cursor = symtab->tokenSym_list; cursor < symtab->tokenSym_last; cursor++) {
        TokenSym * ts = *cursor;
        Sym * sym = ts->sym_identifier;
        int btype;

        /* Skip tokens that do not name an identifier */
        if (sym == NULL) continue;
        /* only indicate the things that have external linkage */
        if ((sym->type.t & (VT_EXTERN | VT_STATIC)) != VT_EXTERN) continue;
        if (sym->type.t & VT_TYPEDEF) continue;

        /* name, qualifiers, then type */
        fprintf(out_fh, "%s ", ts->str);
        if (sym->type.t & VT_CONSTANT) fprintf(out_fh, "constant ");
        btype = sym->type.t & VT_BTYPE;
        fprintf(out_fh, "%s\n", type_lookup_table[btype]);
    }
    fclose(out_fh);
}
/* tcc_get_next_extended_symbol_name: a simple mechanism for walking the
 * names of all tokens in the extended symbol table that have an identifier
 * Sym. `*poffset` holds the position of the previous result; the search
 * starts one past it. On success `*poffset` is set to the position of the
 * token found and its name is returned. At the end of the list `*poffset`
 * is reset to -1 and NULL is returned. */
char * tcc_get_next_extended_symbol_name(extended_symtab * symtab, int * poffset)
{
    int idx = *poffset + 1;
    while (symtab->tokenSym_list + idx < symtab->tokenSym_last) {
        TokenSym * ts = symtab->tokenSym_list[idx];
        if (ts->sym_identifier) {
            *poffset = idx;
            return ts->str;
        }
        idx++;
    }
    /* Reached end of list. Reset the counter and return null */
    *poffset = -1;
    return NULL;
}
/* copy_extended_symbols_to_exsymtab: walks the state's ELF symbol table from
 * index 2 up to (not including) the entry named "_etext" and stores each
 * symbol's st_value in the hash_next field of the extended symtab's TokenSym
 * of the same name. Names starting with "L." are skipped; names without a
 * TokenSym produce a warning. The loop relies on an "_etext" entry existing. */
void copy_extended_symbols_to_exsymtab(TCCState *state)
{
    extended_symtab * exsymtab = state->exsymtab;
    Section * symtab = state->symtab;
    int idx;

    for (idx = 2; ; idx++) {
        ElfW(Sym) * sym = &((ElfW(Sym) *)symtab->data)[idx];
        const char * name = (char *) symtab->link->data + sym->st_name;
        TokenSym * ts;

        if (strcmp("_etext", name) == 0) break;

        /* Skip constants */
        if (name[0] == 'L' && name[1] == '.') continue;

        /* Copy the symbol's pointer into the hash_next field of the TokenSym */
        ts = tcc_get_extended_tokensym(exsymtab, name);
        if (ts != NULL) {
            ts->hash_next = (void*)sym->st_value;
            continue;
        }
        tcc_warning("Global symbol %s does not exist in extended symbol table; not copying\n",
            name);
    }
}
/* tcc_get_extended_symbol_table: hands the state's extended symbol table over
 * to the caller. The exsymtab field has three kinds of value: NULL means no
 * extended symtab was requested; 1 means one was requested but the state has
 * not compiled yet; anything else is a fully formed table. In the first two
 * cases NULL is returned. Otherwise the table is returned and the field is
 * reset to 1, so the state no longer frees it (which would be a double free
 * once the caller frees it too); the caller becomes responsible for cleanup. */
LIBTCCAPI extended_symtab * tcc_get_extended_symbol_table(TCCState * s)
{
    extended_symtab * table = s->exsymtab;
    if (table <= (extended_symtab*)1) return NULL;
    s->exsymtab = (extended_symtab*)1;
    return table;
}
/* tcc_get_extended_tokensym: looks up the TokenSym stored under `name` in the
 * symtab's token string hash. Returns NULL when the name has no entry.
 *
 * The lookup uses _token_string_hash_get_ll_ref rather than
 * token_string_hash_get_ref because the latter creates an entry for every
 * name it does not find. A pure query must not do that: each miss would
 * leave behind a permanent empty entry, inflate the hash's count and
 * eventually force a rehash. */
LIBTCCAPI TokenSym* tcc_get_extended_tokensym(extended_symtab* symtab, const char * name)
{
    token_string_hash_linked_list ** slot
        = _token_string_hash_get_ll_ref(symtab->tsh, name);
    /* A null link means the chain ended without a match */
    if (*slot == NULL) return NULL;
    return (TokenSym*)((*slot)->data);
}
/* tcc_get_extended_symbol: returns the pointer stored in the hash_next field
 * of the TokenSym called `name` (copy_extended_symbols_to_exsymtab stores
 * symbol addresses there), or NULL if the table has no such TokenSym. */
LIBTCCAPI void * tcc_get_extended_symbol(extended_symtab * symtab, const char * name)
{
    TokenSym * ts = tcc_get_extended_tokensym(symtab, name);
    return ts ? (void*) ts->hash_next : NULL;
}
/******************************************************************************/
/* extended symtab copy */
/******************************************************************************/
/* tcc_set_extended_symtab_callbacks: the user may want fine-grained control
 * over the order of symbol table lookup, so the state carries a set of
 * callbacks: one to look names up, one to add symbols to compiler contexts,
 * and one to prep the compiler state before things get started. This setter
 * stores the three callbacks and the user data pointer in the state; it does
 * nothing else. */
LIBTCCAPI void tcc_set_extended_symtab_callbacks (
    TCCState * s,
    extended_symtab_lookup_by_name_callback new_name_callback,
    extended_symtab_sym_used_callback new_sym_used_callback,
    extended_symtab_prep_callback new_prep_callback,
    void * data
) {
    s->symtab_callback_data = data;
    s->symtab_prep_callback = new_prep_callback;
    s->symtab_sym_used_callback = new_sym_used_callback;
    s->symtab_name_callback = new_name_callback;
}
/* tcc_save_extended_symtab: requests that the state keep an extended symbol
 * table. A NULL exsymtab field is replaced by the marker value 1; a field
 * that is already non-NULL is left alone. */
LIBTCCAPI void tcc_save_extended_symtab(TCCState * s)
{
    if (s->exsymtab != NULL) return;
    s->exsymtab = (extended_symtab*)1;
}
/* get_new_symtab_pointer: returns the extended-symtab copy of `old`, creating
 * it on first request along with the Syms reachable through its type.ref and
 * next fields. `rh` maps original Sym addresses to their copies, so each
 * original is copied at most once. Returns NULL when `old` is NULL. */
Sym * get_new_symtab_pointer (Sym * old, ram_hash * rh)
{
    void ** slot;
    Sym * copy;
    int btype;
    int is_func;
    int is_located;

    /* Handle the null case up-front */
    if (old == NULL) return NULL;

    /* Reuse the copy if this Sym has been seen before */
    slot = ram_hash_get_ref(rh, old);
    if (*slot != NULL) return *slot;

    /* Create the new Sym (mallocz nulls every field) and record it in the
     * hash immediately. This has to happen before the recursive calls below:
     * they add entries to the same hash, and a request for `old` made while
     * its copy is still being filled in must find that copy. `slot` is not
     * used again after this point. */
    copy = tcc_mallocz(sizeof(Sym));
    *slot = copy;

    /* See tcc.h around line 425 for descriptions of some of the fields.
     * See also tccgen.c line 5987 to see what needs to happen for function
     * declarations to work properly (and, in turn, line 446 for how to
     * push a forward reference). */

    /* The v value (token id). It is not revisited later, so simply strip
     * out the extended flag here. */
    copy->v = old->v & ~SYM_EXTENDED;

    /* The assembler label token id, copied unmodified. */
    /* XXX do we need to strip out SYM_EXTENDED? It seems unlikely. */
    copy->asm_label = old->asm_label;

    /* Associated register. For variables, I believe the low bits give the
     * register size that can hold the value while the high bits indicate
     * storage details (VT_SYM, VT_LVAL, etc). For function types this gets
     * cast as an AttributeDef and queried for function attributes; so far
     * only the FUNC_CALL field has been seen queried. Copying the whole
     * value is easy, and everything seems to work when consuming contexts
     * see the same value as the original compilation context. */
    copy->r = old->r;

    btype = old->type.t & VT_BTYPE;
    is_func = (btype == VT_FUNC);
    /* Functions, and Syms whose r field has VT_SYM or VT_LVAL set, are
     * treated as living at a fixed location after compilation. */
    is_located = is_func || (copy->r & (VT_SYM | VT_LVAL));

    /* The type. The low bits of .t seem to tell tcc how to load the data
     * into a register and the high bits seem to indicate storage details
     * such as VT_EXTERN. Consuming contexts should see located Syms as
     * having external storage, so VT_EXTERN is added for them, and the
     * static indicator is removed from functions. Static inline functions
     * are the exception: their type bits are kept exactly as they were. */
    copy->type.t = old->type.t;
    if ((old->type.t & (VT_INLINE | VT_STATIC)) != (VT_INLINE | VT_STATIC)) {
        if (is_located) copy->type.t |= VT_EXTERN;
        if (is_func) copy->type.t &= ~VT_STATIC;
    }

    /* The type.ref field contains something useful only if the basic type
     * is a pointer, struct, or function. See code from tccgen's
     * compare_types for details. */
    if (is_func || btype == VT_PTR || btype == VT_STRUCT) {
        copy->type.ref = get_new_symtab_pointer(old->type.ref, rh);
    }

    /* The c field, the "associated number." According to tcc-doc.texi and
     * the comments above put_extern_sym2, for some Syms it points to an
     * external symbol in an associated section. For structs it is the size
     * in bytes, for struct members the byte offset (see the end of
     * struct_decl()), and for variable length arrays "the location on the
     * stack that holds the runtime sizeof for the type." For functions I
     * believe it is one of FUNC_NEW, FUNC_OLD, or FUNC_ELLIPSIS. Everything
     * seems to work when it is zeroed for functions and global variables and
     * copied otherwise. This probably needs to be more nuanced. */
    copy->c = is_located ? 0 : old->c;

    /* The next symbol field. Labels and gotos are tracked in a separate
     * stack, so for these Syms the focus is on next, not jnext. The next
     * field (I seem to recall) stores argument lists, so it needs to be
     * copied for function types. I believe it can be copied anonymously. */
    copy->next = get_new_symtab_pointer(old->next, rh);
    return copy;
}
/* get_new_deftab_pointer: returns the extended-symtab copy of the define Sym
 * `old`, creating it on first request along with the Syms reachable through
 * its next field. `rh` maps original Sym addresses to their copies. Returns
 * NULL when `old` is NULL. */
Sym * get_new_deftab_pointer (Sym * old, ram_hash * rh)
{
    void ** slot;
    Sym * copy;

    /* Handle the null case up-front */
    if (old == NULL) return NULL;

    /* Reuse the copy if the ram hash already has one */
    slot = ram_hash_get_ref(rh, old);
    if (*slot != NULL) return *slot;

    /* Create a new, zeroed define object and record it in the hash before
     * recursing; `slot` is not used again after this point. */
    copy = tcc_mallocz(sizeof(Sym));
    *slot = copy;

    /* Convert the symbol's token index. */
    copy->v = old->v & ~SYM_EXTENDED;

    /* As far as I can tell, the 'r' field is not used by preprocessor
     * macros. Copy it anyway in the off-chance that is wrong. */
    copy->r = old->r;

    /* The type. define_push and parse_define indicate that this will be
     * either MACRO_OBJ or MACRO_FUNC. */
    copy->type.t = old->type.t;

    /* Duplicate the token stream if there is one. The extended symbol
     * table's token ids are identical to the originals, so the stream can
     * be copied verbatim. */
    if (old->d != NULL) {
        int * stream = old->d;
        int len = tokenstream_len(stream);
        copy->d = tcc_malloc(sizeof(int) * len);
        memcpy(copy->d, old->d, sizeof(int) * len);
    }

    /* Copy the macro arguments. */
    copy->next = get_new_deftab_pointer(old->next, rh);
    return copy;
}
/* should_copy_TokenSym: decides whether a TokenSym belongs in the extended
 * symbol table. Returns 1 for every token whose id is at or beyond
 * tok_start. Earlier tokens are rejected outright if they are __LINE__,
 * __FILE__, __DATE__ or __TIME__; any other earlier token is accepted only
 * if it carries a define with a token stream, a struct Sym or an identifier
 * Sym. Returns 0 otherwise. */
int should_copy_TokenSym(TokenSym * to_check, int tok_start)
{
    int has_define_body;

    /* Copy all tokens that come after tok_start */
    if (to_check->tok >= tok_start) return 1;

    /* Always ignore these, no matter what */
    if (to_check->tok == TOK___LINE__) return 0;
    if (to_check->tok == TOK___FILE__) return 0;
    if (to_check->tok == TOK___DATE__) return 0;
    if (to_check->tok == TOK___TIME__) return 0;

    /* For the handful of specially declared tokens (like push, pop, etc),
     * decide based on contents. */
    has_define_body = (to_check->sym_define != NULL
                       && to_check->sym_define->d != NULL);
    if (has_define_body) return 1;
    if (to_check->sym_struct != NULL) return 1;
    if (to_check->sym_identifier != NULL) return 1;
    return 0;
}
/* copy_extended_symtab: makes a complete copy of the TokenSym table and of
 * the Syms it refers to, and stores the result in s->exsymtab. Copied Syms
 * are tracked in two ram_hashes (one for symbols, one for defines) and
 * copied TokenSyms are indexed by name in a token string hash. The state's
 * inline functions are copied as well. Does nothing when `s` is NULL. The
 * `define_start` parameter is not used by this function. */
void copy_extended_symtab (TCCState * s, Sym * define_start, int tok_start)
{
    extended_symtab * exsym;
    ram_hash * sym_map;
    ram_hash * def_map;
    int i, n_early, n_total, slot;

    /* Do nothing if we have an empty TCCState. */
    if (s == NULL) return;

    /* Count the tokens to store whose ids come before tok_start. Everything
     * from tok_start onwards is stored unconditionally. */
    n_early = 0;
    for (i = 0; i < tok_start - TOK_IDENT; i++) {
        if (should_copy_TokenSym(table_ident[i], tok_start)) n_early++;
    }
    n_total = tok_ident - tok_start + n_early;

    /* Room for the first TokenSym pointer is included in the struct
     * definition, so allocate the struct plus n_total - 1 more pointers. */
    exsym = tcc_malloc(sizeof(extended_symtab) + sizeof(void*) * (n_total - 1));
    exsym->tok_start = tok_start;
    exsym->tok_start_offset = n_early;

    /* Allocate the token string hash and the two ram hashes */
    exsym->tsh = token_string_hash_new();
    sym_map = ram_hash_new();
    exsym->sym_rh = sym_map;
    def_map = ram_hash_new();
    exsym->def_rh = def_map;
    /* Zero counts indicate that the data are in hashes, not arrays */
    exsym->N_syms = 0;
    exsym->N_defs = 0;

    /* Copy all selected TokenSyms and their dependent Syms */
    slot = 0;
    for (i = 0; i < tok_ident - TOK_IDENT; i++) {
        TokenSym * src = table_ident[i];
        TokenSym * dst;
        int dst_size;

        if (!should_copy_TokenSym(src, tok_start)) continue;

        /* The allocation adds len bytes to the struct size; the string is
         * NUL-terminated below. */
        dst_size = sizeof(TokenSym) + src->len;
        dst = tcc_malloc(dst_size);
        exsym->tokenSym_list[slot] = dst;
        slot++;

        /* Plain fields first; follows tok_alloc_new in tccpp.c */
        dst->tok = src->tok;
        dst->len = src->len;
        dst->hash_next = NULL;
        dst->sym_label = NULL; /* Not copying labels */
        memcpy(dst->str, src->str, src->len);
        dst->str[src->len] = '\0';

        /* Then the Syms hanging off the token */
        dst->sym_define = get_new_deftab_pointer(src->sym_define, def_map);
        dst->sym_struct = get_new_symtab_pointer(src->sym_struct, sym_map);
        dst->sym_identifier
            = get_new_symtab_pointer(src->sym_identifier, sym_map);

        /* Add this to the token string hash */
        *token_string_hash_get_ref(exsym->tsh, dst->str) = dst;
    }

    /* Set the tail pointer, which points to the first address past the
     * last element. */
    exsym->tokenSym_last = exsym->tokenSym_list + n_total;

    /* Copy the collection of inline functions */
    if (s->nb_inline_fns > 0) {
        int n_inline = s->nb_inline_fns;
        exsym->N_inline_funcs = n_inline;
        exsym->inline_funcs = tcc_malloc(n_inline * sizeof(InlineFunc*));

        /* Copy each inline function verbatim. Based on the behavior of
         * get_new_deftab_pointer, no token ids need updating. */
        for (i = 0; i < s->nb_inline_fns; i++) {
            InlineFunc * src_fn = s->inline_fns[i];
            InlineFunc * dst_fn
                = tcc_malloc(sizeof *dst_fn + strlen(src_fn->filename));
            int n_ints;

            strcpy(dst_fn->filename, src_fn->filename);
            dst_fn->sym = get_new_symtab_pointer(src_fn->sym, sym_map);

            /* Copy the token stream, WITHOUT replacement (see
             * copy_extended_tokensym for contrast) */
            n_ints = tokenstream_len(src_fn->FUNC_STR);
            dst_fn->FUNC_STR = tcc_malloc(n_ints * sizeof(int));
            memcpy(dst_fn->FUNC_STR, src_fn->FUNC_STR, n_ints * sizeof(int));

            exsym->inline_funcs[i] = dst_fn;
        }
    }
    else {
        exsym->inline_funcs = 0;
        exsym->N_inline_funcs = 0;
    }

    /* Store the extended symtab */
    s->exsymtab = exsym;
}
/* exsymtab_free_sym: releases the memory a copied Sym owns, but not the Sym
 * itself. For define Syms (is_def nonzero) that is the token stream in the d
 * field; other Syms own nothing that is freed here. A NULL Sym is ignored. */
void exsymtab_free_sym (Sym * to_delete, int is_def)
{
    if (to_delete != NULL && is_def) tcc_free(to_delete->d);
}
/* Frees memory associated with a copied extended symbol table. For a
* description of the structure of the allocated memory, see the copy
* function above. */
LIBTCCAPI void tcc_delete_extended_symbol_table (extended_symtab * symtab)
{
TokenSym** ts_to_delete;
TokenSym** done;
if (symtab == NULL) return;
if (symtab->sym_list != NULL)
{
/* Sym memory handling depends on storage type. If N_syms is
* zero, then its stored via a ram_hash. */
if (symtab->N_syms == 0)
{
/* Iterate through all Syms in the ram tree */
if (symtab->sym_rh->N > 0)
{
void * iterator_data = NULL;
do {
void ** data_ref = ram_hash_iterate(symtab->sym_rh, &iterator_data);
exsymtab_free_sym((Sym *)*data_ref, 0);
/* Clear the symbol itself */
tcc_free(*data_ref);
} while (iterator_data != NULL);
}
/* clean up the ram_hash itself */
ram_hash_free(symtab->sym_rh);
}
else
{
int i;
/* Iterate through all Syms in the list */
for (i = 0; i < symtab->N_syms; i++)
exsymtab_free_sym(symtab->sym_list + i, 0);
/* Clean up the sym list itself */
tcc_free(symtab->sym_list);
}
}
/* Perform identical steps for define Syms. */
if (symtab->def_list != NULL)
{
if (symtab->N_defs == 0) {
if (symtab->def_rh->N > 0)
{
void * iterator_data = NULL;
do {
void ** data_ref = ram_hash_iterate(symtab->def_rh, &iterator_data);
exsymtab_free_sym((Sym *)*data_ref, 1);
tcc_free(*data_ref);
} while (iterator_data != NULL);
}
ram_hash_free(symtab->def_rh);
}
else {
int i;
for (i = 0; i < symtab->N_defs; i++)
exsymtab_free_sym(symtab->def_list + i, 1);
tcc_free(symtab->def_list);
}
}
/* Clear out the token string hash table */
token_string_hash_free(symtab->tsh);
/* Clear out the allocated TokenSym pointers */
ts_to_delete = symtab->tokenSym_list;
done = symtab->tokenSym_last;
while (ts_to_delete < done) {
tcc_free(*ts_to_delete);
ts_to_delete++;
}
/* Clear out the inline functions */
if (symtab->inline_funcs) {
int i;
for (i = 0; i < symtab->N_inline_funcs; i++) {
tcc_free(symtab->inline_funcs[i]->FUNC_STR);
tcc_free(symtab->inline_funcs[i]);