From 3cbe86e6f7e6b3b05ab600886e4ead5d5940b0ad Mon Sep 17 00:00:00 2001 From: Joomy Korkut Date: Mon, 16 Oct 2023 11:26:10 -0400 Subject: [PATCH 1/5] Use value type in Clight backend --- theories/Codegen/LambdaANF_to_Clight.v | 4 +++- theories/Codegen/LambdaANF_to_Clight_stack.v | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/theories/Codegen/LambdaANF_to_Clight.v b/theories/Codegen/LambdaANF_to_Clight.v index 5d457d8d..0e57c4d5 100644 --- a/theories/Codegen/LambdaANF_to_Clight.v +++ b/theories/Codegen/LambdaANF_to_Clight.v @@ -260,7 +260,9 @@ Definition ulongTy : type := Definition int_chunk : memory_chunk := if Archi.ptr64 then Mint64 else Mint32. (* NOTE for val: in Clight, SIZEOF_PTR == SIZEOF_INT *) -Definition val : type := if Archi.ptr64 then ulongTy else uintTy. + +(* Definition val : type := if Archi.ptr64 then ulongTy else uintTy. *) +Definition val : type := talignas (if Archi.ptr64 then 3%N else 2%N) (tptr tvoid). Definition uval : type := if Archi.ptr64 then ulongTy else uintTy. Definition sval : type := if Archi.ptr64 then longTy else intTy. Definition val_typ : typ := if Archi.ptr64 then AST.Tlong else Tany32. diff --git a/theories/Codegen/LambdaANF_to_Clight_stack.v b/theories/Codegen/LambdaANF_to_Clight_stack.v index 695f4837..3cf0f646 100644 --- a/theories/Codegen/LambdaANF_to_Clight_stack.v +++ b/theories/Codegen/LambdaANF_to_Clight_stack.v @@ -297,7 +297,9 @@ Notation ulongTy := (Tlong Unsigned Definition int_chunk := if Archi.ptr64 then Mint64 else Mint32. (* NOTE for val: in Clight, SIZEOF_PTR == SIZEOF_INT *) -Definition val := if Archi.ptr64 then ulongTy else uintTy. + +Definition val : type := talignas (if Archi.ptr64 then 3%N else 2%N) (tptr tvoid). +(* Definition val := if Archi.ptr64 then ulongTy else uintTy. *) Definition uval := if Archi.ptr64 then ulongTy else uintTy. Definition sval := if Archi.ptr64 then longTy else intTy. Definition val_typ := if Archi.ptr64 then (AST.Tlong:typ) else (Tany32:typ). From 85f6ef7346cedcf0978460588e6cce52f1ac53bd Mon Sep 17 00:00:00 2001 From: Joomy Korkut Date: Tue, 17 Oct 2023 05:51:16 -0400 Subject: [PATCH 2/5] Include gc.h in compiled C --- cplugin/static/printClight.ml | 2 +- plugin/static/printClight.ml | 2 +- theories/Glue/glue.v | 32 ++++++++++++++++---------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cplugin/static/printClight.ml b/cplugin/static/printClight.ml index e173e4a8..8e414d31 100644 --- a/cplugin/static/printClight.ml +++ b/cplugin/static/printClight.ml @@ -361,7 +361,7 @@ let print_dest_names_imports prog names (dest : string) (imports : string list) List.iter (fun n -> add_name (remove_primes n)) names; let fm = formatter_of_out_channel oc in open_vbox 0; - List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("values.h" :: imports); + List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("gc.h" :: imports); close_box (); open_box 0; print_program Clight2 fm prog; diff --git a/plugin/static/printClight.ml b/plugin/static/printClight.ml index 03e1fc15..7a22dafd 100755 --- a/plugin/static/printClight.ml +++ b/plugin/static/printClight.ml @@ -361,7 +361,7 @@ let print_dest_names_imports prog names (dest : string) (imports : string list) List.iter (fun n -> add_name (remove_primes n)) names; let fm = formatter_of_out_channel oc in open_vbox 0; - List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("values.h" :: imports); + List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("gc.h" :: imports); close_box (); open_box 0; print_program Clight2 fm prog; diff --git a/theories/Glue/glue.v b/theories/Glue/glue.v index b668a238..083b701d 100755 --- a/theories/Glue/glue.v +++ b/theories/Glue/glue.v @@ -388,17 +388,17 @@ Section Externs. (tptr (Tfunction (Tcons (Tstruct _thread_info noattr) (Tcons val (Tcons val Tnil))) val cc_default)) :: Member_plain _env val :: nil) noattr :: - Composite _stack_frame Struct - (Member_plain _next (tptr val) :: - Member_plain _root (tptr val) :: - Member_plain _prev (tptr (Tstruct _stack_frame noattr)) :: nil) noattr :: - Composite _thread_info Struct - (Member_plain _alloc (tptr val) :: - Member_plain _limit (tptr val) :: - Member_plain _heap (tptr (Tstruct _heap noattr)) :: - Member_plain _args (Tarray val max_args noattr) :: - Member_plain _fp (tptr (Tstruct _stack_frame noattr)) :: - Member_plain _nalloc tulong :: nil) noattr :: + (* Composite _stack_frame Struct *) + (* (Member_plain _next (tptr val) :: *) + (* Member_plain _root (tptr val) :: *) + (* Member_plain _prev (tptr (Tstruct _stack_frame noattr)) :: nil) noattr :: *) + (* Composite _thread_info Struct *) + (* (Member_plain _alloc (tptr val) :: *) + (* Member_plain _limit (tptr val) :: *) + (* Member_plain _heap (tptr (Tstruct _heap noattr)) :: *) + (* Member_plain _args (Tarray val max_args noattr) :: *) + (* Member_plain _fp (tptr (Tstruct _stack_frame noattr)) :: *) + (* Member_plain _nalloc tulong :: nil) noattr :: *) nil | CPS => Composite _closure Struct @@ -406,11 +406,11 @@ Section Externs. (tptr (Tfunction (Tcons (Tstruct _thread_info noattr) (Tcons val (Tcons val Tnil))) Tvoid cc_default)) :: Member_plain _env val :: nil) noattr :: - Composite _thread_info Struct - (Member_plain _alloc (tptr val) :: - Member_plain _limit (tptr val) :: - Member_plain _heap (tptr (Tstruct _heap noattr)) :: - Member_plain _args (Tarray val max_args noattr) :: nil) noattr :: + (* Composite _thread_info Struct *) + (* (Member_plain _alloc (tptr val) :: *) + (* Member_plain _limit (tptr val) :: *) + (* Member_plain _heap (tptr (Tstruct _heap noattr)) :: *) + (* Member_plain _args (Tarray val max_args noattr) :: nil) noattr :: *) nil end in let toolbox := From a5e785ab4f197730bc99715574cea458276d0aeb Mon Sep 17 00:00:00 2001 From: Joomy Korkut Date: Tue, 17 Oct 2023 18:17:34 -0400 Subject: [PATCH 3/5] Update runtime with the current version from CertiGC --- benchmarks/Makefile | 2 + bootstrap/certicoqc/certicoqc_wrapper.c | 2 +- bootstrap/certicoqc/config.h | 7 + bootstrap/certicoqc/gc_stack.c | 316 +++++++++++------- bootstrap/certicoqc/gc_stack.h | 16 +- bootstrap/certicoqc/m.h | 3 + bootstrap/certicoqc/print.c | 4 +- bootstrap/certicoqc/print.h | 2 +- bootstrap/certicoqc/values.h | 25 +- bootstrap/certicoqchk/certicoqchk_wrapper.c | 2 +- bootstrap/certicoqchk/config.h | 7 + bootstrap/certicoqchk/gc_stack.c | 316 +++++++++++------- bootstrap/certicoqchk/gc_stack.h | 16 +- bootstrap/certicoqchk/m.h | 3 + bootstrap/certicoqchk/print.c | 4 +- bootstrap/certicoqchk/print.h | 2 +- bootstrap/certicoqchk/values.h | 25 +- cplugin/certicoq.ml | 11 +- cplugin/runtime/config.h | 7 + cplugin/runtime/gc.h | 155 +++++++++ cplugin/runtime/gc_stack.c | 318 ++++++++++++------- cplugin/runtime/gc_stack.h | 16 +- cplugin/runtime/m.h | 3 + cplugin/runtime/values.h | 15 +- cplugin/static/printClight.ml | 2 +- plugin/certicoq.ml | 11 +- plugin/runtime/config.h | 7 + plugin/runtime/gc.h | 155 +++++++++ plugin/runtime/gc_stack.c | 318 ++++++++++++------- plugin/runtime/gc_stack.h | 16 +- plugin/runtime/m.h | 3 + plugin/runtime/values.h | 16 +- plugin/static/printClight.ml | 2 +- theories/Codegen/LambdaANF_to_Clight.v | 12 +- theories/Codegen/LambdaANF_to_Clight_stack.v | 30 +- theories/Runtime/config.h | 7 + theories/Runtime/gc.c | 37 ++- theories/Runtime/gc_stack.c | 134 ++++---- theories/Runtime/gc_stack.h | 33 +- theories/Runtime/m.h | 3 + theories/Runtime/values.h | 9 +- 41 files changed, 1489 insertions(+), 583 deletions(-) create mode 100644 bootstrap/certicoqc/m.h create mode 100644 bootstrap/certicoqchk/m.h create mode 100644 cplugin/runtime/gc.h create mode 100644 cplugin/runtime/m.h create mode 100644 plugin/runtime/gc.h create mode 100644 plugin/runtime/m.h create mode 100644 theories/Runtime/m.h diff --git a/benchmarks/Makefile b/benchmarks/Makefile index 8c005055..5f70c79c 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -17,6 +17,7 @@ $(CFILES): tests exec: $(TESTS) copy: + cp ../theories/Runtime/m.h ./ cp ../theories/Runtime/gc.c ./ cp ../theories/Runtime/gc.h ./ cp ../theories/Runtime/gc_stack.c ./ @@ -27,6 +28,7 @@ copy: cp ../theories/Runtime/mod.py ./ clean: + rm -f ./m.h rm -f ./gc.c rm -f ./gc.h rm -f ./gc_stack.c diff --git a/bootstrap/certicoqc/certicoqc_wrapper.c b/bootstrap/certicoqc/certicoqc_wrapper.c index c79dde46..adbfa805 100644 --- a/bootstrap/certicoqc/certicoqc_wrapper.c +++ b/bootstrap/certicoqc/certicoqc_wrapper.c @@ -1,9 +1,9 @@ #include #include +#include "values.h" #include "gc.h" #include #include -#include #include extern value body(struct thread_info *); diff --git a/bootstrap/certicoqc/config.h b/bootstrap/certicoqc/config.h index f3eb8ce3..4dc00718 100644 --- a/bootstrap/certicoqc/config.h +++ b/bootstrap/certicoqc/config.h @@ -16,6 +16,13 @@ #ifndef CONFIG_H #define CONFIG_H +#include "m.h" + +#if SIZEOF_PTR > 0 +#else +#error "Oops! SIZEOF_PTR is not defined" +#endif + #if SIZEOF_PTR == SIZEOF_LONG /* Standard models: ILP32 or I32LP64 */ typedef long intnat; diff --git a/bootstrap/certicoqc/gc_stack.c b/bootstrap/certicoqc/gc_stack.c index 40e75e45..7083c891 100644 --- a/bootstrap/certicoqc/gc_stack.c +++ b/bootstrap/certicoqc/gc_stack.c @@ -1,6 +1,7 @@ #include #include #include +#include "m.h" /* use printm.c to create m.h */ #include "config.h" #include "values.h" #include "gc_stack.h" @@ -10,23 +11,50 @@ */ +/* The following 5 functions should (in practice) compile correctly in CompCert, + but the CompCert correctness specification does not _require_ that + they compile correctly: their semantics is "undefined behavior" in + CompCert C (and in C11), but in practice they will work in any compiler. */ + +int test_int_or_ptr (value x) /* returns 1 if int, 0 if aligned ptr */ { + return (int)(((intnat)x)&1); +} + +intnat int_or_ptr_to_int (value x) /* precondition: is int */ { + return (intnat)x; +} + +void * int_or_ptr_to_ptr (value x) /* precond: is aligned ptr */ { + return (void *)x; +} + +value int_to_int_or_ptr(intnat x) /* precondition: is odd */ { + return (value)x; +} + +value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { + return (value)x; +} + +int is_ptr(value x) { + return test_int_or_ptr(x) == 0; +} + /* A "space" describes one generation of the generational collector. */ struct space { - value *start, *next, *limit; + value *start, *next, *limit, *rem_limit; }; /* Either start==NULL (meaning that this generation has not yet been created), or start <= next <= limit. The words in start..next are allocated and initialized, and the words from next..limit are available to allocate. */ -#define MAX_SPACES 15 /* how many generations */ - #ifndef RATIO #define RATIO 2 /* size of generation i+1 / size of generation i */ /* Using RATIO=2 is faster than larger ratios, empirically */ #endif -#ifndef NURSERY_SIZE -#define NURSERY_SIZE (1<<16) +#ifndef LOG_NURSERY_SIZE +#define LOG_NURSERY_SIZE 16 #endif /* The size of generation 0 (the "nursery") should approximately match the size of the level-2 cache of the machine, according to: @@ -38,8 +66,44 @@ struct space { (which is the size of the Intel Core i7 per-core L2 cache). http://www.tomshardware.com/reviews/Intel-i7-nehalem-cpu,2041-10.html https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) - Empirical measurements show that 64k works well + Empirical measurements on Intel Core i7 in 32-bit mode show that NURSERY_SIZE=64k 4-byte words works well (or anything in the range from 32k to 128k). + Some machines are radically different, however: + Mac M2 "big" core has 64-bit words, L1 cache 128kB, shared L2 cache 16MB + Mac M2 "small" core has 64-bit words, L1 cache 64kB, shared L2 cache 4MB + On such machines the Goncalves rule of thumb may not apply; would be worth measuring + performance with different nursery sizes, on realistic workloads. + +*/ + +#define NURSERY_SIZE (1<>1); + else fprintf(f,"%ld",v>>1); } // XXX todo update for roots arrays @@ -105,13 +169,14 @@ void printtree(FILE *f, struct heap *h, value v) { #endif void abort_with(char *s) { - fprintf(stderr, s); + fprintf(stderr, "%s", s); exit(1); } -#define Is_from(from_start, from_limit, v) \ - (from_start <= (value*)(v) && (value*)(v) < from_limit) -/* Assuming v is a pointer (Is_block(v)), tests whether v points +int Is_from(value* from_start, value * from_limit, value * v) { + return (from_start <= v && v < from_limit); +} +/* Assuming v is a pointer (is_ptr(v)), tests whether v points somewhere into the "from-space" defined by from_start and from_limit */ void forward (value *from_start, /* beginning of from-space */ @@ -130,9 +195,10 @@ void forward (value *from_start, /* beginning of from-space */ may improve the cache locality of the copied graph. */ { - value v = *p; - if(Is_block(v)) { - + value * v; + value va = *p; + if(is_ptr(va)) { + v = (value*)int_or_ptr_to_ptr(va); /* printf("Start: %lld end"" %lld word %lld \n", from_start, from_limit, v); */ /* if (v == 4360698480) printf ("Found it\n"); */ if(Is_from(from_start, from_limit, v)) { @@ -141,20 +207,21 @@ void forward (value *from_start, /* beginning of from-space */ if(hd == 0) { /* already forwarded */ *p = Field(v,0); } else { - int i; - int sz; + intnat i; + intnat sz; value *new; sz = Wosize_hd(hd); new = *next+1; *next = new+sz; - // if (sz > 50) printf("Moving value %lld with tag %lld with %d fields\n", v, hd, sz); - for(i = -1; i < sz; i++) { + /* if (sz > 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); */ + Hd_val(new) = hd; + for(i = 0; i < sz; i++) { /* printf("Moving field %d\n", i); */ Field(new, i) = Field(v, i); } Hd_val(v) = 0; - Field(v, 0) = (value)new; - *p = (value)new; + Field(v, 0) = ptr_to_int_or_ptr((void *)new); + *p = ptr_to_int_or_ptr((void *)new); /* printf("New %lld\n", new); */ /* if (*p == 73832) printf("Found it\n"); */ if (depth>0) @@ -164,48 +231,45 @@ void forward (value *from_start, /* beginning of from-space */ } } } +void forward_remset (struct space *from, /* descriptor of from-space */ + struct space *to, /* descriptor of to-space */ + value **next) /* next available spot in to-space */ +{ + value *from_start = from->start, *from_limit=from->limit, *from_rem_limit=from->rem_limit; + value *q = from_limit; + assert (from_rem_limit-from_limit <= to->limit-to->start); + while (q != from_rem_limit) { + value *p = *(value**)q; + if (!(from_start <= p && p < from_limit)) { + value oldp= *p, newp; + forward(from_start, from_limit, next, p, DEPTH); + newp= *p; + if (oldp!=newp) + *(--to->limit) = (value)q; + } + q++; + } +} void forward_roots (value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value **next, /* next available spot in to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *frames) /* data structure to find the roots */ /* Forward each live root in the stack */ { - struct stack_frame *frame = ti->fp; - value *live, *curr, *limit, val; - header_t hd; int sz; + struct stack_frame *frame = frames; + value *start; size_t i, limit; /* Scan the stack by traversing the stack pointers */ - /* printf("Scanning frames \n"); */ while (frame != NULL) { - live = frame->root; - curr = live; - limit = frame->next; - /* printf("Scanning frame, curr: %lld, limit: %lld\n", curr, limit); */ - /* int cnt = 0; */ - while (curr < limit) { - /* printf("%d \n", cnt); */ - /* cnt ++; */ - /* printf("Before \n"); */ - /* Curr has the stack address of the local */ - /* printf("curr: %lld\n", curr); */ - val = *curr; - if Is_block(val) { - hd = Hd_val(val); - sz = Wosize_hd(hd); - /* printf("Moving root %lld with tag %ldd and %d fields\n", val, hd, sz); */ - } - forward(from_start, from_limit, next, curr, DEPTH); - /* printf("After \n"); */ - curr ++; - } + start = frame->root; + limit = frame->next - start; /* See NOTE-POINTER-ARITH below */ frame = frame->prev; + for (i=0; i= No_scan_tag) void do_scan(value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value *scan, /* start of unforwarded part of to-space */ @@ -219,7 +283,7 @@ void do_scan(value *from_start, /* beginning of from-space */ s = scan; /* printf("in scan \n"); */ while(s < *next) { - header_t hd = *s; + header_t hd = *((header_t*)s); mlsize_t sz = Wosize_hd(hd); int tag = Tag_hd(hd); if (!No_scan(tag)) { @@ -235,17 +299,21 @@ void do_scan(value *from_start, /* beginning of from-space */ void do_generation (struct space *from, /* descriptor of from-space */ struct space *to, /* descriptor of to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *fr) /* where are the roots? */ /* Copy the live objects out of the "from" space, into the "to" space, using fi and ti to determine the roots of liveness. */ { value *p = to->next; - assert(from->next-from->start <= to->limit-to->next); - forward_roots(from->start, from->limit, &to->next, ti); + /* assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); */ + forward_remset(from, to, &to->next); + forward_roots(from->start, from->limit, &to->next, fr); do_scan(from->start, from->limit, p, &to->next); - if(0) fprintf(stderr,"%5.3f%% occupancy\n", + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr,"%5.3f%% occupancy\n", (to->next-p)/(double)(from->next-from->start)); + #endif from->next=from->start; + from->limit=from->rem_limit; } #if 0 @@ -285,6 +353,7 @@ void create_space(struct space *s, /* which generation to create */ s->start=p; s->next=p; s->limit = p+n; + s->rem_limit = s->limit; } struct heap *create_heap() @@ -300,6 +369,7 @@ struct heap *create_heap() h->spaces[i].start = NULL; h->spaces[i].next = NULL; h->spaces[i].limit = NULL; + h->spaces[i].rem_limit = NULL; } return h; } @@ -309,26 +379,22 @@ struct thread_info *make_tinfo(void) { struct thread_info *tinfo; h = create_heap(); tinfo = (struct thread_info *)malloc(sizeof(struct thread_info)); - if (!tinfo) { - fprintf(stderr, "Could not allocate thread_info struct\n"); - exit(1); - } - - + if (!tinfo) + abort_with("Could not allocate thread_info struct\n"); tinfo->heap=h; tinfo->alloc=h->spaces[0].start; tinfo->limit=h->spaces[0].limit; tinfo->fp=NULL; /* the initial stack pointer is NULL */ + tinfo->nalloc=0; + tinfo->odata=NULL; return tinfo; } void resume(struct thread_info *ti) -/* When the garbage collector is all done, it does not "return" - to the mutator; instead, it uses this function (which does not return) - to resume the mutator by invoking the continuation, fi->fun. - But first, "resume" informs the mutator - of the new values for the alloc and limit pointers. +/* When the garbage collector is all done, inform the mutator + of the new values for the alloc and limit pointers, + and check that enough space has been freed (ti->nalloc words). */ { struct heap *h = ti->heap; @@ -337,7 +403,7 @@ void resume(struct thread_info *ti) assert (h); lo = h->spaces[0].start; hi = h->spaces[0].limit; - if (hi-lo < num_allocs) + if (hi-lo < num_allocs) /* See NOTE-POINTER-ARITH below */ abort_with ("Nursery is too small for function's num_allocs\n"); ti->alloc = lo; ti->limit = hi; @@ -349,17 +415,10 @@ void garbage_collect(struct thread_info *ti) { struct heap *h = ti->heap; /* printf("In GC\n"); */ - if (h==NULL) { - /* If the heap has not yet been initialized, create it and resume */ - h = create_heap(); - ti->heap = h; - resume(ti); - return; - } else { - int i; - assert (h->spaces[0].limit == ti->limit); - h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ - for (i=0; ispaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ + for (i=0; ispaces[i+1].start==NULL) { - int w = h->spaces[i].limit-h->spaces[i].start; + intnat w = h->spaces[i].rem_limit-h->spaces[i].start; /* See NOTE-POINTER-ARITH below */ create_space(h->spaces+(i+1), RATIO*w); } /* Copy all the objects in generation i, into generation i+1 */ - if(0) fprintf(stderr, "Generation %d: ", i); - do_generation(h->spaces+i, h->spaces+(i+1), ti); + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr, "Generation %d: ", i); + #endif + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); /* If there's enough space in gen i+1 to guarantee that the - NEXT collection into i+1 will succeed, we can stop here */ - if (h->spaces[i].limit - h->spaces[i].start + NEXT collection into i+1 will succeed, we can stop here. + We need enough space in the (unlikely) scenario where + * all the data in gen i is live ([i].limit-[i].start), and + * all the remembered set in i is preserved ([i].rem_limit-[i].limit). + */ + if (h->spaces[i].rem_limit - h->spaces[i].start /* See NOTE-POINTER-ARITH below */ <= h->spaces[i+1].limit - h->spaces[i+1].next) { resume(ti); return; } } - /* If we get to i==MAX_SPACES, that's bad news */ - abort_with("Ran out of generations\n"); - } - /* Can't reach this point */ - assert(0); + + /* If we get to i==MAX_SPACES, that's bad news */ + /* assert (MAX_SPACES == i); */ + abort_with("Ran out of generations\n"); } /* REMARK. The generation-management policy in the garbage_collect function @@ -424,26 +488,30 @@ int garbage_collect_all(struct thread_info *ti) { } int i; - assert (h->spaces[0].limit == ti->limit); - for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) { + h->spaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line more necessary here than perhaps in garbage_collect() */ + for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); - do_generation(h->spaces+i, h->spaces+(i+1), ti); - } - - /* If i is the nursery, its next won't be tracked, so need to be set to alloc */ - if(i == 0){ - h->spaces[i].next = ti->alloc; - } return i; } -/* export (deep copy if boxed) the first field of args */ -void* export(struct thread_info *ti) { +/* export (deep copy if boxed) from the given root */ +void *export(struct thread_info *ti, value root) { - /* if args[1] is unboxed, return it */ - if(!Is_block(ti->args[1])){ - return ti->args[1]; - } + + /* This block of 7 lines is new (appel 2023/06/27) and untested */ + struct stack_frame frame; + value roots[1]; + roots[0]=root; + frame.root=roots; + frame.next=roots+1; + frame.prev=NULL; + ti->fp= &frame; + + /* if root is unboxed, return it */ + if(!is_ptr(root)) + return (void *)root; /* otherwise collect all that is reachable from it to the last generation, then compact it into value_sp */ int gen_level = garbage_collect_all(ti); @@ -451,11 +519,11 @@ void* export(struct thread_info *ti) { struct space* fake_sp = (struct space*)malloc(sizeof(struct space)); create_space(fake_sp, sp->next - sp->start); - do_generation(sp, fake_sp, ti); + do_generation(sp, fake_sp, ti->fp); struct space* value_sp = (struct space*)malloc(sizeof(struct space)); create_space(value_sp, fake_sp->next - fake_sp->start); - do_generation(fake_sp, value_sp, ti); + do_generation(fake_sp, value_sp, ti->fp); /* offset start by the header */ void* result_block = (void *)(value_sp->start +1); @@ -464,6 +532,40 @@ void* export(struct thread_info *ti) { free(fake_sp); free(value_sp); - return result_block; } + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val) { + assert (ti->alloc < ti->limit); + *p_cell = p_val; + if (is_ptr(p_val)) { + *(value **)(--ti->limit) = p_cell; + } +} + +void print_heapsize(struct thread_info *ti) { + for (int i = 0; i < MAX_SPACES; i++) { + int allocated = (int)(ti->heap->spaces[i].next - ti->heap->spaces[i].start); + int remembered = (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].limit); + if (!(allocated || remembered)) { + continue; + } + printf("generation %d:\n", i); + printf(" size: %d\n", (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].start)); + printf(" allocated: %d\n", allocated); + printf(" remembered: %d\n", remembered); + } +} + +/* NOTE-POINTER-ARITH: In a few places, we do a pointer subtraction, such as + h->spaces[i].limit - h->spaces[i].start. + When p and q have type *foo, then this is much like ((int)p-(int)q)/sizeof(foo). + But note this is a SIGNED division, which makes it quite dangerous if ((int)p-(int)q) + can be larger than the maximum signed integer. So we have to be quite careful in + the program and the proof, especially when (on a 32-bit machines) our largest generation + might be similar in size to the entire address space. +*/ + + + diff --git a/bootstrap/certicoqc/gc_stack.h b/bootstrap/certicoqc/gc_stack.h index e25fdec9..d94870a4 100644 --- a/bootstrap/certicoqc/gc_stack.h +++ b/bootstrap/certicoqc/gc_stack.h @@ -1,3 +1,6 @@ +#ifndef CERTICOQ_GC_STACK_H +#define CERTICOQ_GC_STACK_H + #include "values.h" /* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. @@ -88,6 +91,9 @@ recent collection. To call the garbage collector, the mutator passes a fun_info and a thread_info, as follows. */ +#define No_scan_tag 251 +#define No_scan(t) ((t) >= No_scan_tag) + typedef const uintnat *fun_info; /* fi[0]: How many words the function might allocate fi[1]: How many slots of the args array contain live roots @@ -113,6 +119,7 @@ struct thread_info { value args[MAX_ARGS]; /* the args array */ struct stack_frame *fp; /* stack pointer */ uintnat nalloc; /* Remaining allocation until next GC call*/ + void *odata; }; struct thread_info *make_tinfo(void); @@ -162,4 +169,11 @@ value* extract_answer(struct thread_info *ti); can be treated uniformly by the caller of extract_answer(). */ -void* export(struct thread_info *ti); +void* export(struct thread_info *ti, value root); + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val); + +void print_heapsize(struct thread_info *ti); + +#endif /* CERTICOQ_GC_STACK_H */ diff --git a/bootstrap/certicoqc/m.h b/bootstrap/certicoqc/m.h new file mode 100644 index 00000000..1c509a54 --- /dev/null +++ b/bootstrap/certicoqc/m.h @@ -0,0 +1,3 @@ +#define SIZEOF_PTR 8 +#define SIZEOF_LONG 8 +#define SIZEOF_INT 4 diff --git a/bootstrap/certicoqc/print.c b/bootstrap/certicoqc/print.c index 6097a593..6c4693e9 100644 --- a/bootstrap/certicoqc/print.c +++ b/bootstrap/certicoqc/print.c @@ -1,9 +1,9 @@ #include #include #include "print.h" +#include "values.h" #include "gc.h" #include -#include #include void call_coq_msg_info(value msg) @@ -53,4 +53,4 @@ value coq_msg_debug(value msg) { value metacoq_guard_impl(value fixcofix, value globalenv, value context, value term) { return Val_true; -} \ No newline at end of file +} diff --git a/bootstrap/certicoqc/print.h b/bootstrap/certicoqc/print.h index 324654bc..f83aa56a 100644 --- a/bootstrap/certicoqc/print.h +++ b/bootstrap/certicoqc/print.h @@ -1,4 +1,4 @@ -#include +#include "values.h" extern value coq_msg_debug(value msg); extern value coq_msg_info(value msg); diff --git a/bootstrap/certicoqc/values.h b/bootstrap/certicoqc/values.h index 6edbc013..a5487d16 100644 --- a/bootstrap/certicoqc/values.h +++ b/bootstrap/certicoqc/values.h @@ -53,7 +53,23 @@ extern "C" { This is for use only by the GC. */ -typedef intnat value; + +/* +CHANGE BY THE CERTICOQ/VERIFFI TEAM: +We do this for CertiCoq's FFI system because for technical reasons related to +VST's program logic type system, we need the value type to be void* with this +attribute. On the other hand, standard OCaml wants the value to be intnat. +*/ +#ifdef VERIFFI + typedef void * value + #ifdef COMPCERT + __attribute((aligned(_Alignof(void *)))) + #endif + ; +#else + typedef intnat value; +#endif +/* because of VST's restrictions. */ typedef uintnat header_t; typedef uintnat mlsize_t; typedef unsigned int tag_t; /* Actually, an unsigned char */ @@ -62,7 +78,12 @@ typedef uintnat mark_t; /* Longs vs blocks. */ #define Is_long(x) (((x) & 1) != 0) -#define Is_block(x) (((x) & 1) == 0) +/* CHANGE BY THE CERTICOQ/VERIFFI TEAM: */ +#ifdef VERIFFI + #define Is_block(x) (((int)(((intnat) x) & 1)) == 0) +#else + #define Is_block(x) (((x) & 1) == 0) +#endif /* Conversion macro names are always of the form "to_from". */ /* Example: Val_long as in "Val from long" or "Val of long". */ diff --git a/bootstrap/certicoqchk/certicoqchk_wrapper.c b/bootstrap/certicoqchk/certicoqchk_wrapper.c index c42ae2f7..a558362c 100644 --- a/bootstrap/certicoqchk/certicoqchk_wrapper.c +++ b/bootstrap/certicoqchk/certicoqchk_wrapper.c @@ -1,9 +1,9 @@ #include #include +#include "values.h" #include "gc.h" #include #include -#include #include extern value body(struct thread_info *); diff --git a/bootstrap/certicoqchk/config.h b/bootstrap/certicoqchk/config.h index f3eb8ce3..4dc00718 100644 --- a/bootstrap/certicoqchk/config.h +++ b/bootstrap/certicoqchk/config.h @@ -16,6 +16,13 @@ #ifndef CONFIG_H #define CONFIG_H +#include "m.h" + +#if SIZEOF_PTR > 0 +#else +#error "Oops! SIZEOF_PTR is not defined" +#endif + #if SIZEOF_PTR == SIZEOF_LONG /* Standard models: ILP32 or I32LP64 */ typedef long intnat; diff --git a/bootstrap/certicoqchk/gc_stack.c b/bootstrap/certicoqchk/gc_stack.c index 40e75e45..7083c891 100644 --- a/bootstrap/certicoqchk/gc_stack.c +++ b/bootstrap/certicoqchk/gc_stack.c @@ -1,6 +1,7 @@ #include #include #include +#include "m.h" /* use printm.c to create m.h */ #include "config.h" #include "values.h" #include "gc_stack.h" @@ -10,23 +11,50 @@ */ +/* The following 5 functions should (in practice) compile correctly in CompCert, + but the CompCert correctness specification does not _require_ that + they compile correctly: their semantics is "undefined behavior" in + CompCert C (and in C11), but in practice they will work in any compiler. */ + +int test_int_or_ptr (value x) /* returns 1 if int, 0 if aligned ptr */ { + return (int)(((intnat)x)&1); +} + +intnat int_or_ptr_to_int (value x) /* precondition: is int */ { + return (intnat)x; +} + +void * int_or_ptr_to_ptr (value x) /* precond: is aligned ptr */ { + return (void *)x; +} + +value int_to_int_or_ptr(intnat x) /* precondition: is odd */ { + return (value)x; +} + +value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { + return (value)x; +} + +int is_ptr(value x) { + return test_int_or_ptr(x) == 0; +} + /* A "space" describes one generation of the generational collector. */ struct space { - value *start, *next, *limit; + value *start, *next, *limit, *rem_limit; }; /* Either start==NULL (meaning that this generation has not yet been created), or start <= next <= limit. The words in start..next are allocated and initialized, and the words from next..limit are available to allocate. */ -#define MAX_SPACES 15 /* how many generations */ - #ifndef RATIO #define RATIO 2 /* size of generation i+1 / size of generation i */ /* Using RATIO=2 is faster than larger ratios, empirically */ #endif -#ifndef NURSERY_SIZE -#define NURSERY_SIZE (1<<16) +#ifndef LOG_NURSERY_SIZE +#define LOG_NURSERY_SIZE 16 #endif /* The size of generation 0 (the "nursery") should approximately match the size of the level-2 cache of the machine, according to: @@ -38,8 +66,44 @@ struct space { (which is the size of the Intel Core i7 per-core L2 cache). http://www.tomshardware.com/reviews/Intel-i7-nehalem-cpu,2041-10.html https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) - Empirical measurements show that 64k works well + Empirical measurements on Intel Core i7 in 32-bit mode show that NURSERY_SIZE=64k 4-byte words works well (or anything in the range from 32k to 128k). + Some machines are radically different, however: + Mac M2 "big" core has 64-bit words, L1 cache 128kB, shared L2 cache 16MB + Mac M2 "small" core has 64-bit words, L1 cache 64kB, shared L2 cache 4MB + On such machines the Goncalves rule of thumb may not apply; would be worth measuring + performance with different nursery sizes, on realistic workloads. + +*/ + +#define NURSERY_SIZE (1<>1); + else fprintf(f,"%ld",v>>1); } // XXX todo update for roots arrays @@ -105,13 +169,14 @@ void printtree(FILE *f, struct heap *h, value v) { #endif void abort_with(char *s) { - fprintf(stderr, s); + fprintf(stderr, "%s", s); exit(1); } -#define Is_from(from_start, from_limit, v) \ - (from_start <= (value*)(v) && (value*)(v) < from_limit) -/* Assuming v is a pointer (Is_block(v)), tests whether v points +int Is_from(value* from_start, value * from_limit, value * v) { + return (from_start <= v && v < from_limit); +} +/* Assuming v is a pointer (is_ptr(v)), tests whether v points somewhere into the "from-space" defined by from_start and from_limit */ void forward (value *from_start, /* beginning of from-space */ @@ -130,9 +195,10 @@ void forward (value *from_start, /* beginning of from-space */ may improve the cache locality of the copied graph. */ { - value v = *p; - if(Is_block(v)) { - + value * v; + value va = *p; + if(is_ptr(va)) { + v = (value*)int_or_ptr_to_ptr(va); /* printf("Start: %lld end"" %lld word %lld \n", from_start, from_limit, v); */ /* if (v == 4360698480) printf ("Found it\n"); */ if(Is_from(from_start, from_limit, v)) { @@ -141,20 +207,21 @@ void forward (value *from_start, /* beginning of from-space */ if(hd == 0) { /* already forwarded */ *p = Field(v,0); } else { - int i; - int sz; + intnat i; + intnat sz; value *new; sz = Wosize_hd(hd); new = *next+1; *next = new+sz; - // if (sz > 50) printf("Moving value %lld with tag %lld with %d fields\n", v, hd, sz); - for(i = -1; i < sz; i++) { + /* if (sz > 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); */ + Hd_val(new) = hd; + for(i = 0; i < sz; i++) { /* printf("Moving field %d\n", i); */ Field(new, i) = Field(v, i); } Hd_val(v) = 0; - Field(v, 0) = (value)new; - *p = (value)new; + Field(v, 0) = ptr_to_int_or_ptr((void *)new); + *p = ptr_to_int_or_ptr((void *)new); /* printf("New %lld\n", new); */ /* if (*p == 73832) printf("Found it\n"); */ if (depth>0) @@ -164,48 +231,45 @@ void forward (value *from_start, /* beginning of from-space */ } } } +void forward_remset (struct space *from, /* descriptor of from-space */ + struct space *to, /* descriptor of to-space */ + value **next) /* next available spot in to-space */ +{ + value *from_start = from->start, *from_limit=from->limit, *from_rem_limit=from->rem_limit; + value *q = from_limit; + assert (from_rem_limit-from_limit <= to->limit-to->start); + while (q != from_rem_limit) { + value *p = *(value**)q; + if (!(from_start <= p && p < from_limit)) { + value oldp= *p, newp; + forward(from_start, from_limit, next, p, DEPTH); + newp= *p; + if (oldp!=newp) + *(--to->limit) = (value)q; + } + q++; + } +} void forward_roots (value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value **next, /* next available spot in to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *frames) /* data structure to find the roots */ /* Forward each live root in the stack */ { - struct stack_frame *frame = ti->fp; - value *live, *curr, *limit, val; - header_t hd; int sz; + struct stack_frame *frame = frames; + value *start; size_t i, limit; /* Scan the stack by traversing the stack pointers */ - /* printf("Scanning frames \n"); */ while (frame != NULL) { - live = frame->root; - curr = live; - limit = frame->next; - /* printf("Scanning frame, curr: %lld, limit: %lld\n", curr, limit); */ - /* int cnt = 0; */ - while (curr < limit) { - /* printf("%d \n", cnt); */ - /* cnt ++; */ - /* printf("Before \n"); */ - /* Curr has the stack address of the local */ - /* printf("curr: %lld\n", curr); */ - val = *curr; - if Is_block(val) { - hd = Hd_val(val); - sz = Wosize_hd(hd); - /* printf("Moving root %lld with tag %ldd and %d fields\n", val, hd, sz); */ - } - forward(from_start, from_limit, next, curr, DEPTH); - /* printf("After \n"); */ - curr ++; - } + start = frame->root; + limit = frame->next - start; /* See NOTE-POINTER-ARITH below */ frame = frame->prev; + for (i=0; i= No_scan_tag) void do_scan(value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value *scan, /* start of unforwarded part of to-space */ @@ -219,7 +283,7 @@ void do_scan(value *from_start, /* beginning of from-space */ s = scan; /* printf("in scan \n"); */ while(s < *next) { - header_t hd = *s; + header_t hd = *((header_t*)s); mlsize_t sz = Wosize_hd(hd); int tag = Tag_hd(hd); if (!No_scan(tag)) { @@ -235,17 +299,21 @@ void do_scan(value *from_start, /* beginning of from-space */ void do_generation (struct space *from, /* descriptor of from-space */ struct space *to, /* descriptor of to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *fr) /* where are the roots? */ /* Copy the live objects out of the "from" space, into the "to" space, using fi and ti to determine the roots of liveness. */ { value *p = to->next; - assert(from->next-from->start <= to->limit-to->next); - forward_roots(from->start, from->limit, &to->next, ti); + /* assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); */ + forward_remset(from, to, &to->next); + forward_roots(from->start, from->limit, &to->next, fr); do_scan(from->start, from->limit, p, &to->next); - if(0) fprintf(stderr,"%5.3f%% occupancy\n", + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr,"%5.3f%% occupancy\n", (to->next-p)/(double)(from->next-from->start)); + #endif from->next=from->start; + from->limit=from->rem_limit; } #if 0 @@ -285,6 +353,7 @@ void create_space(struct space *s, /* which generation to create */ s->start=p; s->next=p; s->limit = p+n; + s->rem_limit = s->limit; } struct heap *create_heap() @@ -300,6 +369,7 @@ struct heap *create_heap() h->spaces[i].start = NULL; h->spaces[i].next = NULL; h->spaces[i].limit = NULL; + h->spaces[i].rem_limit = NULL; } return h; } @@ -309,26 +379,22 @@ struct thread_info *make_tinfo(void) { struct thread_info *tinfo; h = create_heap(); tinfo = (struct thread_info *)malloc(sizeof(struct thread_info)); - if (!tinfo) { - fprintf(stderr, "Could not allocate thread_info struct\n"); - exit(1); - } - - + if (!tinfo) + abort_with("Could not allocate thread_info struct\n"); tinfo->heap=h; tinfo->alloc=h->spaces[0].start; tinfo->limit=h->spaces[0].limit; tinfo->fp=NULL; /* the initial stack pointer is NULL */ + tinfo->nalloc=0; + tinfo->odata=NULL; return tinfo; } void resume(struct thread_info *ti) -/* When the garbage collector is all done, it does not "return" - to the mutator; instead, it uses this function (which does not return) - to resume the mutator by invoking the continuation, fi->fun. - But first, "resume" informs the mutator - of the new values for the alloc and limit pointers. +/* When the garbage collector is all done, inform the mutator + of the new values for the alloc and limit pointers, + and check that enough space has been freed (ti->nalloc words). */ { struct heap *h = ti->heap; @@ -337,7 +403,7 @@ void resume(struct thread_info *ti) assert (h); lo = h->spaces[0].start; hi = h->spaces[0].limit; - if (hi-lo < num_allocs) + if (hi-lo < num_allocs) /* See NOTE-POINTER-ARITH below */ abort_with ("Nursery is too small for function's num_allocs\n"); ti->alloc = lo; ti->limit = hi; @@ -349,17 +415,10 @@ void garbage_collect(struct thread_info *ti) { struct heap *h = ti->heap; /* printf("In GC\n"); */ - if (h==NULL) { - /* If the heap has not yet been initialized, create it and resume */ - h = create_heap(); - ti->heap = h; - resume(ti); - return; - } else { - int i; - assert (h->spaces[0].limit == ti->limit); - h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ - for (i=0; ispaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ + for (i=0; ispaces[i+1].start==NULL) { - int w = h->spaces[i].limit-h->spaces[i].start; + intnat w = h->spaces[i].rem_limit-h->spaces[i].start; /* See NOTE-POINTER-ARITH below */ create_space(h->spaces+(i+1), RATIO*w); } /* Copy all the objects in generation i, into generation i+1 */ - if(0) fprintf(stderr, "Generation %d: ", i); - do_generation(h->spaces+i, h->spaces+(i+1), ti); + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr, "Generation %d: ", i); + #endif + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); /* If there's enough space in gen i+1 to guarantee that the - NEXT collection into i+1 will succeed, we can stop here */ - if (h->spaces[i].limit - h->spaces[i].start + NEXT collection into i+1 will succeed, we can stop here. + We need enough space in the (unlikely) scenario where + * all the data in gen i is live ([i].limit-[i].start), and + * all the remembered set in i is preserved ([i].rem_limit-[i].limit). + */ + if (h->spaces[i].rem_limit - h->spaces[i].start /* See NOTE-POINTER-ARITH below */ <= h->spaces[i+1].limit - h->spaces[i+1].next) { resume(ti); return; } } - /* If we get to i==MAX_SPACES, that's bad news */ - abort_with("Ran out of generations\n"); - } - /* Can't reach this point */ - assert(0); + + /* If we get to i==MAX_SPACES, that's bad news */ + /* assert (MAX_SPACES == i); */ + abort_with("Ran out of generations\n"); } /* REMARK. The generation-management policy in the garbage_collect function @@ -424,26 +488,30 @@ int garbage_collect_all(struct thread_info *ti) { } int i; - assert (h->spaces[0].limit == ti->limit); - for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) { + h->spaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line more necessary here than perhaps in garbage_collect() */ + for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); - do_generation(h->spaces+i, h->spaces+(i+1), ti); - } - - /* If i is the nursery, its next won't be tracked, so need to be set to alloc */ - if(i == 0){ - h->spaces[i].next = ti->alloc; - } return i; } -/* export (deep copy if boxed) the first field of args */ -void* export(struct thread_info *ti) { +/* export (deep copy if boxed) from the given root */ +void *export(struct thread_info *ti, value root) { - /* if args[1] is unboxed, return it */ - if(!Is_block(ti->args[1])){ - return ti->args[1]; - } + + /* This block of 7 lines is new (appel 2023/06/27) and untested */ + struct stack_frame frame; + value roots[1]; + roots[0]=root; + frame.root=roots; + frame.next=roots+1; + frame.prev=NULL; + ti->fp= &frame; + + /* if root is unboxed, return it */ + if(!is_ptr(root)) + return (void *)root; /* otherwise collect all that is reachable from it to the last generation, then compact it into value_sp */ int gen_level = garbage_collect_all(ti); @@ -451,11 +519,11 @@ void* export(struct thread_info *ti) { struct space* fake_sp = (struct space*)malloc(sizeof(struct space)); create_space(fake_sp, sp->next - sp->start); - do_generation(sp, fake_sp, ti); + do_generation(sp, fake_sp, ti->fp); struct space* value_sp = (struct space*)malloc(sizeof(struct space)); create_space(value_sp, fake_sp->next - fake_sp->start); - do_generation(fake_sp, value_sp, ti); + do_generation(fake_sp, value_sp, ti->fp); /* offset start by the header */ void* result_block = (void *)(value_sp->start +1); @@ -464,6 +532,40 @@ void* export(struct thread_info *ti) { free(fake_sp); free(value_sp); - return result_block; } + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val) { + assert (ti->alloc < ti->limit); + *p_cell = p_val; + if (is_ptr(p_val)) { + *(value **)(--ti->limit) = p_cell; + } +} + +void print_heapsize(struct thread_info *ti) { + for (int i = 0; i < MAX_SPACES; i++) { + int allocated = (int)(ti->heap->spaces[i].next - ti->heap->spaces[i].start); + int remembered = (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].limit); + if (!(allocated || remembered)) { + continue; + } + printf("generation %d:\n", i); + printf(" size: %d\n", (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].start)); + printf(" allocated: %d\n", allocated); + printf(" remembered: %d\n", remembered); + } +} + +/* NOTE-POINTER-ARITH: In a few places, we do a pointer subtraction, such as + h->spaces[i].limit - h->spaces[i].start. + When p and q have type *foo, then this is much like ((int)p-(int)q)/sizeof(foo). + But note this is a SIGNED division, which makes it quite dangerous if ((int)p-(int)q) + can be larger than the maximum signed integer. So we have to be quite careful in + the program and the proof, especially when (on a 32-bit machines) our largest generation + might be similar in size to the entire address space. +*/ + + + diff --git a/bootstrap/certicoqchk/gc_stack.h b/bootstrap/certicoqchk/gc_stack.h index e25fdec9..d94870a4 100644 --- a/bootstrap/certicoqchk/gc_stack.h +++ b/bootstrap/certicoqchk/gc_stack.h @@ -1,3 +1,6 @@ +#ifndef CERTICOQ_GC_STACK_H +#define CERTICOQ_GC_STACK_H + #include "values.h" /* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. @@ -88,6 +91,9 @@ recent collection. To call the garbage collector, the mutator passes a fun_info and a thread_info, as follows. */ +#define No_scan_tag 251 +#define No_scan(t) ((t) >= No_scan_tag) + typedef const uintnat *fun_info; /* fi[0]: How many words the function might allocate fi[1]: How many slots of the args array contain live roots @@ -113,6 +119,7 @@ struct thread_info { value args[MAX_ARGS]; /* the args array */ struct stack_frame *fp; /* stack pointer */ uintnat nalloc; /* Remaining allocation until next GC call*/ + void *odata; }; struct thread_info *make_tinfo(void); @@ -162,4 +169,11 @@ value* extract_answer(struct thread_info *ti); can be treated uniformly by the caller of extract_answer(). */ -void* export(struct thread_info *ti); +void* export(struct thread_info *ti, value root); + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val); + +void print_heapsize(struct thread_info *ti); + +#endif /* CERTICOQ_GC_STACK_H */ diff --git a/bootstrap/certicoqchk/m.h b/bootstrap/certicoqchk/m.h new file mode 100644 index 00000000..1c509a54 --- /dev/null +++ b/bootstrap/certicoqchk/m.h @@ -0,0 +1,3 @@ +#define SIZEOF_PTR 8 +#define SIZEOF_LONG 8 +#define SIZEOF_INT 4 diff --git a/bootstrap/certicoqchk/print.c b/bootstrap/certicoqchk/print.c index 9c7196a1..897bdf8f 100644 --- a/bootstrap/certicoqchk/print.c +++ b/bootstrap/certicoqchk/print.c @@ -1,9 +1,9 @@ #include #include #include "print.h" +#include "values.h" #include "gc.h" #include -#include #include void call_coq_msg_info(value msg) @@ -53,4 +53,4 @@ value print_msg_debug(value msg) { value metacoq_guard_impl(value fixcofix, value globalenv, value context, value term) { return Val_true; -} \ No newline at end of file +} diff --git a/bootstrap/certicoqchk/print.h b/bootstrap/certicoqchk/print.h index 942504ac..a1a343a6 100644 --- a/bootstrap/certicoqchk/print.h +++ b/bootstrap/certicoqchk/print.h @@ -1,4 +1,4 @@ -#include +#include "values.h" extern value print_msg_debug(value msg); extern value print_msg_info(value msg); diff --git a/bootstrap/certicoqchk/values.h b/bootstrap/certicoqchk/values.h index 6edbc013..a5487d16 100644 --- a/bootstrap/certicoqchk/values.h +++ b/bootstrap/certicoqchk/values.h @@ -53,7 +53,23 @@ extern "C" { This is for use only by the GC. */ -typedef intnat value; + +/* +CHANGE BY THE CERTICOQ/VERIFFI TEAM: +We do this for CertiCoq's FFI system because for technical reasons related to +VST's program logic type system, we need the value type to be void* with this +attribute. On the other hand, standard OCaml wants the value to be intnat. +*/ +#ifdef VERIFFI + typedef void * value + #ifdef COMPCERT + __attribute((aligned(_Alignof(void *)))) + #endif + ; +#else + typedef intnat value; +#endif +/* because of VST's restrictions. */ typedef uintnat header_t; typedef uintnat mlsize_t; typedef unsigned int tag_t; /* Actually, an unsigned char */ @@ -62,7 +78,12 @@ typedef uintnat mark_t; /* Longs vs blocks. */ #define Is_long(x) (((x) & 1) != 0) -#define Is_block(x) (((x) & 1) == 0) +/* CHANGE BY THE CERTICOQ/VERIFFI TEAM: */ +#ifdef VERIFFI + #define Is_block(x) (((int)(((intnat) x) & 1)) == 0) +#else + #define Is_block(x) (((x) & 1) == 0) +#endif /* Conversion macro names are always of the form "to_from". */ /* Example: Val_long as in "Val from long" or "Val of long". */ diff --git a/cplugin/certicoq.ml b/cplugin/certicoq.ml index 49caa6d0..100c86a1 100644 --- a/cplugin/certicoq.ml +++ b/cplugin/certicoq.ml @@ -337,7 +337,8 @@ module CompileFunctor (CI : CompilerInterface) = struct let compile opts term imports = let debug = opts.debug in let options = make_pipeline_options opts in - let imports = get_global_includes () @ imports in + let runtime_imports = [if opts.cps then "gc.h" else "gc_stack.h"] in + let imports = runtime_imports @ get_global_includes () @ imports in debug_msg debug (Printf.sprintf "Imports: %s" (String.concat " " imports)); let p = CI.compile options term in match p with @@ -349,7 +350,7 @@ module CompileFunctor (CI : CompilerInterface) = struct let cstr = make_fname opts (fname ^ suff ^ ".c") in let hstr = make_fname opts (fname ^ suff ^ ".h") in CI.printProg prg nenv cstr imports (* (List.map Tm_util.string_to_list imports) *); - CI.printProg header nenv hstr []; + CI.printProg header nenv hstr runtime_imports; (* let cstr = Metacoq_template_plugin.Tm_util.string_to_list (Names.KerName.to_string (Names.Constant.canonical const) ^ suff ^ ".c") in * let hstr = Metacoq_template_plugin.Tm_util.string_to_list (Names.KerName.to_string (Names.Constant.canonical const) ^ suff ^ ".h") in @@ -371,7 +372,7 @@ module CompileFunctor (CI : CompilerInterface) = struct else let debug = opts.debug in let options = make_pipeline_options opts in - + let runtime_imports = [if opts.cps then "gc.h" else "gc_stack.h"] in let time = Unix.gettimeofday() in (match CI.generate_glue options globs with | CompM.Ret (((nenv, header), prg), logs) -> @@ -386,8 +387,8 @@ module CompileFunctor (CI : CompilerInterface) = struct let hstr = if standalone then fname ^ ".h" else "glue." ^ fname ^ suff ^ ".h" in let cstr = make_fname opts cstr in let hstr = make_fname opts hstr in - CI.printProg prg nenv cstr []; - CI.printProg header nenv hstr []; + CI.printProg prg nenv cstr runtime_imports; + CI.printProg header nenv hstr runtime_imports; let time = (Unix.gettimeofday() -. time) in debug_msg debug (Printf.sprintf "Printed glue code to file in %f s.." time) diff --git a/cplugin/runtime/config.h b/cplugin/runtime/config.h index f3eb8ce3..4dc00718 100644 --- a/cplugin/runtime/config.h +++ b/cplugin/runtime/config.h @@ -16,6 +16,13 @@ #ifndef CONFIG_H #define CONFIG_H +#include "m.h" + +#if SIZEOF_PTR > 0 +#else +#error "Oops! SIZEOF_PTR is not defined" +#endif + #if SIZEOF_PTR == SIZEOF_LONG /* Standard models: ILP32 or I32LP64 */ typedef long intnat; diff --git a/cplugin/runtime/gc.h b/cplugin/runtime/gc.h new file mode 100644 index 00000000..46c6e6a4 --- /dev/null +++ b/cplugin/runtime/gc.h @@ -0,0 +1,155 @@ +#include "values.h" + +/* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. + Andrew W. Appel, September 2016 + +The current Certicoq code generator uses Ocaml object formats, +as described in Chapter 20 of Real World Ocaml by Minsky et al. +https://realworldocaml.org/v1/en/html/memory-representation-of-values.html + +That is: + + 31 10 9 8 7 0 + +-------+---------+----------+ + | size | color | tag byte | + +-------+---------+----------+ +v --> | value[0] | + +----------------------------+ + | value[1] | + +----------------------------+ + | . | + | . | + | . | + +----------------------------+ + | value[size-1] | + +----------------------------+ + +This works for 32-bit or 64-bit words; +if 64-bit words, substitute "63" for "31" in the diagram above. +At present we'll assume 32-bit words. + +The header file "values.h", from the OCaml distribution, +has macros (etc.) for accessing these fields and headers. + +The header file "config.h", from the OCaml distribution, defines +typedef "intnat", the "natural integer type" for this compiler/machine, +and "uintnat", the "natural unsigned integer type". +Config.h also defines (BUT WE DO NOT USE) parameters for the Ocaml +generational garbage collector. + +The important definitions we use from values.h are: + +Is_block(v) : tests whether v is a pointer (even number) +Hd_val(v) : contents of the header word, i.e., just before where v points to +Field(v,i) : the i'th field of object v +Tag_hd(h) : If h is a header-word, get the constructor-tag +Wosize_hd(h): If h is a header-word, get size of the object in words + +We define the following ourselves, following Ocaml's format: + +No_scan(t) : If t is a constructor-tag, true if none of the object's + data words are to be interpreted as pointers. + (For example, character-string data) + +CALLING THE GARBAGE COLLECTOR (this part is NOT standard Ocaml): + +The mutator runs in this environment: + + NURSERY OLDER GENERATIONS + +-------------+ start---->+-------------+ +-------------+ + | args[0] | | | | | + +-------------+ | <-\ | /-->| | + | args[1] *----\ | | *---/ | | + +-------------+ \-----> | *-/ | | | + | . | +-+ | | | | + | . | alloc|*-->+-------------+ | | + | . | +-+ | | | | + +-------------+ | | | | + | args[argc-1]| +-+ | | | | + +-------------+ limit|*-->+-------------+ | | + +-+ +-------------+ + +There is a global "args" array. Certain words in "args" may +point into the heap (either the nursery or into older generations). +The nursery may point within itself, or into older generations. +Older generations may not point into the nursery. +The heap may not point into the args array. + +The mutator creates a new object by using the N+1 words (including header) +starting at "alloc", and then adding N+1 to alloc. This is only +permitted if alloc+N+1 <= limit. Otherwise, the mutator must +first call the garbage collector. + +The variables "alloc" and "limit" are owned by the mutator. +The "start" value is not actually a variable of the mutator, +but it was the value of "alloc" immediately after the most +recent collection. + +To call the garbage collector, the mutator passes a fun_info and +a thread_info, as follows. */ + +typedef const uintnat *fun_info; +/* fi[0]: How many words the function might allocate + fi[1]: How many slots of the args array contain live roots + fi[2..(fi[1]-2)]: Indices of the live roots in the args array +*/ + +struct heap; /* abstract, opaque */ + +#define MAX_ARGS 1024 + +struct thread_info { + value *alloc; /* alloc pointer */ + value *limit; /* limit pointer */ + struct heap *heap; /* Description of the generations in the heap */ + value args[MAX_ARGS]; /* the args array */ +}; + +struct thread_info *make_tinfo(void); + +void garbage_collect(fun_info fi, struct thread_info *ti); +/* Performs one garbage collection; + or if ti->heap==NULL, initializes the heap. + + The returns in a state where + (1) the "after" graph of nodes reachable from args[indices[0..num_args]] + is isomorphic to the "before" graph; and + (2) the alloc pointer points to N words of unallocated heap space + (where N>=num_allocs), such that limit-alloc=N. +*/ + +void free_heap(struct heap *h); +/* Deallocates all heap data associated with h, and returns the + * memory to the operating system (via the malloc/free system). + * After calling this function, h is a dangling pointer and should not be used. + */ + +void reset_heap(struct heap *h); +/* Empties the heap without freeing its storage. + * After a complete execution of the mutator, + * and after whoever invoked the mutator copies whatever result they want + * out of the heap, one can call this function before starting + * another mutator execution. This saves the operating-system overhead of + * free_heap() followed by the implicit create_heap that would have been + * done in the first garbage_collect() call of the next execution. + */ + +/* which slot of the args array has the answer of a certicoq program */ +#define answer_index 1 + +value* extract_answer(struct thread_info *ti); +/* y=extract_answer(x,ti) copies the dag rooted at ti->args[answer_index] + into a compact data structure starting at y[1], outside the heap, + in a single malloc'ed (therefore freeable) object at address y. + All within-the-heap pointers will now be within the object y. + If (the answer within) the heap pointed to records outside + the heap, then those will point at their original locations + outside the object y. + + Note that the start is *(y+1), not (y+1); that is, there's an + extra wrapper-record round the object. That's so that the + root-within-the-heap and the root-outside-the-heap (or root-unboxed) + can be treated uniformly by the caller of extract_answer(). +*/ + +void* export(struct thread_info *ti); diff --git a/cplugin/runtime/gc_stack.c b/cplugin/runtime/gc_stack.c index 5db5203c..7083c891 100644 --- a/cplugin/runtime/gc_stack.c +++ b/cplugin/runtime/gc_stack.c @@ -1,6 +1,7 @@ #include #include #include +#include "m.h" /* use printm.c to create m.h */ #include "config.h" #include "values.h" #include "gc_stack.h" @@ -10,23 +11,50 @@ */ +/* The following 5 functions should (in practice) compile correctly in CompCert, + but the CompCert correctness specification does not _require_ that + they compile correctly: their semantics is "undefined behavior" in + CompCert C (and in C11), but in practice they will work in any compiler. */ + +int test_int_or_ptr (value x) /* returns 1 if int, 0 if aligned ptr */ { + return (int)(((intnat)x)&1); +} + +intnat int_or_ptr_to_int (value x) /* precondition: is int */ { + return (intnat)x; +} + +void * int_or_ptr_to_ptr (value x) /* precond: is aligned ptr */ { + return (void *)x; +} + +value int_to_int_or_ptr(intnat x) /* precondition: is odd */ { + return (value)x; +} + +value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { + return (value)x; +} + +int is_ptr(value x) { + return test_int_or_ptr(x) == 0; +} + /* A "space" describes one generation of the generational collector. */ struct space { - value *start, *next, *limit; + value *start, *next, *limit, *rem_limit; }; /* Either start==NULL (meaning that this generation has not yet been created), or start <= next <= limit. The words in start..next are allocated and initialized, and the words from next..limit are available to allocate. */ -#define MAX_SPACES 15 /* how many generations */ - #ifndef RATIO #define RATIO 2 /* size of generation i+1 / size of generation i */ /* Using RATIO=2 is faster than larger ratios, empirically */ #endif -#ifndef NURSERY_SIZE -#define NURSERY_SIZE (1<<16) +#ifndef LOG_NURSERY_SIZE +#define LOG_NURSERY_SIZE 16 #endif /* The size of generation 0 (the "nursery") should approximately match the size of the level-2 cache of the machine, according to: @@ -38,8 +66,44 @@ struct space { (which is the size of the Intel Core i7 per-core L2 cache). http://www.tomshardware.com/reviews/Intel-i7-nehalem-cpu,2041-10.html https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) - Empirical measurements show that 64k works well + Empirical measurements on Intel Core i7 in 32-bit mode show that NURSERY_SIZE=64k 4-byte words works well (or anything in the range from 32k to 128k). + Some machines are radically different, however: + Mac M2 "big" core has 64-bit words, L1 cache 128kB, shared L2 cache 16MB + Mac M2 "small" core has 64-bit words, L1 cache 64kB, shared L2 cache 4MB + On such machines the Goncalves rule of thumb may not apply; would be worth measuring + performance with different nursery sizes, on realistic workloads. + +*/ + +#define NURSERY_SIZE (1<>1); + else fprintf(f,"%ld",v>>1); } // XXX todo update for roots arrays @@ -104,14 +168,15 @@ void printtree(FILE *f, struct heap *h, value v) { #endif -void abort_with(const char *s) { - fputs(s, stderr); +void abort_with(char *s) { + fprintf(stderr, "%s", s); exit(1); } -#define Is_from(from_start, from_limit, v) \ - (from_start <= (value*)(v) && (value*)(v) < from_limit) -/* Assuming v is a pointer (Is_block(v)), tests whether v points +int Is_from(value* from_start, value * from_limit, value * v) { + return (from_start <= v && v < from_limit); +} +/* Assuming v is a pointer (is_ptr(v)), tests whether v points somewhere into the "from-space" defined by from_start and from_limit */ void forward (value *from_start, /* beginning of from-space */ @@ -130,9 +195,10 @@ void forward (value *from_start, /* beginning of from-space */ may improve the cache locality of the copied graph. */ { - value v = *p; - if(Is_block(v)) { - + value * v; + value va = *p; + if(is_ptr(va)) { + v = (value*)int_or_ptr_to_ptr(va); /* printf("Start: %lld end"" %lld word %lld \n", from_start, from_limit, v); */ /* if (v == 4360698480) printf ("Found it\n"); */ if(Is_from(from_start, from_limit, v)) { @@ -141,20 +207,21 @@ void forward (value *from_start, /* beginning of from-space */ if(hd == 0) { /* already forwarded */ *p = Field(v,0); } else { - int i; - int sz; + intnat i; + intnat sz; value *new; sz = Wosize_hd(hd); new = *next+1; *next = new+sz; - if (sz > 50) printf("Moving value %lu with tag %lu with %d fields\n", v, hd, sz); - for(i = -1; i < sz; i++) { + /* if (sz > 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); */ + Hd_val(new) = hd; + for(i = 0; i < sz; i++) { /* printf("Moving field %d\n", i); */ Field(new, i) = Field(v, i); } Hd_val(v) = 0; - Field(v, 0) = (value)new; - *p = (value)new; + Field(v, 0) = ptr_to_int_or_ptr((void *)new); + *p = ptr_to_int_or_ptr((void *)new); /* printf("New %lld\n", new); */ /* if (*p == 73832) printf("Found it\n"); */ if (depth>0) @@ -164,48 +231,45 @@ void forward (value *from_start, /* beginning of from-space */ } } } +void forward_remset (struct space *from, /* descriptor of from-space */ + struct space *to, /* descriptor of to-space */ + value **next) /* next available spot in to-space */ +{ + value *from_start = from->start, *from_limit=from->limit, *from_rem_limit=from->rem_limit; + value *q = from_limit; + assert (from_rem_limit-from_limit <= to->limit-to->start); + while (q != from_rem_limit) { + value *p = *(value**)q; + if (!(from_start <= p && p < from_limit)) { + value oldp= *p, newp; + forward(from_start, from_limit, next, p, DEPTH); + newp= *p; + if (oldp!=newp) + *(--to->limit) = (value)q; + } + q++; + } +} void forward_roots (value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value **next, /* next available spot in to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *frames) /* data structure to find the roots */ /* Forward each live root in the stack */ { - struct stack_frame *frame = ti->fp; - value *live, *curr, *limit, val; - header_t hd; int sz; + struct stack_frame *frame = frames; + value *start; size_t i, limit; /* Scan the stack by traversing the stack pointers */ - /* printf("Scanning frames \n"); */ while (frame != NULL) { - live = frame->root; - curr = live; - limit = frame->next; - /* printf("Scanning frame, curr: %lld, limit: %lld\n", curr, limit); */ - /* int cnt = 0; */ - while (curr < limit) { - /* printf("%d \n", cnt); */ - /* cnt ++; */ - /* printf("Before \n"); */ - /* Curr has the stack address of the local */ - /* printf("curr: %lld\n", curr); */ - val = *curr; - if Is_block(val) { - hd = Hd_val(val); - sz = Wosize_hd(hd); - /* printf("Moving root %lld with tag %ldd and %d fields\n", val, hd, sz); */ - } - forward(from_start, from_limit, next, curr, DEPTH); - /* printf("After \n"); */ - curr ++; - } + start = frame->root; + limit = frame->next - start; /* See NOTE-POINTER-ARITH below */ frame = frame->prev; + for (i=0; i= No_scan_tag) void do_scan(value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value *scan, /* start of unforwarded part of to-space */ @@ -219,7 +283,7 @@ void do_scan(value *from_start, /* beginning of from-space */ s = scan; /* printf("in scan \n"); */ while(s < *next) { - header_t hd = *s; + header_t hd = *((header_t*)s); mlsize_t sz = Wosize_hd(hd); int tag = Tag_hd(hd); if (!No_scan(tag)) { @@ -235,17 +299,21 @@ void do_scan(value *from_start, /* beginning of from-space */ void do_generation (struct space *from, /* descriptor of from-space */ struct space *to, /* descriptor of to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *fr) /* where are the roots? */ /* Copy the live objects out of the "from" space, into the "to" space, using fi and ti to determine the roots of liveness. */ { value *p = to->next; - assert(from->next-from->start <= to->limit-to->next); - forward_roots(from->start, from->limit, &to->next, ti); + /* assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); */ + forward_remset(from, to, &to->next); + forward_roots(from->start, from->limit, &to->next, fr); do_scan(from->start, from->limit, p, &to->next); - if(0) fprintf(stderr,"%5.3f%% occupancy\n", + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr,"%5.3f%% occupancy\n", (to->next-p)/(double)(from->next-from->start)); + #endif from->next=from->start; + from->limit=from->rem_limit; } #if 0 @@ -285,6 +353,7 @@ void create_space(struct space *s, /* which generation to create */ s->start=p; s->next=p; s->limit = p+n; + s->rem_limit = s->limit; } struct heap *create_heap() @@ -300,6 +369,7 @@ struct heap *create_heap() h->spaces[i].start = NULL; h->spaces[i].next = NULL; h->spaces[i].limit = NULL; + h->spaces[i].rem_limit = NULL; } return h; } @@ -309,26 +379,22 @@ struct thread_info *make_tinfo(void) { struct thread_info *tinfo; h = create_heap(); tinfo = (struct thread_info *)malloc(sizeof(struct thread_info)); - if (!tinfo) { - fprintf(stderr, "Could not allocate thread_info struct\n"); - exit(1); - } - - + if (!tinfo) + abort_with("Could not allocate thread_info struct\n"); tinfo->heap=h; tinfo->alloc=h->spaces[0].start; tinfo->limit=h->spaces[0].limit; tinfo->fp=NULL; /* the initial stack pointer is NULL */ + tinfo->nalloc=0; + tinfo->odata=NULL; return tinfo; } void resume(struct thread_info *ti) -/* When the garbage collector is all done, it does not "return" - to the mutator; instead, it uses this function (which does not return) - to resume the mutator by invoking the continuation, fi->fun. - But first, "resume" informs the mutator - of the new values for the alloc and limit pointers. +/* When the garbage collector is all done, inform the mutator + of the new values for the alloc and limit pointers, + and check that enough space has been freed (ti->nalloc words). */ { struct heap *h = ti->heap; @@ -337,7 +403,7 @@ void resume(struct thread_info *ti) assert (h); lo = h->spaces[0].start; hi = h->spaces[0].limit; - if (hi-lo < num_allocs) + if (hi-lo < num_allocs) /* See NOTE-POINTER-ARITH below */ abort_with ("Nursery is too small for function's num_allocs\n"); ti->alloc = lo; ti->limit = hi; @@ -349,17 +415,10 @@ void garbage_collect(struct thread_info *ti) { struct heap *h = ti->heap; /* printf("In GC\n"); */ - if (h==NULL) { - /* If the heap has not yet been initialized, create it and resume */ - h = create_heap(); - ti->heap = h; - resume(ti); - return; - } else { - int i; - assert (h->spaces[0].limit == ti->limit); - h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ - for (i=0; ispaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ + for (i=0; ispaces[i+1].start==NULL) { - int w = h->spaces[i].limit-h->spaces[i].start; + intnat w = h->spaces[i].rem_limit-h->spaces[i].start; /* See NOTE-POINTER-ARITH below */ create_space(h->spaces+(i+1), RATIO*w); } /* Copy all the objects in generation i, into generation i+1 */ - if(0) fprintf(stderr, "Generation %d: ", i); - do_generation(h->spaces+i, h->spaces+(i+1), ti); + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr, "Generation %d: ", i); + #endif + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); /* If there's enough space in gen i+1 to guarantee that the - NEXT collection into i+1 will succeed, we can stop here */ - if (h->spaces[i].limit - h->spaces[i].start + NEXT collection into i+1 will succeed, we can stop here. + We need enough space in the (unlikely) scenario where + * all the data in gen i is live ([i].limit-[i].start), and + * all the remembered set in i is preserved ([i].rem_limit-[i].limit). + */ + if (h->spaces[i].rem_limit - h->spaces[i].start /* See NOTE-POINTER-ARITH below */ <= h->spaces[i+1].limit - h->spaces[i+1].next) { resume(ti); return; } } - /* If we get to i==MAX_SPACES, that's bad news */ - abort_with("Ran out of generations\n"); - } - /* Can't reach this point */ - assert(0); + + /* If we get to i==MAX_SPACES, that's bad news */ + /* assert (MAX_SPACES == i); */ + abort_with("Ran out of generations\n"); } /* REMARK. The generation-management policy in the garbage_collect function @@ -424,26 +488,30 @@ int garbage_collect_all(struct thread_info *ti) { } int i; - assert (h->spaces[0].limit == ti->limit); - for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) { + h->spaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line more necessary here than perhaps in garbage_collect() */ + for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); - do_generation(h->spaces+i, h->spaces+(i+1), ti); - } - - /* If i is the nursery, its next won't be tracked, so need to be set to alloc */ - if(i == 0){ - h->spaces[i].next = ti->alloc; - } return i; } -/* export (deep copy if boxed) the first field of args */ -void* export(struct thread_info *ti) { +/* export (deep copy if boxed) from the given root */ +void *export(struct thread_info *ti, value root) { - /* if args[1] is unboxed, return it */ - if(!Is_block(ti->args[1])){ - return (void*) ti->args[1]; - } + + /* This block of 7 lines is new (appel 2023/06/27) and untested */ + struct stack_frame frame; + value roots[1]; + roots[0]=root; + frame.root=roots; + frame.next=roots+1; + frame.prev=NULL; + ti->fp= &frame; + + /* if root is unboxed, return it */ + if(!is_ptr(root)) + return (void *)root; /* otherwise collect all that is reachable from it to the last generation, then compact it into value_sp */ int gen_level = garbage_collect_all(ti); @@ -451,11 +519,11 @@ void* export(struct thread_info *ti) { struct space* fake_sp = (struct space*)malloc(sizeof(struct space)); create_space(fake_sp, sp->next - sp->start); - do_generation(sp, fake_sp, ti); + do_generation(sp, fake_sp, ti->fp); struct space* value_sp = (struct space*)malloc(sizeof(struct space)); create_space(value_sp, fake_sp->next - fake_sp->start); - do_generation(fake_sp, value_sp, ti); + do_generation(fake_sp, value_sp, ti->fp); /* offset start by the header */ void* result_block = (void *)(value_sp->start +1); @@ -464,6 +532,40 @@ void* export(struct thread_info *ti) { free(fake_sp); free(value_sp); - return result_block; } + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val) { + assert (ti->alloc < ti->limit); + *p_cell = p_val; + if (is_ptr(p_val)) { + *(value **)(--ti->limit) = p_cell; + } +} + +void print_heapsize(struct thread_info *ti) { + for (int i = 0; i < MAX_SPACES; i++) { + int allocated = (int)(ti->heap->spaces[i].next - ti->heap->spaces[i].start); + int remembered = (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].limit); + if (!(allocated || remembered)) { + continue; + } + printf("generation %d:\n", i); + printf(" size: %d\n", (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].start)); + printf(" allocated: %d\n", allocated); + printf(" remembered: %d\n", remembered); + } +} + +/* NOTE-POINTER-ARITH: In a few places, we do a pointer subtraction, such as + h->spaces[i].limit - h->spaces[i].start. + When p and q have type *foo, then this is much like ((int)p-(int)q)/sizeof(foo). + But note this is a SIGNED division, which makes it quite dangerous if ((int)p-(int)q) + can be larger than the maximum signed integer. So we have to be quite careful in + the program and the proof, especially when (on a 32-bit machines) our largest generation + might be similar in size to the entire address space. +*/ + + + diff --git a/cplugin/runtime/gc_stack.h b/cplugin/runtime/gc_stack.h index e25fdec9..d94870a4 100644 --- a/cplugin/runtime/gc_stack.h +++ b/cplugin/runtime/gc_stack.h @@ -1,3 +1,6 @@ +#ifndef CERTICOQ_GC_STACK_H +#define CERTICOQ_GC_STACK_H + #include "values.h" /* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. @@ -88,6 +91,9 @@ recent collection. To call the garbage collector, the mutator passes a fun_info and a thread_info, as follows. */ +#define No_scan_tag 251 +#define No_scan(t) ((t) >= No_scan_tag) + typedef const uintnat *fun_info; /* fi[0]: How many words the function might allocate fi[1]: How many slots of the args array contain live roots @@ -113,6 +119,7 @@ struct thread_info { value args[MAX_ARGS]; /* the args array */ struct stack_frame *fp; /* stack pointer */ uintnat nalloc; /* Remaining allocation until next GC call*/ + void *odata; }; struct thread_info *make_tinfo(void); @@ -162,4 +169,11 @@ value* extract_answer(struct thread_info *ti); can be treated uniformly by the caller of extract_answer(). */ -void* export(struct thread_info *ti); +void* export(struct thread_info *ti, value root); + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val); + +void print_heapsize(struct thread_info *ti); + +#endif /* CERTICOQ_GC_STACK_H */ diff --git a/cplugin/runtime/m.h b/cplugin/runtime/m.h new file mode 100644 index 00000000..1c509a54 --- /dev/null +++ b/cplugin/runtime/m.h @@ -0,0 +1,3 @@ +#define SIZEOF_PTR 8 +#define SIZEOF_LONG 8 +#define SIZEOF_INT 4 diff --git a/cplugin/runtime/values.h b/cplugin/runtime/values.h index f3b320ad..a5487d16 100644 --- a/cplugin/runtime/values.h +++ b/cplugin/runtime/values.h @@ -53,6 +53,7 @@ extern "C" { This is for use only by the GC. */ + /* CHANGE BY THE CERTICOQ/VERIFFI TEAM: We do this for CertiCoq's FFI system because for technical reasons related to @@ -68,6 +69,7 @@ attribute. On the other hand, standard OCaml wants the value to be intnat. #else typedef intnat value; #endif +/* because of VST's restrictions. */ typedef uintnat header_t; typedef uintnat mlsize_t; typedef unsigned int tag_t; /* Actually, an unsigned char */ @@ -76,7 +78,12 @@ typedef uintnat mark_t; /* Longs vs blocks. */ #define Is_long(x) (((x) & 1) != 0) -#define Is_block(x) (((x) & 1) == 0) +/* CHANGE BY THE CERTICOQ/VERIFFI TEAM: */ +#ifdef VERIFFI + #define Is_block(x) (((int)(((intnat) x) & 1)) == 0) +#else + #define Is_block(x) (((x) & 1) == 0) +#endif /* Conversion macro names are always of the form "to_from". */ /* Example: Val_long as in "Val from long" or "Val of long". */ @@ -208,10 +215,4 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif -/* Floating-point numbers. */ -#define Double_tag 253 -#define Double_wosize ((sizeof(double) / sizeof(value))) -#define Double_val(v) (* (double *)(v)) -#define Store_double_val(v,d) (* (double *)(v) = (d)) - #endif /* VALUES_H */ diff --git a/cplugin/static/printClight.ml b/cplugin/static/printClight.ml index 8e414d31..bc816db1 100644 --- a/cplugin/static/printClight.ml +++ b/cplugin/static/printClight.ml @@ -361,7 +361,7 @@ let print_dest_names_imports prog names (dest : string) (imports : string list) List.iter (fun n -> add_name (remove_primes n)) names; let fm = formatter_of_out_channel oc in open_vbox 0; - List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("gc.h" :: imports); + List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) imports; close_box (); open_box 0; print_program Clight2 fm prog; diff --git a/plugin/certicoq.ml b/plugin/certicoq.ml index 1d7698c3..6d8d4541 100644 --- a/plugin/certicoq.ml +++ b/plugin/certicoq.ml @@ -216,7 +216,8 @@ module CompileFunctor (CI : CompilerInterface) = struct let compile opts term imports = let debug = opts.debug in let options = make_pipeline_options opts in - let imports = get_global_includes () @ imports in + let runtime_imports = [if opts.cps then "gc.h" else "gc_stack.h"] in + let imports = runtime_imports @ get_global_includes () @ imports in let p = CI.compile options term in match p with | (CompM.Ret ((nenv, header), prg), dbg) -> @@ -227,7 +228,7 @@ module CompileFunctor (CI : CompilerInterface) = struct let cstr = make_fname opts (fname ^ suff ^ ".c") in let hstr = make_fname opts (fname ^ suff ^ ".h") in CI.printProg prg nenv cstr imports; - CI.printProg header nenv hstr []; + CI.printProg header nenv hstr runtime_imports; (* let cstr = Metacoq_template_plugin.Tm_util.string_to_list (Names.KerName.to_string (Names.Constant.canonical const) ^ suff ^ ".c") in * let hstr = Metacoq_template_plugin.Tm_util.string_to_list (Names.KerName.to_string (Names.Constant.canonical const) ^ suff ^ ".h") in @@ -250,7 +251,7 @@ module CompileFunctor (CI : CompilerInterface) = struct else let debug = opts.debug in let options = make_pipeline_options opts in - + let runtime_imports = [if opts.cps then "gc.h" else "gc_stack.h"] in let time = Unix.gettimeofday() in (match CI.generate_glue options globs with | CompM.Ret (((nenv, header), prg), logs) -> @@ -265,8 +266,8 @@ module CompileFunctor (CI : CompilerInterface) = struct let hstr = if standalone then fname ^ ".h" else "glue." ^ fname ^ suff ^ ".h" in let cstr = make_fname opts cstr in let hstr = make_fname opts hstr in - CI.printProg prg nenv cstr []; - CI.printProg header nenv hstr []; + CI.printProg prg nenv cstr runtime_imports; + CI.printProg header nenv hstr runtime_imports; let time = (Unix.gettimeofday() -. time) in debug_msg debug (Printf.sprintf "Printed glue code to file %s in %f s.." cstr time) diff --git a/plugin/runtime/config.h b/plugin/runtime/config.h index f3eb8ce3..4dc00718 100644 --- a/plugin/runtime/config.h +++ b/plugin/runtime/config.h @@ -16,6 +16,13 @@ #ifndef CONFIG_H #define CONFIG_H +#include "m.h" + +#if SIZEOF_PTR > 0 +#else +#error "Oops! SIZEOF_PTR is not defined" +#endif + #if SIZEOF_PTR == SIZEOF_LONG /* Standard models: ILP32 or I32LP64 */ typedef long intnat; diff --git a/plugin/runtime/gc.h b/plugin/runtime/gc.h new file mode 100644 index 00000000..46c6e6a4 --- /dev/null +++ b/plugin/runtime/gc.h @@ -0,0 +1,155 @@ +#include "values.h" + +/* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. + Andrew W. Appel, September 2016 + +The current Certicoq code generator uses Ocaml object formats, +as described in Chapter 20 of Real World Ocaml by Minsky et al. +https://realworldocaml.org/v1/en/html/memory-representation-of-values.html + +That is: + + 31 10 9 8 7 0 + +-------+---------+----------+ + | size | color | tag byte | + +-------+---------+----------+ +v --> | value[0] | + +----------------------------+ + | value[1] | + +----------------------------+ + | . | + | . | + | . | + +----------------------------+ + | value[size-1] | + +----------------------------+ + +This works for 32-bit or 64-bit words; +if 64-bit words, substitute "63" for "31" in the diagram above. +At present we'll assume 32-bit words. + +The header file "values.h", from the OCaml distribution, +has macros (etc.) for accessing these fields and headers. + +The header file "config.h", from the OCaml distribution, defines +typedef "intnat", the "natural integer type" for this compiler/machine, +and "uintnat", the "natural unsigned integer type". +Config.h also defines (BUT WE DO NOT USE) parameters for the Ocaml +generational garbage collector. + +The important definitions we use from values.h are: + +Is_block(v) : tests whether v is a pointer (even number) +Hd_val(v) : contents of the header word, i.e., just before where v points to +Field(v,i) : the i'th field of object v +Tag_hd(h) : If h is a header-word, get the constructor-tag +Wosize_hd(h): If h is a header-word, get size of the object in words + +We define the following ourselves, following Ocaml's format: + +No_scan(t) : If t is a constructor-tag, true if none of the object's + data words are to be interpreted as pointers. + (For example, character-string data) + +CALLING THE GARBAGE COLLECTOR (this part is NOT standard Ocaml): + +The mutator runs in this environment: + + NURSERY OLDER GENERATIONS + +-------------+ start---->+-------------+ +-------------+ + | args[0] | | | | | + +-------------+ | <-\ | /-->| | + | args[1] *----\ | | *---/ | | + +-------------+ \-----> | *-/ | | | + | . | +-+ | | | | + | . | alloc|*-->+-------------+ | | + | . | +-+ | | | | + +-------------+ | | | | + | args[argc-1]| +-+ | | | | + +-------------+ limit|*-->+-------------+ | | + +-+ +-------------+ + +There is a global "args" array. Certain words in "args" may +point into the heap (either the nursery or into older generations). +The nursery may point within itself, or into older generations. +Older generations may not point into the nursery. +The heap may not point into the args array. + +The mutator creates a new object by using the N+1 words (including header) +starting at "alloc", and then adding N+1 to alloc. This is only +permitted if alloc+N+1 <= limit. Otherwise, the mutator must +first call the garbage collector. + +The variables "alloc" and "limit" are owned by the mutator. +The "start" value is not actually a variable of the mutator, +but it was the value of "alloc" immediately after the most +recent collection. + +To call the garbage collector, the mutator passes a fun_info and +a thread_info, as follows. */ + +typedef const uintnat *fun_info; +/* fi[0]: How many words the function might allocate + fi[1]: How many slots of the args array contain live roots + fi[2..(fi[1]-2)]: Indices of the live roots in the args array +*/ + +struct heap; /* abstract, opaque */ + +#define MAX_ARGS 1024 + +struct thread_info { + value *alloc; /* alloc pointer */ + value *limit; /* limit pointer */ + struct heap *heap; /* Description of the generations in the heap */ + value args[MAX_ARGS]; /* the args array */ +}; + +struct thread_info *make_tinfo(void); + +void garbage_collect(fun_info fi, struct thread_info *ti); +/* Performs one garbage collection; + or if ti->heap==NULL, initializes the heap. + + The returns in a state where + (1) the "after" graph of nodes reachable from args[indices[0..num_args]] + is isomorphic to the "before" graph; and + (2) the alloc pointer points to N words of unallocated heap space + (where N>=num_allocs), such that limit-alloc=N. +*/ + +void free_heap(struct heap *h); +/* Deallocates all heap data associated with h, and returns the + * memory to the operating system (via the malloc/free system). + * After calling this function, h is a dangling pointer and should not be used. + */ + +void reset_heap(struct heap *h); +/* Empties the heap without freeing its storage. + * After a complete execution of the mutator, + * and after whoever invoked the mutator copies whatever result they want + * out of the heap, one can call this function before starting + * another mutator execution. This saves the operating-system overhead of + * free_heap() followed by the implicit create_heap that would have been + * done in the first garbage_collect() call of the next execution. + */ + +/* which slot of the args array has the answer of a certicoq program */ +#define answer_index 1 + +value* extract_answer(struct thread_info *ti); +/* y=extract_answer(x,ti) copies the dag rooted at ti->args[answer_index] + into a compact data structure starting at y[1], outside the heap, + in a single malloc'ed (therefore freeable) object at address y. + All within-the-heap pointers will now be within the object y. + If (the answer within) the heap pointed to records outside + the heap, then those will point at their original locations + outside the object y. + + Note that the start is *(y+1), not (y+1); that is, there's an + extra wrapper-record round the object. That's so that the + root-within-the-heap and the root-outside-the-heap (or root-unboxed) + can be treated uniformly by the caller of extract_answer(). +*/ + +void* export(struct thread_info *ti); diff --git a/plugin/runtime/gc_stack.c b/plugin/runtime/gc_stack.c index 5db5203c..7083c891 100644 --- a/plugin/runtime/gc_stack.c +++ b/plugin/runtime/gc_stack.c @@ -1,6 +1,7 @@ #include #include #include +#include "m.h" /* use printm.c to create m.h */ #include "config.h" #include "values.h" #include "gc_stack.h" @@ -10,23 +11,50 @@ */ +/* The following 5 functions should (in practice) compile correctly in CompCert, + but the CompCert correctness specification does not _require_ that + they compile correctly: their semantics is "undefined behavior" in + CompCert C (and in C11), but in practice they will work in any compiler. */ + +int test_int_or_ptr (value x) /* returns 1 if int, 0 if aligned ptr */ { + return (int)(((intnat)x)&1); +} + +intnat int_or_ptr_to_int (value x) /* precondition: is int */ { + return (intnat)x; +} + +void * int_or_ptr_to_ptr (value x) /* precond: is aligned ptr */ { + return (void *)x; +} + +value int_to_int_or_ptr(intnat x) /* precondition: is odd */ { + return (value)x; +} + +value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { + return (value)x; +} + +int is_ptr(value x) { + return test_int_or_ptr(x) == 0; +} + /* A "space" describes one generation of the generational collector. */ struct space { - value *start, *next, *limit; + value *start, *next, *limit, *rem_limit; }; /* Either start==NULL (meaning that this generation has not yet been created), or start <= next <= limit. The words in start..next are allocated and initialized, and the words from next..limit are available to allocate. */ -#define MAX_SPACES 15 /* how many generations */ - #ifndef RATIO #define RATIO 2 /* size of generation i+1 / size of generation i */ /* Using RATIO=2 is faster than larger ratios, empirically */ #endif -#ifndef NURSERY_SIZE -#define NURSERY_SIZE (1<<16) +#ifndef LOG_NURSERY_SIZE +#define LOG_NURSERY_SIZE 16 #endif /* The size of generation 0 (the "nursery") should approximately match the size of the level-2 cache of the machine, according to: @@ -38,8 +66,44 @@ struct space { (which is the size of the Intel Core i7 per-core L2 cache). http://www.tomshardware.com/reviews/Intel-i7-nehalem-cpu,2041-10.html https://en.wikipedia.org/wiki/Nehalem_(microarchitecture) - Empirical measurements show that 64k works well + Empirical measurements on Intel Core i7 in 32-bit mode show that NURSERY_SIZE=64k 4-byte words works well (or anything in the range from 32k to 128k). + Some machines are radically different, however: + Mac M2 "big" core has 64-bit words, L1 cache 128kB, shared L2 cache 16MB + Mac M2 "small" core has 64-bit words, L1 cache 64kB, shared L2 cache 4MB + On such machines the Goncalves rule of thumb may not apply; would be worth measuring + performance with different nursery sizes, on realistic workloads. + +*/ + +#define NURSERY_SIZE (1<>1); + else fprintf(f,"%ld",v>>1); } // XXX todo update for roots arrays @@ -104,14 +168,15 @@ void printtree(FILE *f, struct heap *h, value v) { #endif -void abort_with(const char *s) { - fputs(s, stderr); +void abort_with(char *s) { + fprintf(stderr, "%s", s); exit(1); } -#define Is_from(from_start, from_limit, v) \ - (from_start <= (value*)(v) && (value*)(v) < from_limit) -/* Assuming v is a pointer (Is_block(v)), tests whether v points +int Is_from(value* from_start, value * from_limit, value * v) { + return (from_start <= v && v < from_limit); +} +/* Assuming v is a pointer (is_ptr(v)), tests whether v points somewhere into the "from-space" defined by from_start and from_limit */ void forward (value *from_start, /* beginning of from-space */ @@ -130,9 +195,10 @@ void forward (value *from_start, /* beginning of from-space */ may improve the cache locality of the copied graph. */ { - value v = *p; - if(Is_block(v)) { - + value * v; + value va = *p; + if(is_ptr(va)) { + v = (value*)int_or_ptr_to_ptr(va); /* printf("Start: %lld end"" %lld word %lld \n", from_start, from_limit, v); */ /* if (v == 4360698480) printf ("Found it\n"); */ if(Is_from(from_start, from_limit, v)) { @@ -141,20 +207,21 @@ void forward (value *from_start, /* beginning of from-space */ if(hd == 0) { /* already forwarded */ *p = Field(v,0); } else { - int i; - int sz; + intnat i; + intnat sz; value *new; sz = Wosize_hd(hd); new = *next+1; *next = new+sz; - if (sz > 50) printf("Moving value %lu with tag %lu with %d fields\n", v, hd, sz); - for(i = -1; i < sz; i++) { + /* if (sz > 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); */ + Hd_val(new) = hd; + for(i = 0; i < sz; i++) { /* printf("Moving field %d\n", i); */ Field(new, i) = Field(v, i); } Hd_val(v) = 0; - Field(v, 0) = (value)new; - *p = (value)new; + Field(v, 0) = ptr_to_int_or_ptr((void *)new); + *p = ptr_to_int_or_ptr((void *)new); /* printf("New %lld\n", new); */ /* if (*p == 73832) printf("Found it\n"); */ if (depth>0) @@ -164,48 +231,45 @@ void forward (value *from_start, /* beginning of from-space */ } } } +void forward_remset (struct space *from, /* descriptor of from-space */ + struct space *to, /* descriptor of to-space */ + value **next) /* next available spot in to-space */ +{ + value *from_start = from->start, *from_limit=from->limit, *from_rem_limit=from->rem_limit; + value *q = from_limit; + assert (from_rem_limit-from_limit <= to->limit-to->start); + while (q != from_rem_limit) { + value *p = *(value**)q; + if (!(from_start <= p && p < from_limit)) { + value oldp= *p, newp; + forward(from_start, from_limit, next, p, DEPTH); + newp= *p; + if (oldp!=newp) + *(--to->limit) = (value)q; + } + q++; + } +} void forward_roots (value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value **next, /* next available spot in to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *frames) /* data structure to find the roots */ /* Forward each live root in the stack */ { - struct stack_frame *frame = ti->fp; - value *live, *curr, *limit, val; - header_t hd; int sz; + struct stack_frame *frame = frames; + value *start; size_t i, limit; /* Scan the stack by traversing the stack pointers */ - /* printf("Scanning frames \n"); */ while (frame != NULL) { - live = frame->root; - curr = live; - limit = frame->next; - /* printf("Scanning frame, curr: %lld, limit: %lld\n", curr, limit); */ - /* int cnt = 0; */ - while (curr < limit) { - /* printf("%d \n", cnt); */ - /* cnt ++; */ - /* printf("Before \n"); */ - /* Curr has the stack address of the local */ - /* printf("curr: %lld\n", curr); */ - val = *curr; - if Is_block(val) { - hd = Hd_val(val); - sz = Wosize_hd(hd); - /* printf("Moving root %lld with tag %ldd and %d fields\n", val, hd, sz); */ - } - forward(from_start, from_limit, next, curr, DEPTH); - /* printf("After \n"); */ - curr ++; - } + start = frame->root; + limit = frame->next - start; /* See NOTE-POINTER-ARITH below */ frame = frame->prev; + for (i=0; i= No_scan_tag) void do_scan(value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value *scan, /* start of unforwarded part of to-space */ @@ -219,7 +283,7 @@ void do_scan(value *from_start, /* beginning of from-space */ s = scan; /* printf("in scan \n"); */ while(s < *next) { - header_t hd = *s; + header_t hd = *((header_t*)s); mlsize_t sz = Wosize_hd(hd); int tag = Tag_hd(hd); if (!No_scan(tag)) { @@ -235,17 +299,21 @@ void do_scan(value *from_start, /* beginning of from-space */ void do_generation (struct space *from, /* descriptor of from-space */ struct space *to, /* descriptor of to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *fr) /* where are the roots? */ /* Copy the live objects out of the "from" space, into the "to" space, using fi and ti to determine the roots of liveness. */ { value *p = to->next; - assert(from->next-from->start <= to->limit-to->next); - forward_roots(from->start, from->limit, &to->next, ti); + /* assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); */ + forward_remset(from, to, &to->next); + forward_roots(from->start, from->limit, &to->next, fr); do_scan(from->start, from->limit, p, &to->next); - if(0) fprintf(stderr,"%5.3f%% occupancy\n", + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr,"%5.3f%% occupancy\n", (to->next-p)/(double)(from->next-from->start)); + #endif from->next=from->start; + from->limit=from->rem_limit; } #if 0 @@ -285,6 +353,7 @@ void create_space(struct space *s, /* which generation to create */ s->start=p; s->next=p; s->limit = p+n; + s->rem_limit = s->limit; } struct heap *create_heap() @@ -300,6 +369,7 @@ struct heap *create_heap() h->spaces[i].start = NULL; h->spaces[i].next = NULL; h->spaces[i].limit = NULL; + h->spaces[i].rem_limit = NULL; } return h; } @@ -309,26 +379,22 @@ struct thread_info *make_tinfo(void) { struct thread_info *tinfo; h = create_heap(); tinfo = (struct thread_info *)malloc(sizeof(struct thread_info)); - if (!tinfo) { - fprintf(stderr, "Could not allocate thread_info struct\n"); - exit(1); - } - - + if (!tinfo) + abort_with("Could not allocate thread_info struct\n"); tinfo->heap=h; tinfo->alloc=h->spaces[0].start; tinfo->limit=h->spaces[0].limit; tinfo->fp=NULL; /* the initial stack pointer is NULL */ + tinfo->nalloc=0; + tinfo->odata=NULL; return tinfo; } void resume(struct thread_info *ti) -/* When the garbage collector is all done, it does not "return" - to the mutator; instead, it uses this function (which does not return) - to resume the mutator by invoking the continuation, fi->fun. - But first, "resume" informs the mutator - of the new values for the alloc and limit pointers. +/* When the garbage collector is all done, inform the mutator + of the new values for the alloc and limit pointers, + and check that enough space has been freed (ti->nalloc words). */ { struct heap *h = ti->heap; @@ -337,7 +403,7 @@ void resume(struct thread_info *ti) assert (h); lo = h->spaces[0].start; hi = h->spaces[0].limit; - if (hi-lo < num_allocs) + if (hi-lo < num_allocs) /* See NOTE-POINTER-ARITH below */ abort_with ("Nursery is too small for function's num_allocs\n"); ti->alloc = lo; ti->limit = hi; @@ -349,17 +415,10 @@ void garbage_collect(struct thread_info *ti) { struct heap *h = ti->heap; /* printf("In GC\n"); */ - if (h==NULL) { - /* If the heap has not yet been initialized, create it and resume */ - h = create_heap(); - ti->heap = h; - resume(ti); - return; - } else { - int i; - assert (h->spaces[0].limit == ti->limit); - h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ - for (i=0; ispaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ + for (i=0; ispaces[i+1].start==NULL) { - int w = h->spaces[i].limit-h->spaces[i].start; + intnat w = h->spaces[i].rem_limit-h->spaces[i].start; /* See NOTE-POINTER-ARITH below */ create_space(h->spaces+(i+1), RATIO*w); } /* Copy all the objects in generation i, into generation i+1 */ - if(0) fprintf(stderr, "Generation %d: ", i); - do_generation(h->spaces+i, h->spaces+(i+1), ti); + #ifdef CERTICOQ_DEBUG_GC + fprintf(stderr, "Generation %d: ", i); + #endif + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); /* If there's enough space in gen i+1 to guarantee that the - NEXT collection into i+1 will succeed, we can stop here */ - if (h->spaces[i].limit - h->spaces[i].start + NEXT collection into i+1 will succeed, we can stop here. + We need enough space in the (unlikely) scenario where + * all the data in gen i is live ([i].limit-[i].start), and + * all the remembered set in i is preserved ([i].rem_limit-[i].limit). + */ + if (h->spaces[i].rem_limit - h->spaces[i].start /* See NOTE-POINTER-ARITH below */ <= h->spaces[i+1].limit - h->spaces[i+1].next) { resume(ti); return; } } - /* If we get to i==MAX_SPACES, that's bad news */ - abort_with("Ran out of generations\n"); - } - /* Can't reach this point */ - assert(0); + + /* If we get to i==MAX_SPACES, that's bad news */ + /* assert (MAX_SPACES == i); */ + abort_with("Ran out of generations\n"); } /* REMARK. The generation-management policy in the garbage_collect function @@ -424,26 +488,30 @@ int garbage_collect_all(struct thread_info *ti) { } int i; - assert (h->spaces[0].limit == ti->limit); - for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) { + h->spaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line more necessary here than perhaps in garbage_collect() */ + for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); - do_generation(h->spaces+i, h->spaces+(i+1), ti); - } - - /* If i is the nursery, its next won't be tracked, so need to be set to alloc */ - if(i == 0){ - h->spaces[i].next = ti->alloc; - } return i; } -/* export (deep copy if boxed) the first field of args */ -void* export(struct thread_info *ti) { +/* export (deep copy if boxed) from the given root */ +void *export(struct thread_info *ti, value root) { - /* if args[1] is unboxed, return it */ - if(!Is_block(ti->args[1])){ - return (void*) ti->args[1]; - } + + /* This block of 7 lines is new (appel 2023/06/27) and untested */ + struct stack_frame frame; + value roots[1]; + roots[0]=root; + frame.root=roots; + frame.next=roots+1; + frame.prev=NULL; + ti->fp= &frame; + + /* if root is unboxed, return it */ + if(!is_ptr(root)) + return (void *)root; /* otherwise collect all that is reachable from it to the last generation, then compact it into value_sp */ int gen_level = garbage_collect_all(ti); @@ -451,11 +519,11 @@ void* export(struct thread_info *ti) { struct space* fake_sp = (struct space*)malloc(sizeof(struct space)); create_space(fake_sp, sp->next - sp->start); - do_generation(sp, fake_sp, ti); + do_generation(sp, fake_sp, ti->fp); struct space* value_sp = (struct space*)malloc(sizeof(struct space)); create_space(value_sp, fake_sp->next - fake_sp->start); - do_generation(fake_sp, value_sp, ti); + do_generation(fake_sp, value_sp, ti->fp); /* offset start by the header */ void* result_block = (void *)(value_sp->start +1); @@ -464,6 +532,40 @@ void* export(struct thread_info *ti) { free(fake_sp); free(value_sp); - return result_block; } + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val) { + assert (ti->alloc < ti->limit); + *p_cell = p_val; + if (is_ptr(p_val)) { + *(value **)(--ti->limit) = p_cell; + } +} + +void print_heapsize(struct thread_info *ti) { + for (int i = 0; i < MAX_SPACES; i++) { + int allocated = (int)(ti->heap->spaces[i].next - ti->heap->spaces[i].start); + int remembered = (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].limit); + if (!(allocated || remembered)) { + continue; + } + printf("generation %d:\n", i); + printf(" size: %d\n", (int)(ti->heap->spaces[i].rem_limit - ti->heap->spaces[i].start)); + printf(" allocated: %d\n", allocated); + printf(" remembered: %d\n", remembered); + } +} + +/* NOTE-POINTER-ARITH: In a few places, we do a pointer subtraction, such as + h->spaces[i].limit - h->spaces[i].start. + When p and q have type *foo, then this is much like ((int)p-(int)q)/sizeof(foo). + But note this is a SIGNED division, which makes it quite dangerous if ((int)p-(int)q) + can be larger than the maximum signed integer. So we have to be quite careful in + the program and the proof, especially when (on a 32-bit machines) our largest generation + might be similar in size to the entire address space. +*/ + + + diff --git a/plugin/runtime/gc_stack.h b/plugin/runtime/gc_stack.h index e25fdec9..d94870a4 100644 --- a/plugin/runtime/gc_stack.h +++ b/plugin/runtime/gc_stack.h @@ -1,3 +1,6 @@ +#ifndef CERTICOQ_GC_STACK_H +#define CERTICOQ_GC_STACK_H + #include "values.h" /* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. @@ -88,6 +91,9 @@ recent collection. To call the garbage collector, the mutator passes a fun_info and a thread_info, as follows. */ +#define No_scan_tag 251 +#define No_scan(t) ((t) >= No_scan_tag) + typedef const uintnat *fun_info; /* fi[0]: How many words the function might allocate fi[1]: How many slots of the args array contain live roots @@ -113,6 +119,7 @@ struct thread_info { value args[MAX_ARGS]; /* the args array */ struct stack_frame *fp; /* stack pointer */ uintnat nalloc; /* Remaining allocation until next GC call*/ + void *odata; }; struct thread_info *make_tinfo(void); @@ -162,4 +169,11 @@ value* extract_answer(struct thread_info *ti); can be treated uniformly by the caller of extract_answer(). */ -void* export(struct thread_info *ti); +void* export(struct thread_info *ti, value root); + +/* mutable write barrier */ +void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val); + +void print_heapsize(struct thread_info *ti); + +#endif /* CERTICOQ_GC_STACK_H */ diff --git a/plugin/runtime/m.h b/plugin/runtime/m.h new file mode 100644 index 00000000..1c509a54 --- /dev/null +++ b/plugin/runtime/m.h @@ -0,0 +1,3 @@ +#define SIZEOF_PTR 8 +#define SIZEOF_LONG 8 +#define SIZEOF_INT 4 diff --git a/plugin/runtime/values.h b/plugin/runtime/values.h index ed396543..a5487d16 100644 --- a/plugin/runtime/values.h +++ b/plugin/runtime/values.h @@ -53,6 +53,7 @@ extern "C" { This is for use only by the GC. */ + /* CHANGE BY THE CERTICOQ/VERIFFI TEAM: We do this for CertiCoq's FFI system because for technical reasons related to @@ -68,7 +69,7 @@ attribute. On the other hand, standard OCaml wants the value to be intnat. #else typedef intnat value; #endif - +/* because of VST's restrictions. */ typedef uintnat header_t; typedef uintnat mlsize_t; typedef unsigned int tag_t; /* Actually, an unsigned char */ @@ -77,7 +78,12 @@ typedef uintnat mark_t; /* Longs vs blocks. */ #define Is_long(x) (((x) & 1) != 0) -#define Is_block(x) (((x) & 1) == 0) +/* CHANGE BY THE CERTICOQ/VERIFFI TEAM: */ +#ifdef VERIFFI + #define Is_block(x) (((int)(((intnat) x) & 1)) == 0) +#else + #define Is_block(x) (((x) & 1) == 0) +#endif /* Conversion macro names are always of the form "to_from". */ /* Example: Val_long as in "Val from long" or "Val of long". */ @@ -209,10 +215,4 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif -/* Floating-point numbers. */ -#define Double_tag 253 -#define Double_wosize ((sizeof(double) / sizeof(value))) -#define Double_val(v) (* (double *)(v)) -#define Store_double_val(v,d) (* (double *)(v) = (d)) - #endif /* VALUES_H */ diff --git a/plugin/static/printClight.ml b/plugin/static/printClight.ml index 7a22dafd..e31edc4c 100755 --- a/plugin/static/printClight.ml +++ b/plugin/static/printClight.ml @@ -361,7 +361,7 @@ let print_dest_names_imports prog names (dest : string) (imports : string list) List.iter (fun n -> add_name (remove_primes n)) names; let fm = formatter_of_out_channel oc in open_vbox 0; - List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) ("gc.h" :: imports); + List.iter (fun s -> fprintf fm "#include \"%s\"" s; pp_print_newline fm ();) imports; close_box (); open_box 0; print_program Clight2 fm prog; diff --git a/theories/Codegen/LambdaANF_to_Clight.v b/theories/Codegen/LambdaANF_to_Clight.v index 0e57c4d5..272298ed 100644 --- a/theories/Codegen/LambdaANF_to_Clight.v +++ b/theories/Codegen/LambdaANF_to_Clight.v @@ -1315,12 +1315,12 @@ Definition make_defs_fast end. Definition composites : list composite_definition := - Composite threadInfIdent Struct - (Member_plain allocIdent valPtr :: - Member_plain limitIdent valPtr :: - Member_plain heapInfIdent (tptr (Tstruct heapInfIdent noattr)) :: - Member_plain argsIdent (Tarray uval maxArgs noattr) :: - nil) noattr :: + (* Composite threadInfIdent Struct *) + (* (Member_plain allocIdent valPtr :: *) + (* Member_plain limitIdent valPtr :: *) + (* Member_plain heapInfIdent (tptr (Tstruct heapInfIdent noattr)) :: *) + (* Member_plain argsIdent (Tarray uval maxArgs noattr) :: *) + (* nil) noattr :: *) nil. Definition mk_prog_opt diff --git a/theories/Codegen/LambdaANF_to_Clight_stack.v b/theories/Codegen/LambdaANF_to_Clight_stack.v index 3cf0f646..e09b97a7 100644 --- a/theories/Codegen/LambdaANF_to_Clight_stack.v +++ b/theories/Codegen/LambdaANF_to_Clight_stack.v @@ -1224,21 +1224,21 @@ Definition to_plain_members (l : list (ident * type)) : list member := (* Types declared at the begining of the program *) Definition composites : list composite_definition := - Composite stackframeTIdent Struct - (to_plain_members ((nextFld, valPtr) :: - (rootFld, valPtr) :: - (prevFld, (tptr stackframeT)) :: nil)) - noattr :: - Composite threadInfIdent Struct - (to_plain_members ((allocIdent, valPtr) :: - (limitIdent, valPtr) :: - (heapInfIdent, (tptr (Tstruct heapInfIdent noattr))) :: - (argsIdent, (Tarray uval maxArgs noattr)) :: - (fpIdent, (tptr stackframeT)) :: - (nallocIdent, val) :: nil)) (* Zoe : This is the number of allocations until the next GC call so that GC can perform a test. - * Note that it will be coerced to UL from ULL. That should be safe for the values we're using but - * consider changing it too. *) - noattr :: + (* Composite stackframeTIdent Struct *) + (* (to_plain_members ((nextFld, valPtr) :: *) + (* (rootFld, valPtr) :: *) + (* (prevFld, (tptr stackframeT)) :: nil)) *) + (* noattr :: *) + (* Composite threadInfIdent Struct *) + (* (to_plain_members ((allocIdent, valPtr) :: *) + (* (limitIdent, valPtr) :: *) + (* (heapInfIdent, (tptr (Tstruct heapInfIdent noattr))) :: *) + (* (argsIdent, (Tarray uval maxArgs noattr)) :: *) + (* (fpIdent, (tptr stackframeT)) :: *) + (* (nallocIdent, val) :: nil)) (1* Zoe : This is the number of allocations until the next GC call so that GC can perform a test. *) + (* * Note that it will be coerced to UL from ULL. That should be safe for the values we're using but *) + (* * consider changing it too. *1) *) + (* noattr :: *) nil. Definition mk_prog_opt (defs: list (ident * globdef Clight.fundef type)) diff --git a/theories/Runtime/config.h b/theories/Runtime/config.h index f3eb8ce3..4dc00718 100644 --- a/theories/Runtime/config.h +++ b/theories/Runtime/config.h @@ -16,6 +16,13 @@ #ifndef CONFIG_H #define CONFIG_H +#include "m.h" + +#if SIZEOF_PTR > 0 +#else +#error "Oops! SIZEOF_PTR is not defined" +#endif + #if SIZEOF_PTR == SIZEOF_LONG /* Standard models: ILP32 or I32LP64 */ typedef long intnat; diff --git a/theories/Runtime/gc.c b/theories/Runtime/gc.c index e32a7dcc..6f628da4 100644 --- a/theories/Runtime/gc.c +++ b/theories/Runtime/gc.c @@ -5,6 +5,35 @@ #include "values.h" #include "gc.h" +/* The following 5 functions should (in practice) compile correctly in CompCert, + but the CompCert correctness specification does not _require_ that + they compile correctly: their semantics is "undefined behavior" in + CompCert C (and in C11), but in practice they will work in any compiler. */ + +int test_int_or_ptr (value x) /* returns 1 if int, 0 if aligned ptr */ { + return (int)(((intnat)x)&1); +} + +intnat int_or_ptr_to_int (value x) /* precondition: is int */ { + return (intnat)x; +} + +void * int_or_ptr_to_ptr (value x) /* precond: is aligned ptr */ { + return (void *)x; +} + +value int_to_int_or_ptr(intnat x) /* precondition: is odd */ { + return (value)x; +} + +value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { + return (value)x; +} + +int is_ptr(value x) { + return test_int_or_ptr(x) == 0; +} + /* A "space" describes one generation of the generational collector. */ struct space { value *start, *next, *limit; @@ -59,7 +88,7 @@ int in_heap(struct heap *h, value v) { } void printtree(FILE *f, struct heap *h, value v) { - if(Is_block(v)) + if(is_ptr(v)) if (in_heap(h,v)) { header_t hd = Field(v,-1); int sz = Wosize_hd(hd); @@ -105,7 +134,7 @@ void abort_with(char *s) { #define Is_from(from_start, from_limit, v) \ (from_start <= (value*)(v) && (value*)(v) < from_limit) -/* Assuming v is a pointer (Is_block(v)), tests whether v points +/* Assuming v is a pointer (is_ptr(v)), tests whether v points somewhere into the "from-space" defined by from_start and from_limit */ void forward (value *from_start, /* beginning of from-space */ @@ -125,7 +154,7 @@ void forward (value *from_start, /* beginning of from-space */ */ { value v = *p; - if(Is_block(v)) { + if(is_ptr(v)) { if(Is_from(from_start, from_limit, v)) { @@ -411,7 +440,7 @@ uintnat const fake_fi[3] = {0, 1, 1}; void* export(struct thread_info *ti) { /* if args[1] is unboxed, return it */ - if(!Is_block(ti->args[1])){ + if(!is_ptr(ti->args[1])){ return ti->args[1]; } diff --git a/theories/Runtime/gc_stack.c b/theories/Runtime/gc_stack.c index 62d34563..7083c891 100644 --- a/theories/Runtime/gc_stack.c +++ b/theories/Runtime/gc_stack.c @@ -1,7 +1,9 @@ #include #include #include +#include "m.h" /* use printm.c to create m.h */ #include "config.h" +#include "values.h" #include "gc_stack.h" /* A version of GC that scans a stack in order to find the roots. It is useful @@ -34,7 +36,7 @@ value ptr_to_int_or_ptr(void *x) /* precondition: is aligned */ { return (value)x; } -int Is_block(value x) { +int is_ptr(value x) { return test_int_or_ptr(x) == 0; } @@ -75,11 +77,33 @@ struct space { */ #define NURSERY_SIZE (1< 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); - for(i = -1; i < sz; i++) { + /* if (sz > 50) printf("Moving value %p with tag %ld with %d fields\n", (void*)v, hd, sz); */ + Hd_val(new) = hd; + for(i = 0; i < sz; i++) { /* printf("Moving field %d\n", i); */ Field(new, i) = Field(v, i); } Hd_val(v) = 0; - Field(v, 0) = (value)new; - *p = (value)new; + Field(v, 0) = ptr_to_int_or_ptr((void *)new); + *p = ptr_to_int_or_ptr((void *)new); /* printf("New %lld\n", new); */ /* if (*p == 73832) printf("Found it\n"); */ if (depth>0) @@ -228,19 +254,19 @@ void forward_remset (struct space *from, /* descriptor of from-space */ void forward_roots (value *from_start, /* beginning of from-space */ value *from_limit, /* end of from-space */ value **next, /* next available spot in to-space */ - struct thread_info *ti) /* where's the args array? */ + struct stack_frame *frames) /* data structure to find the roots */ /* Forward each live root in the stack */ { - struct stack_frame *frame = ti->fp; - value *curr, *limit; + struct stack_frame *frame = frames; + value *start; size_t i, limit; /* Scan the stack by traversing the stack pointers */ while (frame != NULL) { - curr = frame->root; - limit = frame->next; - for (curr=frame->root; currroot; + limit = frame->next - start; /* See NOTE-POINTER-ARITH below */ frame = frame->prev; + for (i=0; inext; - assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); + /* assert(from->next-from->start + from->rem_limit-from->limit <= to->limit-to->next); */ forward_remset(from, to, &to->next); - forward_roots(from->start, from->limit, &to->next, ti); + forward_roots(from->start, from->limit, &to->next, fr); do_scan(from->start, from->limit, p, &to->next); #ifdef CERTICOQ_DEBUG_GC fprintf(stderr,"%5.3f%% occupancy\n", @@ -377,7 +403,7 @@ void resume(struct thread_info *ti) assert (h); lo = h->spaces[0].start; hi = h->spaces[0].limit; - if (hi-lo < num_allocs) + if (hi-lo < num_allocs) /* See NOTE-POINTER-ARITH below */ abort_with ("Nursery is too small for function's num_allocs\n"); ti->alloc = lo; ti->limit = hi; @@ -389,17 +415,10 @@ void garbage_collect(struct thread_info *ti) { struct heap *h = ti->heap; /* printf("In GC\n"); */ - if (h==NULL) { - /* If the heap has not yet been initialized, create it and resume */ - h = create_heap(); - ti->heap = h; - resume(ti); - return; - } else { - int i; - h->spaces[0].limit = ti->limit; - h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ - for (i=0; ispaces[0].limit = ti->limit; + h->spaces[0].next = ti->alloc; /* this line is probably unnecessary */ + for (i=0; ispaces[i+1].start==NULL) { - int w = h->spaces[i].rem_limit-h->spaces[i].start; + intnat w = h->spaces[i].rem_limit-h->spaces[i].start; /* See NOTE-POINTER-ARITH below */ create_space(h->spaces+(i+1), RATIO*w); } /* Copy all the objects in generation i, into generation i+1 */ #ifdef CERTICOQ_DEBUG_GC fprintf(stderr, "Generation %d: ", i); #endif - do_generation(h->spaces+i, h->spaces+(i+1), ti); + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); /* If there's enough space in gen i+1 to guarantee that the NEXT collection into i+1 will succeed, we can stop here. We need enough space in the (unlikely) scenario where * all the data in gen i is live ([i].limit-[i].start), and * all the remembered set in i is preserved ([i].rem_limit-[i].limit). */ - if (h->spaces[i].rem_limit - h->spaces[i].start + if (h->spaces[i].rem_limit - h->spaces[i].start /* See NOTE-POINTER-ARITH below */ <= h->spaces[i+1].limit - h->spaces[i+1].next) { resume(ti); return; } } - /* If we get to i==MAX_SPACES, that's bad news */ - assert (MAX_SPACES == i); - abort_with("Ran out of generations\n"); - } - /* Can't reach this point */ - assert(0); + /* If we get to i==MAX_SPACES, that's bad news */ + /* assert (MAX_SPACES == i); */ + abort_with("Ran out of generations\n"); } /* REMARK. The generation-management policy in the garbage_collect function @@ -475,7 +491,7 @@ int garbage_collect_all(struct thread_info *ti) { h->spaces[0].limit = ti->limit; h->spaces[0].next = ti->alloc; /* this line more necessary here than perhaps in garbage_collect() */ for (i=0; i < MAX_SPACES - 1 && h->spaces[i+1].start != NULL; i++) - do_generation(h->spaces+i, h->spaces+(i+1), ti); + do_generation(h->spaces+i, h->spaces+(i+1), ti->fp); return i; } @@ -494,7 +510,7 @@ void *export(struct thread_info *ti, value root) { ti->fp= &frame; /* if root is unboxed, return it */ - if(!Is_block(root)) + if(!is_ptr(root)) return (void *)root; /* otherwise collect all that is reachable from it to the last generation, then compact it into value_sp */ @@ -503,11 +519,11 @@ void *export(struct thread_info *ti, value root) { struct space* fake_sp = (struct space*)malloc(sizeof(struct space)); create_space(fake_sp, sp->next - sp->start); - do_generation(sp, fake_sp, ti); + do_generation(sp, fake_sp, ti->fp); struct space* value_sp = (struct space*)malloc(sizeof(struct space)); create_space(value_sp, fake_sp->next - fake_sp->start); - do_generation(fake_sp, value_sp, ti); + do_generation(fake_sp, value_sp, ti->fp); /* offset start by the header */ void* result_block = (void *)(value_sp->start +1); @@ -523,7 +539,7 @@ void *export(struct thread_info *ti, value root) { void certicoq_modify(struct thread_info *ti, value *p_cell, value p_val) { assert (ti->alloc < ti->limit); *p_cell = p_val; - if (Is_block(p_val)) { + if (is_ptr(p_val)) { *(value **)(--ti->limit) = p_cell; } } @@ -541,3 +557,15 @@ void print_heapsize(struct thread_info *ti) { printf(" remembered: %d\n", remembered); } } + +/* NOTE-POINTER-ARITH: In a few places, we do a pointer subtraction, such as + h->spaces[i].limit - h->spaces[i].start. + When p and q have type *foo, then this is much like ((int)p-(int)q)/sizeof(foo). + But note this is a SIGNED division, which makes it quite dangerous if ((int)p-(int)q) + can be larger than the maximum signed integer. So we have to be quite careful in + the program and the proof, especially when (on a 32-bit machines) our largest generation + might be similar in size to the entire address space. +*/ + + + diff --git a/theories/Runtime/gc_stack.h b/theories/Runtime/gc_stack.h index 9b937fd6..d94870a4 100644 --- a/theories/Runtime/gc_stack.h +++ b/theories/Runtime/gc_stack.h @@ -1,6 +1,8 @@ #ifndef CERTICOQ_GC_STACK_H #define CERTICOQ_GC_STACK_H +#include "values.h" + /* EXPLANATION OF THE CERTICOQ GENERATIONAL GARBAGE COLLECTOR. Andrew W. Appel, September 2016 @@ -89,37 +91,6 @@ recent collection. To call the garbage collector, the mutator passes a fun_info and a thread_info, as follows. */ -typedef void * value -#ifdef COMPCERT - __attribute((aligned(_Alignof(void *)))) -#endif - ; -typedef uintnat header_t; -typedef uintnat mlsize_t; -typedef unsigned int tag_t; /* Actually, an unsigned char */ -typedef uintnat color_t; -typedef uintnat mark_t; - -#define Hd_val(val) (((header_t *) (val)) [-1]) /* Also an l-value. */ -#define Field(x, i) (((value *)(x)) [i]) /* Also an l-value. */ -#define PROFINFO_SHIFT (64 - PROFINFO_WIDTH) -#define PROFINFO_MASK ((1ull << PROFINFO_WIDTH) - 1ull) - -#define Tag_hd(hd) ((tag_t) ((hd) & 0xFF)) -#ifdef WITH_SPACETIME -#define Hd_no_profinfo(hd) ((hd) & ~(PROFINFO_MASK << PROFINFO_SHIFT)) -#define Wosize_hd(hd) ((mlsize_t) ((Hd_no_profinfo(hd)) >> 10)) -#else -#define Wosize_hd(hd) ((mlsize_t) ((hd) >> 10)) -#endif /* SPACETIME */ -#ifdef ARCH_SIXTYFOUR -/* [Profinfo_hd] is used when the compiler is not configured for Spacetime - (e.g. when decoding profiles). */ -#define Profinfo_hd(hd) (((mlsize_t) ((hd) >> PROFINFO_SHIFT)) & PROFINFO_MASK) -#else -#define Profinfo_hd(hd) ((hd) & 0) -#endif /* ARCH_SIXTYFOUR */ - #define No_scan_tag 251 #define No_scan(t) ((t) >= No_scan_tag) diff --git a/theories/Runtime/m.h b/theories/Runtime/m.h new file mode 100644 index 00000000..1c509a54 --- /dev/null +++ b/theories/Runtime/m.h @@ -0,0 +1,3 @@ +#define SIZEOF_PTR 8 +#define SIZEOF_LONG 8 +#define SIZEOF_INT 4 diff --git a/theories/Runtime/values.h b/theories/Runtime/values.h index d800c912..a5487d16 100644 --- a/theories/Runtime/values.h +++ b/theories/Runtime/values.h @@ -53,6 +53,7 @@ extern "C" { This is for use only by the GC. */ + /* CHANGE BY THE CERTICOQ/VERIFFI TEAM: We do this for CertiCoq's FFI system because for technical reasons related to @@ -68,6 +69,7 @@ attribute. On the other hand, standard OCaml wants the value to be intnat. #else typedef intnat value; #endif +/* because of VST's restrictions. */ typedef uintnat header_t; typedef uintnat mlsize_t; typedef unsigned int tag_t; /* Actually, an unsigned char */ @@ -76,7 +78,12 @@ typedef uintnat mark_t; /* Longs vs blocks. */ #define Is_long(x) (((x) & 1) != 0) -#define Is_block(x) (((x) & 1) == 0) +/* CHANGE BY THE CERTICOQ/VERIFFI TEAM: */ +#ifdef VERIFFI + #define Is_block(x) (((int)(((intnat) x) & 1)) == 0) +#else + #define Is_block(x) (((x) & 1) == 0) +#endif /* Conversion macro names are always of the form "to_from". */ /* Example: Val_long as in "Val from long" or "Val of long". */ From 067cc01833ba974c7fec3140957fd3d77f4faf0c Mon Sep 17 00:00:00 2001 From: Joomy Korkut Date: Tue, 24 Oct 2023 08:37:23 -0400 Subject: [PATCH 4/5] Fix last errors regarding value type in codegen --- bootstrap/certicoqc/certicoqc_wrapper.c | 4 ---- bootstrap/certicoqc/values.h | 6 ++++++ bootstrap/certicoqchk/certicoqchk_wrapper.c | 4 ---- bootstrap/certicoqchk/values.h | 6 ++++++ cplugin/runtime/values.h | 6 ++++++ plugin/runtime/values.h | 6 ++++++ theories/Codegen/LambdaANF_to_Clight_stack.v | 15 ++------------- theories/Runtime/values.h | 6 ++++++ 8 files changed, 32 insertions(+), 21 deletions(-) diff --git a/bootstrap/certicoqc/certicoqc_wrapper.c b/bootstrap/certicoqc/certicoqc_wrapper.c index adbfa805..9c35e3b4 100644 --- a/bootstrap/certicoqc/certicoqc_wrapper.c +++ b/bootstrap/certicoqc/certicoqc_wrapper.c @@ -10,10 +10,6 @@ extern value body(struct thread_info *); extern value *call(struct thread_info *, value, value); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} - // external certicoq_pipeline : (coq_Options × ExtractedASTBaseQuoter.quoted_program) -> coq_Cprogram error * Bytestring.String.t = "certicoq_pipeline" CAMLprim value certicoq_pipeline_wrapper(value opts, value prog) { CAMLparam2 (opts, prog); diff --git a/bootstrap/certicoqc/values.h b/bootstrap/certicoqc/values.h index a5487d16..aeefec1f 100644 --- a/bootstrap/certicoqc/values.h +++ b/bootstrap/certicoqc/values.h @@ -215,4 +215,10 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif +/* Floating-point numbers. */ +#define Double_tag 253 +#define Double_wosize ((sizeof(double) / sizeof(value))) +#define Double_val(v) (* (double *)(v)) +#define Store_double_val(v,d) (* (double *)(v) = (d)) + #endif /* VALUES_H */ diff --git a/bootstrap/certicoqchk/certicoqchk_wrapper.c b/bootstrap/certicoqchk/certicoqchk_wrapper.c index a558362c..2bb9ab81 100644 --- a/bootstrap/certicoqchk/certicoqchk_wrapper.c +++ b/bootstrap/certicoqchk/certicoqchk_wrapper.c @@ -10,10 +10,6 @@ extern value body(struct thread_info *); extern value *call(struct thread_info *, value, value); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} - // external : (coq_Options × ExtractedASTBaseQuoter.quoted_program) -> bool = "certicoqchk_wrapper" CAMLprim value certicoqchk_wrapper(value prog) { CAMLparam1 (prog); diff --git a/bootstrap/certicoqchk/values.h b/bootstrap/certicoqchk/values.h index a5487d16..aeefec1f 100644 --- a/bootstrap/certicoqchk/values.h +++ b/bootstrap/certicoqchk/values.h @@ -215,4 +215,10 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif +/* Floating-point numbers. */ +#define Double_tag 253 +#define Double_wosize ((sizeof(double) / sizeof(value))) +#define Double_val(v) (* (double *)(v)) +#define Store_double_val(v,d) (* (double *)(v) = (d)) + #endif /* VALUES_H */ diff --git a/cplugin/runtime/values.h b/cplugin/runtime/values.h index a5487d16..aeefec1f 100644 --- a/cplugin/runtime/values.h +++ b/cplugin/runtime/values.h @@ -215,4 +215,10 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif +/* Floating-point numbers. */ +#define Double_tag 253 +#define Double_wosize ((sizeof(double) / sizeof(value))) +#define Double_val(v) (* (double *)(v)) +#define Store_double_val(v,d) (* (double *)(v) = (d)) + #endif /* VALUES_H */ diff --git a/plugin/runtime/values.h b/plugin/runtime/values.h index a5487d16..aeefec1f 100644 --- a/plugin/runtime/values.h +++ b/plugin/runtime/values.h @@ -215,4 +215,10 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif +/* Floating-point numbers. */ +#define Double_tag 253 +#define Double_wosize ((sizeof(double) / sizeof(value))) +#define Double_val(v) (* (double *)(v)) +#define Store_double_val(v,d) (* (double *)(v) = (d)) + #endif /* VALUES_H */ diff --git a/theories/Codegen/LambdaANF_to_Clight_stack.v b/theories/Codegen/LambdaANF_to_Clight_stack.v index e09b97a7..616feabd 100644 --- a/theories/Codegen/LambdaANF_to_Clight_stack.v +++ b/theories/Codegen/LambdaANF_to_Clight_stack.v @@ -1381,7 +1381,6 @@ Section Check. (* Just for debugging purposes. TODO eventually delete*) End Check. Definition make_tinfoIdent := 20%positive. -Definition exportIdent := 21%positive. Definition make_tinfo_rec : positive * globdef Clight.fundef type := (make_tinfoIdent, @@ -1391,14 +1390,6 @@ Definition make_tinfo_rec : positive * globdef Clight.fundef type := threadInf cc_default)). -Definition export_rec : positive * globdef Clight.fundef type := - (exportIdent, - Gfun (External (EF_external "export" - (mksignature (cons val_typ nil) (Tret val_typ) cc_default)) - (Tcons threadInf Tnil) - valPtr - cc_default)). - Definition compile (args_opt : bool) (e : exp) (cenv : ctor_env) (nenv0 : name_env) : error (name_env * Clight.program * Clight.program) * string := let e := wrap_in_fun e in @@ -1418,10 +1409,8 @@ Definition compile (args_opt : bool) (e : exp) (cenv : ctor_env) (nenv0 : name_e let nenv := (add_inf_vars (ensure_unique nenv1)) in let forward_defs := make_extern_decls nenv defs false in body <- mk_prog_opt [body_external_decl] mainIdent false;; - head <- mk_prog_opt (make_tinfo_rec :: export_rec :: forward_defs ++ defs)%list mainIdent true ;; - ret (M.set make_tinfoIdent (nNamed "make_tinfo"%bs) - (M.set exportIdent (nNamed "export"%bs) nenv), - body, head) + head <- mk_prog_opt (make_tinfo_rec :: forward_defs ++ defs)%list mainIdent true ;; + ret (M.set make_tinfoIdent (nNamed "make_tinfo"%bs) nenv, body, head) in (err, ""). diff --git a/theories/Runtime/values.h b/theories/Runtime/values.h index a5487d16..aeefec1f 100644 --- a/theories/Runtime/values.h +++ b/theories/Runtime/values.h @@ -215,4 +215,10 @@ bits 63 (64-P) (63-P) 10 9 8 7 0 } #endif +/* Floating-point numbers. */ +#define Double_tag 253 +#define Double_wosize ((sizeof(double) / sizeof(value))) +#define Double_val(v) (* (double *)(v)) +#define Store_double_val(v,d) (* (double *)(v) = (d)) + #endif /* VALUES_H */ From edf67d53bbd89ad608cabda20dc9d2c115fe6bd4 Mon Sep 17 00:00:00 2001 From: Joomy Korkut Date: Tue, 24 Oct 2023 20:42:45 -0400 Subject: [PATCH 5/5] Fix the benchmarks to work with the codegen changes --- benchmarks/.gitignore | 1 + benchmarks/basics.c | 27 +++++---------------------- benchmarks/basics.h | 19 +------------------ benchmarks/binom_main.c | 9 ++------- benchmarks/color_main.c | 11 +++-------- benchmarks/demo1_main.c | 11 +++-------- benchmarks/demo2_main.c | 6 +----- benchmarks/demo3_main.c | 16 ++++++---------- benchmarks/list_sum_main.c | 9 +-------- benchmarks/sha_fast_main.c | 7 +------ benchmarks/test_primitive_main.c | 7 +------ benchmarks/vs_easy_main.c | 31 ++++++++++--------------------- benchmarks/vs_hard_main.c | 16 ++++------------ 13 files changed, 39 insertions(+), 131 deletions(-) diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore index 331de874..14ab164e 100644 --- a/benchmarks/.gitignore +++ b/benchmarks/.gitignore @@ -8,6 +8,7 @@ Top.* glue.Top.* ffi.Top.* +m.h gc.c gc.h gc_stack.c diff --git a/benchmarks/basics.c b/benchmarks/basics.c index c357a091..dfac64af 100644 --- a/benchmarks/basics.c +++ b/benchmarks/basics.c @@ -1,27 +1,10 @@ -#include "values.h" +#include "gc_stack.h" struct closure; -struct stack_frame; -struct thread_info; struct closure { value (*func)(struct thread_info, value, value); value env; }; -struct stack_frame { - value *next; - value *root; - struct stack_frame *prev; -}; - -struct thread_info { - value *alloc; - value *limit; - struct heap *heap; - value args[1024]; - struct stack_frame *fp; - unsigned long long nalloc; -}; - extern int printf(signed char *); extern _Bool is_ptr(value); unsigned int get_unboxed_ordinal(value); @@ -58,12 +41,12 @@ signed char const prop_lit[7] = { 60, 112, 114, 111, 112, 62, 0, }; unsigned int get_unboxed_ordinal(value $v) { - return (unsigned long long) $v >> 1LL; + return (value) $v >> 1LL; } unsigned int get_boxed_ordinal(value $v) { - return *((unsigned long long *) $v + -1LL) & 255LL; + return *((value *) $v + -1LL) & 255LL; } value *get_args(value $v) @@ -240,8 +223,8 @@ void print_Coq_Init_Datatypes_bool(value $v) value call(struct thread_info *$tinfo, value $clo, value $arg) { - register unsigned long long *$f; - register unsigned long long *$envi; + register value *$f; + register value *$envi; register value $tmp; $f = (*((struct closure *) $clo)).func; $envi = (*((struct closure *) $clo)).env; diff --git a/benchmarks/basics.h b/benchmarks/basics.h index 5dc242e0..847ae630 100644 --- a/benchmarks/basics.h +++ b/benchmarks/basics.h @@ -1,27 +1,10 @@ -#include "values.h" +#include "gc_stack.h" struct closure; -struct stack_frame; -struct thread_info; struct closure { value (*func)(struct thread_info, value, value); value env; }; -struct stack_frame { - value *next; - value *root; - struct stack_frame *prev; -}; - -struct thread_info { - value *alloc; - value *limit; - struct heap *heap; - value args[1024]; - struct stack_frame *fp; - unsigned long long nalloc; -}; - extern unsigned int get_unboxed_ordinal(value); extern unsigned int get_boxed_ordinal(value); extern value *get_args(value); diff --git a/benchmarks/binom_main.c b/benchmarks/binom_main.c index b522e73c..8a5065d3 100644 --- a/benchmarks/binom_main.c +++ b/benchmarks/binom_main.c @@ -1,16 +1,11 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Init_Datatypes_nat(unsigned long long); - -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern void print_Coq_Init_Datatypes_nat(value); int main(int argc, char *argv[]) { value val; diff --git a/benchmarks/color_main.c b/benchmarks/color_main.c index 95406b07..0a095171 100644 --- a/benchmarks/color_main.c +++ b/benchmarks/color_main.c @@ -1,18 +1,13 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Numbers_BinNums_Z(unsigned long long); - -extern void print_Coq_Init_Datatypes_prod(unsigned long long, void (*)(unsigned long long), void (*)(unsigned long long)); +extern void print_Coq_Numbers_BinNums_Z(value); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern void print_Coq_Init_Datatypes_prod(value, void (*)(value), void (*)(value)); int main(int argc, char *argv[]) { value val; diff --git a/benchmarks/demo1_main.c b/benchmarks/demo1_main.c index 4ebbac7b..58872533 100644 --- a/benchmarks/demo1_main.c +++ b/benchmarks/demo1_main.c @@ -1,18 +1,13 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Init_Datatypes_list(unsigned long long, void (*)(unsigned long long)); - -extern void print_Coq_Init_Datatypes_bool(unsigned long long); +extern void print_Coq_Init_Datatypes_list(value, void (*)(value)); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern void print_Coq_Init_Datatypes_bool(value); int main(int argc, char *argv[]) { value val; diff --git a/benchmarks/demo2_main.c b/benchmarks/demo2_main.c index 4ebbac7b..1fa363e8 100644 --- a/benchmarks/demo2_main.c +++ b/benchmarks/demo2_main.c @@ -1,6 +1,6 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include @@ -10,10 +10,6 @@ extern void print_Coq_Init_Datatypes_list(unsigned long long, void (*)(unsigned extern void print_Coq_Init_Datatypes_bool(unsigned long long); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} - int main(int argc, char *argv[]) { value val; struct thread_info* tinfo; diff --git a/benchmarks/demo3_main.c b/benchmarks/demo3_main.c index 753f14a4..c9dbda66 100644 --- a/benchmarks/demo3_main.c +++ b/benchmarks/demo3_main.c @@ -1,23 +1,19 @@ #include #include #include -#include "gc.h" +#include "gc_stack.h" #include /* TODO: This should be imported with include. */ extern value body(struct thread_info *); extern struct thread_info; -extern void print_Coq_Init_Datatypes_list(unsigned long long, void (*)(unsigned long long)); -extern void print_Coq_Init_Datatypes_bool(unsigned long long); -extern value get_Coq_Init_Datatypes_pair_args (unsigned long long); +extern void print_Coq_Init_Datatypes_list(value, void (*)(value)); +extern void print_Coq_Init_Datatypes_bool(value); +extern value get_Coq_Init_Datatypes_pair_args (value); extern value make_Coq_Init_Datatypes_bool_true (void); extern value make_Coq_Init_Datatypes_bool_false (void); -extern void* call(struct thread_info *tinfo, unsigned long long clos, unsigned long long arg0); - -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern value call(struct thread_info *tinfo, value clos, value arg0); value calls(struct thread_info* tinfo, value clos, unsigned int n, ...) { @@ -26,7 +22,7 @@ value calls(struct thread_info* tinfo, value clos, unsigned int n, ...) va_start(args, n); for(; n != 0; n--) { - v = va_arg(args, unsigned long long); + v = va_arg(args, value); clos = call(tinfo, clos, v); } va_end(args); diff --git a/benchmarks/list_sum_main.c b/benchmarks/list_sum_main.c index 8003526f..aa1adbdb 100644 --- a/benchmarks/list_sum_main.c +++ b/benchmarks/list_sum_main.c @@ -3,16 +3,9 @@ #include "gc.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Init_Datatypes_nat(unsigned long long); - -extern value args[]; - -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern void print_Coq_Init_Datatypes_nat(value); int main(int argc, char *argv[]) { value val; diff --git a/benchmarks/sha_fast_main.c b/benchmarks/sha_fast_main.c index 0339246c..ebb769c6 100644 --- a/benchmarks/sha_fast_main.c +++ b/benchmarks/sha_fast_main.c @@ -1,15 +1,10 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} - int main(int argc, char *argv[]) { value val; struct thread_info* tinfo; diff --git a/benchmarks/test_primitive_main.c b/benchmarks/test_primitive_main.c index 1368d7c8..795977bd 100644 --- a/benchmarks/test_primitive_main.c +++ b/benchmarks/test_primitive_main.c @@ -1,15 +1,10 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} - int main(int argc, char *argv[]) { value val; struct thread_info* tinfo; diff --git a/benchmarks/vs_easy_main.c b/benchmarks/vs_easy_main.c index 32caff09..c70730c4 100644 --- a/benchmarks/vs_easy_main.c +++ b/benchmarks/vs_easy_main.c @@ -1,50 +1,39 @@ #include #include -#include "gc.h" +#include "gc_stack.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Init_Datatypes_bool(unsigned long long); - -extern void print_CertiCoq_Benchmarks_lib_vs_space_atom(unsigned long long); +extern void print_Coq_Init_Datatypes_bool(value); -extern unsigned int get_Coq_Init_Datatypes_list_tag(unsigned long long); +extern void print_CertiCoq_Benchmarks_lib_vs_space_atom(value); -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern unsigned int get_Coq_Init_Datatypes_list_tag(value); -extern void print_Coq_Init_Datatypes_list(unsigned long long, void (*)(unsigned long long)); +extern void print_Coq_Init_Datatypes_list(value, void (*)(value)); -extern void print_CertiCoq_Benchmarks_lib_vs_clause(unsigned long long); +extern void print_CertiCoq_Benchmarks_lib_vs_clause(value); -void print_elem(unsigned long long v) -{ +void print_elem(value v) { printf("."); } -void print_list(unsigned long long l) -{ +void print_list(value l) { print_Coq_Init_Datatypes_list(l, print_elem); printf("\n"); } -void print_list_space_atom(unsigned long long l) -{ +void print_list_space_atom(value l) { print_Coq_Init_Datatypes_list(l, print_CertiCoq_Benchmarks_lib_vs_space_atom); printf("\n"); } -void print_list_clause(unsigned long long l) -{ +void print_list_clause(value l) { print_Coq_Init_Datatypes_list(l, print_CertiCoq_Benchmarks_lib_vs_clause); printf("\n"); } - - int main(int argc, char *argv[]) { value val; struct thread_info* tinfo; diff --git a/benchmarks/vs_hard_main.c b/benchmarks/vs_hard_main.c index 55b6855c..edc3635b 100644 --- a/benchmarks/vs_hard_main.c +++ b/benchmarks/vs_hard_main.c @@ -3,29 +3,21 @@ #include "gc.h" #include - extern value body(struct thread_info *); -extern void print_Coq_Init_Datatypes_bool(unsigned long long); - -_Bool is_ptr(value s) { - return (_Bool) Is_block(s); -} +extern void print_Coq_Init_Datatypes_bool(value); -extern void print_Coq_Init_Datatypes_list(unsigned long long, void (*)(unsigned long long)); +extern void print_Coq_Init_Datatypes_list(value, void (*)(value)); -void print_elem(unsigned long long v) -{ +void print_elem(value v) { printf("."); } -void print_list(unsigned long long l) -{ +void print_list(value l) { print_Coq_Init_Datatypes_list(l, print_elem); printf("\n"); } - int main(int argc, char *argv[]) { value val; struct thread_info* tinfo;