diff --git a/docs/tree.md b/docs/tree.md index b9b2fcdc0..5c26982b7 100644 --- a/docs/tree.md +++ b/docs/tree.md @@ -147,7 +147,7 @@ contributed originally by Dave Barrett. --- +----------------------+ ^ | | | | | * if hdr is free, hb_sz is the size - HB_MARKS_SZ | char/word hb_marks[] | of a heap chunk (struct hblk) of at + HB_MARKS_SZ | char/AO_t hb_marks[] | of a heap chunk (struct hblk) of at | | | least MININCR*HBLKSIZE bytes (below); v | | otherwise, size of each object in chunk. --- +----------------------+ diff --git a/include/private/gc_pmark.h b/include/private/gc_pmark.h index 6448b35b4..735a582fe 100644 --- a/include/private/gc_pmark.h +++ b/include/private/gc_pmark.h @@ -140,8 +140,8 @@ GC_INLINE mse * GC_push_obj(ptr_t obj, const hdr * hhdr, mse * mark_stack_top, /* twice. But that is only a performance issue. */ # define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \ { /* cannot use do-while(0) here */ \ - volatile unsigned char * mark_byte_addr = \ - (unsigned char *)(hhdr)->hb_marks + (bit_no); \ + volatile unsigned char *mark_byte_addr \ + = (unsigned char *)((hhdr) -> hb_marks) + (bit_no); \ /* Unordered atomic load and store are sufficient here. */ \ if (AO_char_load(mark_byte_addr) != 0) \ break; /* go to the enclosing loop end */ \ @@ -150,52 +150,44 @@ GC_INLINE mse * GC_push_obj(ptr_t obj, const hdr * hhdr, mse * mark_stack_top, # else # define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \ { /* cannot use do-while(0) here */ \ - char * mark_byte_addr = (char *)(hhdr)->hb_marks + (bit_no); \ + ptr_t mark_byte_addr = (ptr_t)((hhdr) -> hb_marks) + (bit_no); \ + \ if (*mark_byte_addr != 0) break; /* go to the enclosing loop end */ \ *mark_byte_addr = 1; \ } # endif /* !PARALLEL_MARK */ #else -# ifdef PARALLEL_MARK +# if defined(PARALLEL_MARK) || (defined(THREAD_SANITIZER) && defined(THREADS)) +# ifdef THREAD_SANITIZER +# define MARK_WORD_READ(addr) AO_load(addr) +# else +# define MARK_WORD_READ(addr) (*(addr)) +# endif /* This is used only if we explicitly set USE_MARK_BITS. */ /* The following may fail to exit even if the bit was already set. */ /* For our uses, that's benign: */ -# ifdef THREAD_SANITIZER -# define OR_WORD_EXIT_IF_SET(addr, bits) \ - { /* cannot use do-while(0) here */ \ - if (!((word)AO_load((volatile AO_t *)(addr)) & (bits))) { \ - /* Atomic load is just to avoid TSan false positive. */ \ - AO_or((volatile AO_t *)(addr), (AO_t)(bits)); \ - } else { \ - break; /* go to the enclosing loop end */ \ - } \ - } -# else -# define OR_WORD_EXIT_IF_SET(addr, bits) \ +# define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \ { /* cannot use do-while(0) here */ \ - if (!(*(addr) & (bits))) { \ - AO_or((volatile AO_t *)(addr), (AO_t)(bits)); \ - } else { \ + volatile AO_t *mark_word_addr \ + = (hhdr) -> hb_marks + divWORDSZ(bit_no); \ + word my_bits = (word)1 << modWORDSZ(bit_no); \ + \ + if ((MARK_WORD_READ(mark_word_addr) & my_bits) != 0) \ break; /* go to the enclosing loop end */ \ - } \ + AO_or(mark_word_addr, my_bits); \ } -# endif /* !THREAD_SANITIZER */ # else -# define OR_WORD_EXIT_IF_SET(addr, bits) \ +# define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \ { /* cannot use do-while(0) here */ \ - word old = *(addr); \ - word my_bits = (bits); \ - if ((old & my_bits) != 0) \ - break; /* go to the enclosing loop end */ \ - *(addr) = old | my_bits; \ + word *mark_word_addr = (hhdr) -> hb_marks + divWORDSZ(bit_no); \ + word old = *mark_word_addr; \ + word my_bits = (word)1 << modWORDSZ(bit_no); \ + \ + if ((old & my_bits) != 0) \ + break; /* go to the enclosing loop end */ \ + *(mark_word_addr) = old | my_bits; \ } # endif /* !PARALLEL_MARK */ -# define SET_MARK_BIT_EXIT_IF_SET(hhdr, bit_no) \ - { /* cannot use do-while(0) here */ \ - word * mark_word_addr = (hhdr)->hb_marks + divWORDSZ(bit_no); \ - OR_WORD_EXIT_IF_SET(mark_word_addr, \ - (word)1 << modWORDSZ(bit_no)); /* contains "break" */ \ - } #endif /* !USE_MARK_BYTES */ #ifdef ENABLE_TRACE diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h index 8501e9a0d..468b60da9 100644 --- a/include/private/gc_priv.h +++ b/include/private/gc_priv.h @@ -1056,8 +1056,7 @@ typedef word page_hash_table[PHT_SIZE]; /* async_set_pht_entry_from_index (invoked by GC_dirty or the write */ /* fault handler). */ # define set_pht_entry_from_index_concurrent(bl, index) \ - AO_or((volatile AO_t *)&(bl)[divWORDSZ(index)], \ - (AO_t)((word)1 << modWORDSZ(index))) + AO_or((bl) + divWORDSZ(index), (word)1 << modWORDSZ(index)) # ifdef MPROTECT_VDB # define set_pht_entry_from_index_concurrent_volatile(bl, index) \ set_pht_entry_from_index_concurrent(bl, index) @@ -1088,15 +1087,6 @@ typedef word page_hash_table[PHT_SIZE]; /* initial group of mark bits, and it is safe to */ /* allocate smaller header for large objects. */ -union word_ptr_ao_u { - word w; - signed_word sw; - void *vp; -# ifdef PARALLEL_MARK - volatile AO_t ao; -# endif -}; - /* We maintain layout maps for heap blocks containing objects of a given */ /* size. Each entry in this map describes a byte offset and has the */ /* following type. */ @@ -1146,13 +1136,18 @@ struct hblkhdr { /* LARGE_INV_SZ. */ # define LARGE_INV_SZ ((unsigned32)1 << 16) # endif - size_t hb_sz; /* If in use, size in bytes, of objects in the block. */ - /* if free, the size in bytes of the whole block */ - /* We assume that this is convertible to signed_word */ - /* without generating a negative result. We avoid */ - /* generating free blocks larger than that. */ - word hb_descr; /* object descriptor for marking. See */ - /* gc_mark.h. */ +# ifdef AO_HAVE_load + volatile AO_t hb_sz; + volatile AO_t hb_descr; +# else + size_t hb_sz; + /* If in use, size in bytes, of objects in the block. */ + /* Otherwise, the size of the whole free block. */ + /* We assume that this is convertible to signed_word */ + /* without generating a negative result. We avoid */ + /* generating free blocks larger than that. */ + word hb_descr; /* Object descriptor for marking. */ +# endif /* See gc_mark.h. */ # ifndef MARK_BIT_PER_OBJ unsigned short * hb_map; /* Essentially a table of remainders */ /* mod BYTES_TO_GRANULES(hb_sz), except */ @@ -1200,7 +1195,12 @@ struct hblkhdr { # define hb_marks _mark_byte_union._hb_marks # else # define HB_MARKS_SZ (MARK_BITS_PER_HBLK / CPP_WORDSZ + 1) - word hb_marks[HB_MARKS_SZ]; +# if defined(PARALLEL_MARK) \ + || (defined(THREAD_SANITIZER) && defined(THREADS)) + volatile AO_t hb_marks[HB_MARKS_SZ]; +# else + word hb_marks[HB_MARKS_SZ]; +# endif # endif /* !USE_MARK_BYTES */ }; @@ -1299,9 +1299,11 @@ struct roots { typedef struct GC_ms_entry { ptr_t mse_start; /* Beginning of object, pointer-aligned one. */ - union word_ptr_ao_u mse_descr; - /* Descriptor; low order two bits are tags, */ - /* as described in gc_mark.h. */ +# ifdef PARALLEL_MARK + volatile AO_t mse_descr; +# else + word mse_descr; /* Descriptor; low order two bits are tags, */ +# endif /* as described in gc_mark.h. */ } mse; typedef int mark_state_t; /* Current state of marking. */ @@ -1878,24 +1880,21 @@ struct GC_traced_stack_sect_s { struct GC_traced_stack_sect_s *traced_stack_sect); #endif /* IA64 */ -/* Marks are in a reserved area in */ -/* each heap block. Each word has one mark bit associated */ -/* with it. Only those corresponding to the beginning of an */ -/* object are used. */ +/* Marks are in a reserved area in each heap block. Each object or */ +/* granule has one mark bit associated with it. Only those */ +/* corresponding to the beginning of an object are used. */ -/* Mark bit operations */ +/* Mark bit operations. */ -/* - * Retrieve, set, clear the n-th mark bit in a given heap block. - * - * (Recall that bit n corresponds to nth object or allocation granule - * relative to the beginning of the block, including unused words) - */ +/* Retrieve, set, clear the n-th mark bit in a given heap block. */ +/* (Recall that bit n corresponds to n-th object or allocation */ +/* granule relative to the beginning of the block, including unused */ +/* space.) */ #ifdef USE_MARK_BYTES # define mark_bit_from_hdr(hhdr,n) ((hhdr) -> hb_marks[n]) -# define set_mark_bit_from_hdr(hhdr,n) ((hhdr)->hb_marks[n] = 1) -# define clear_mark_bit_from_hdr(hhdr,n) ((hhdr)->hb_marks[n] = 0) +# define set_mark_bit_from_hdr(hhdr,n) (void)((hhdr) -> hb_marks[n] = 1) +# define clear_mark_bit_from_hdr(hhdr,n) (void)((hhdr) -> hb_marks[n] = 0) #else /* Set mark bit correctly, even if mark bits may be concurrently */ /* accessed. */ @@ -1904,7 +1903,7 @@ struct GC_traced_stack_sect_s { /* mark_bit_from_hdr and set_mark_bit_from_hdr when n is different */ /* (alternatively, USE_MARK_BYTES could be used). If TSan is off, */ /* AO_or() is used only if we set USE_MARK_BITS explicitly. */ -# define OR_WORD(addr, bits) AO_or((volatile AO_t *)(addr), (AO_t)(bits)) +# define OR_WORD(addr, bits) AO_or(addr, bits) # else # define OR_WORD(addr, bits) (void)(*(addr) |= (bits)) # endif @@ -1913,7 +1912,8 @@ struct GC_traced_stack_sect_s { # define set_mark_bit_from_hdr(hhdr,n) \ OR_WORD((hhdr) -> hb_marks + divWORDSZ(n), (word)1 << modWORDSZ(n)) # define clear_mark_bit_from_hdr(hhdr,n) \ - ((hhdr)->hb_marks[divWORDSZ(n)] &= ~((word)1 << modWORDSZ(n))) + (void)((hhdr) -> hb_marks[divWORDSZ(n)] \ + &= ~((word)1 << modWORDSZ(n))) #endif /* !USE_MARK_BYTES */ #ifdef MARK_BIT_PER_OBJ diff --git a/mallocx.c b/mallocx.c index f31e7d12b..71483859f 100644 --- a/mallocx.c +++ b/mallocx.c @@ -130,9 +130,8 @@ GC_API void * GC_CALL GC_realloc(void * p, size_t lb) /* But that is probably more expensive, since we may end up */ /* scanning a bunch of zeros during GC.) */ # ifdef AO_HAVE_store - GC_STATIC_ASSERT(sizeof(hhdr->hb_sz) == sizeof(AO_t)); - AO_store((volatile AO_t *)&hhdr->hb_sz, (AO_t)sz); - AO_store((volatile AO_t *)&hhdr->hb_descr, (AO_t)descr); + AO_store(&(hhdr -> hb_sz), sz); + AO_store(&(hhdr -> hb_descr), descr); # else { LOCK(); diff --git a/mark.c b/mark.c index 60b547896..2e3eb000a 100644 --- a/mark.c +++ b/mark.c @@ -183,13 +183,14 @@ GC_INNER void GC_clear_hdr_marks(hdr *hhdr) # ifdef AO_HAVE_load /* Atomic access is used to avoid racing with GC_realloc. */ - last_bit = FINAL_MARK_BIT(AO_load((volatile AO_t *)&hhdr->hb_sz)); + last_bit = FINAL_MARK_BIT(AO_load(&(hhdr -> hb_sz))); # else /* No race as GC_realloc holds the allocator lock while updating hb_sz. */ last_bit = FINAL_MARK_BIT(hhdr -> hb_sz); # endif - BZERO(hhdr -> hb_marks, sizeof(hhdr->hb_marks)); + BZERO(CAST_AWAY_VOLATILE_PVOID(hhdr -> hb_marks), + sizeof(hhdr -> hb_marks)); set_mark_bit_from_hdr(hhdr, last_bit); hhdr -> hb_n_marks = 0; } @@ -675,7 +676,7 @@ GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, # endif { current_p = mark_stack_top -> mse_start; - descr = mark_stack_top -> mse_descr.w; + descr = mark_stack_top -> mse_descr; retry: /* current_p and descr describe the current object. */ /* (*mark_stack_top) is vacant. */ @@ -705,7 +706,7 @@ GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, word new_size = (descr >> 1) & ~(word)(sizeof(ptr_t)-1); mark_stack_top -> mse_start = current_p; - mark_stack_top -> mse_descr.w + mark_stack_top -> mse_descr = (new_size + sizeof(ptr_t)) | GC_DS_LENGTH; /* This makes sure we handle */ /* misaligned pointers. */ @@ -726,7 +727,7 @@ GC_INNER mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, # endif /* PARALLEL_MARK */ limit = current_p + PTRS_TO_BYTES(SPLIT_RANGE_PTRS-1); mark_stack_top -> mse_start = limit; - mark_stack_top -> mse_descr.w + mark_stack_top -> mse_descr = descr - PTRS_TO_BYTES(SPLIT_RANGE_PTRS-1); # ifdef ENABLE_TRACE if (ADDR_INSIDE(GC_trace_ptr, current_p, current_p + descr)) { @@ -992,16 +993,16 @@ STATIC mse * GC_steal_mark_stack(mse * low, mse * high, mse * local, GC_ASSERT(ADDR_GE((ptr_t)high, (ptr_t)(low - 1)) && (word)(high - low + 1) <= GC_mark_stack_size); for (p = low; ADDR_GE((ptr_t)high, (ptr_t)p) && i <= n_to_get; ++p) { - word descr = (word)AO_load(&p->mse_descr.ao); + word descr = AO_load(&(p -> mse_descr)); if (descr != 0) { /* Must be ordered after read of descr: */ - AO_store_release_write(&p->mse_descr.ao, 0); + AO_store_release_write(&(p -> mse_descr), 0); /* More than one thread may get this entry, but that's only */ /* a minor performance problem. */ ++top; top -> mse_start = p -> mse_start; - top -> mse_descr.w = descr; + top -> mse_descr = descr; GC_ASSERT((descr & GC_DS_TAGS) != GC_DS_LENGTH /* 0 */ || descr < GC_greatest_real_heap_addr - GC_least_real_heap_addr @@ -1251,7 +1252,7 @@ STATIC void GC_do_parallel_mark(void) /* only, the initiating thread holds the allocator lock. */ GC_INNER void GC_help_marker(word my_mark_no) { -# define my_id my_id_mse.mse_descr.w +# define my_id my_id_mse.mse_descr mse my_id_mse; /* align local_mark_stack explicitly */ mse local_mark_stack[LOCAL_MARK_STACK_SIZE]; /* Note: local_mark_stack is quite big (up to 128 KiB). */ @@ -1354,7 +1355,7 @@ GC_API void GC_CALL GC_push_all(void *bottom, void *top) length = (length + GC_DS_TAGS) & ~(word)GC_DS_TAGS; /* round up */ # endif mark_stack_top -> mse_start = (ptr_t)bottom; - mark_stack_top -> mse_descr.w = length | GC_DS_LENGTH; + mark_stack_top -> mse_descr = length | GC_DS_LENGTH; GC_mark_stack_top = mark_stack_top; } @@ -1385,7 +1386,7 @@ GC_API struct GC_ms_entry * GC_CALL GC_custom_push_proc(GC_word descr, mark_stack_top = GC_signal_mark_stack_overflow(mark_stack_top); } mark_stack_top -> mse_start = (ptr_t)obj; - mark_stack_top -> mse_descr.w = descr; + mark_stack_top -> mse_descr = descr; return mark_stack_top; } @@ -1756,7 +1757,8 @@ GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top) GC_ATTR_NO_SANITIZE_THREAD STATIC void GC_push_marked1(struct hblk *h, const hdr *hhdr) { - const word *mark_word_addr = &(hhdr -> hb_marks[0]); + const word *mark_word_addr + = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr -> hb_marks); ptr_t *p; ptr_t plim; @@ -1805,7 +1807,8 @@ GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top) GC_ATTR_NO_SANITIZE_THREAD STATIC void GC_push_marked2(struct hblk *h, const hdr *hhdr) { - const word *mark_word_addr = &(hhdr -> hb_marks[0]); + const word *mark_word_addr + = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr -> hb_marks); ptr_t *p; ptr_t plim; ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr; @@ -1855,7 +1858,8 @@ GC_INNER void GC_push_all_stack(ptr_t bottom, ptr_t top) GC_ATTR_NO_SANITIZE_THREAD STATIC void GC_push_marked4(struct hblk *h, const hdr *hhdr) { - const word *mark_word_addr = &(hhdr -> hb_marks[0]); + const word *mark_word_addr + = (word *)CAST_AWAY_VOLATILE_PVOID(hhdr -> hb_marks); ptr_t *p; ptr_t plim; ptr_t greatest_ha = (ptr_t)GC_greatest_plausible_heap_addr; @@ -1999,7 +2003,7 @@ STATIC void GC_push_marked(struct hblk *h, const hdr *hhdr) # ifdef AO_HAVE_load /* Atomic access is used to avoid racing with GC_realloc. */ - sz = AO_load((volatile AO_t *)&(hhdr -> hb_sz)); + sz = AO_load(&(hhdr -> hb_sz)); # else sz = hhdr -> hb_sz; # endif diff --git a/misc.c b/misc.c index b31f03615..ae62cbfdd 100644 --- a/misc.c +++ b/misc.c @@ -1332,6 +1332,11 @@ GC_API void GC_CALL GC_init(void) # endif # if !defined(CPPCHECK) GC_STATIC_ASSERT(sizeof(size_t) <= sizeof(ptrdiff_t)); +# ifdef AO_HAVE_store + /* As of now, hb/mse_descr and hb_marks[i] might be treated */ + /* as words but might be accessed atomically. */ + GC_STATIC_ASSERT(sizeof(AO_t) == sizeof(word)); +# endif GC_STATIC_ASSERT(sizeof(ptrdiff_t) == sizeof(word)); GC_STATIC_ASSERT(sizeof(signed_word) == sizeof(word)); GC_STATIC_ASSERT(sizeof(word) * 8 == CPP_WORDSZ); @@ -2482,7 +2487,7 @@ static void GC_CALLBACK block_add_size(struct hblk *h, void *pbytes) { const hdr *hhdr = HDR(h); - *(word *)pbytes += ((word)hhdr->hb_sz + HBLKSIZE-1) & ~(word)(HBLKSIZE-1); + *(word *)pbytes += (hhdr -> hb_sz + HBLKSIZE-1) & ~(HBLKSIZE-1); # if defined(CPPCHECK) GC_noop1_ptr(h); # endif diff --git a/reclaim.c b/reclaim.c index c2e805d64..4d8f1a488 100644 --- a/reclaim.c +++ b/reclaim.c @@ -292,8 +292,7 @@ STATIC void GC_reclaim_check(struct hblk *hbp, const hdr *hhdr, size_t sz) /* Is a pointer-free block? Same as IS_PTRFREE() macro but uses */ /* unordered atomic access to avoid racing with GC_realloc. */ #ifdef AO_HAVE_load -# define IS_PTRFREE_SAFE(hhdr) \ - (AO_load((volatile AO_t *)&(hhdr)->hb_descr) == 0) +# define IS_PTRFREE_SAFE(hhdr) (AO_load(&((hhdr) -> hb_descr)) == 0) #else /* No race as GC_realloc holds the allocator lock when updating hb_descr. */ # define IS_PTRFREE_SAFE(hhdr) IS_PTRFREE(hhdr) @@ -402,7 +401,7 @@ STATIC void GC_CALLBACK GC_reclaim_block(struct hblk *hbp, ok = &GC_obj_kinds[hhdr -> hb_obj_kind]; # ifdef AO_HAVE_load /* Atomic access is used to avoid racing with GC_realloc. */ - sz = AO_load((volatile AO_t *)&(hhdr -> hb_sz)); + sz = AO_load(&(hhdr -> hb_sz)); # else /* No race as GC_realloc holds the allocator lock while */ /* updating hb_sz. */ @@ -534,7 +533,7 @@ STATIC void GC_CALLBACK GC_reclaim_block(struct hblk *hbp, size_t limit = FINAL_MARK_BIT(hhdr -> hb_sz); for (i = 0; i < limit; i += offset) { - result += hhdr -> hb_marks[i]; + result += (unsigned)(hhdr -> hb_marks[i]); } GC_ASSERT(hhdr -> hb_marks[limit]); /* the one set past the end */ return result; diff --git a/typd_mlc.c b/typd_mlc.c index c115615ae..d8625126d 100644 --- a/typd_mlc.c +++ b/typd_mlc.c @@ -638,7 +638,7 @@ STATIC mse *GC_push_complex_descriptor(ptr_t current, for (i = 0; i < nelements; i++) { msp++; msp -> mse_start = current; - msp -> mse_descr.w = d; + msp -> mse_descr = d; current += sz; } break; @@ -712,12 +712,12 @@ STATIC mse *GC_CALLBACK GC_array_mark_proc(word *addr, mse *mark_stack_top, } new_mark_stack_top = orig_mark_stack_top + 1; new_mark_stack_top -> mse_start = (ptr_t)addr; - new_mark_stack_top -> mse_descr.w = sz | GC_DS_LENGTH; + new_mark_stack_top -> mse_descr = sz | GC_DS_LENGTH; } else { /* Push descriptor itself. */ new_mark_stack_top++; new_mark_stack_top -> mse_start = (ptr_t)((ptr_t *)addr + lpw - 1); - new_mark_stack_top -> mse_descr.w = sizeof(ptr_t) | GC_DS_LENGTH; + new_mark_stack_top -> mse_descr = sizeof(ptr_t) | GC_DS_LENGTH; } return new_mark_stack_top; }