Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More precise writebarrier for regions #67389

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
242d361
Initial version using a byte[] lookup table for the generations, usin…
PeterSolMS Mar 11, 2022
dfc2cd8
Fix server GC issue.
PeterSolMS Mar 14, 2022
f6cd0cd
Merge branch 'main' into Optimize_mark_through_cards
PeterSolMS Mar 14, 2022
2d527b5
Introduce ephemeral range (ephemeral_low/ephemeral_high) as the globa…
PeterSolMS Mar 15, 2022
c9e9694
Snapshot.
PeterSolMS Mar 18, 2022
242101d
Changes to update ephemeral_low, ephemeral_high and the writebarrier.
PeterSolMS Mar 21, 2022
01c18fe
Fix issues:
PeterSolMS Mar 23, 2022
d5714e9
Fix issue where the card bundle bits at the beginning and end of regi…
PeterSolMS Mar 26, 2022
3fcca4e
More optimizations:
PeterSolMS Mar 31, 2022
08fbf50
Cleanup.
PeterSolMS Mar 31, 2022
7e864c7
Commit only the part of the map_region_to_generation table that is ne…
PeterSolMS Apr 4, 2022
5e7be14
Initial version of more precise write barrier helpers for x64 / Linux.
PeterSolMS Apr 6, 2022
aacd621
Attempt to fix OSX code alignment issues following pattern seen in ot…
PeterSolMS Apr 7, 2022
1c95773
Try replacing ugly .byte directives by jcc short.
PeterSolMS Apr 8, 2022
766df8f
Undo change trying "jcc short" - doesn't work. Disable more precise w…
PeterSolMS Apr 8, 2022
b9b53e8
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Apr 19, 2022
9c31f09
Revert code changes in JIT_ByRefWriteBarrier to check whether the fai…
PeterSolMS Apr 19, 2022
2d3bf3e
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Apr 21, 2022
30e1233
Add a write barrier type for regions that sets a whole byte instead o…
PeterSolMS Apr 24, 2022
93011aa
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS May 11, 2022
5c2e9f3
Add narrowing cast to fix build issue, remove temporary hack to force…
PeterSolMS May 13, 2022
2f6991f
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Jul 13, 2022
cd5cb8c
Generalize the lookup via the map_region_to_generation table to also …
PeterSolMS Jul 25, 2022
653db9c
Revert hack to enable regions for clrc.
PeterSolMS Jul 25, 2022
2823b22
Fix logic error in check_demotion_helper(_sip): child_object is alrea…
PeterSolMS Aug 1, 2022
cb6a1bd
Merge with main.
PeterSolMS Aug 3, 2022
ecf265b
Use conservative values for the ephemeral range so the write barrier …
PeterSolMS Aug 3, 2022
88b3313
Fix Linux build issue with Volatile<uint8_t*>.
PeterSolMS Aug 3, 2022
a840b9b
Fix build issue on Windows - had overlooked the fact that on Windows,…
PeterSolMS Aug 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
512 changes: 446 additions & 66 deletions src/coreclr/gc/gc.cpp

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion src/coreclr/gc/gcconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class GCConfigStringHolder
INT_CONFIG (GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", "System.GC.HeapHardLimitPOHPercent", 0, "Specifies the GC heap POH usage as a percentage of the total memory") \
INT_CONFIG (GCEnabledInstructionSets, "GCEnabledInstructionSets", NULL, -1, "Specifies whether GC can use AVX2 or AVX512F - 0 for neither, 1 for AVX2, 3 for AVX512F")\
INT_CONFIG (GCConserveMem, "GCConserveMemory", "System.GC.ConserveMemory", 0, "Specifies how hard GC should try to conserve memory - values 0-9") \

INT_CONFIG (GCWriteBarrier, "GCWriteBarrier", NULL, 0, "Specifies whether GC should use more precise but slower write barrier")
// This class is responsible for retrieving configuration information
// for how the GC should operate.
class GCConfig
Expand Down Expand Up @@ -182,6 +182,14 @@ enum HeapVerifyFlags {
HEAPVERIFY_DEEP_ON_COMPACT = 0x80 // Performs deep object verfication only on compacting GCs.
};

// Selects which flavor of write barrier is used.
// NOTE(review): presumably these are the numeric values accepted by the
// GCWriteBarrier config setting - confirm against the code that reads it.
// The values are spelled out explicitly so they cannot shift if an entry is
// ever inserted or reordered.
enum WriteBarrierFlavor
{
    WRITE_BARRIER_DEFAULT     = 0,
    // NOTE(review): judging by the names, the two region flavors differ in
    // whether a single card bit or a whole card byte is set - confirm.
    WRITE_BARRIER_REGION_BIT  = 1,
    WRITE_BARRIER_REGION_BYTE = 2,
    WRITE_BARRIER_SERVER      = 3
};

// Initializes the GCConfig subsystem. Must be called before accessing any
// configuration information.
static void Initialize();
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/gc/gcinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ struct WriteBarrierParameters
// The new write watch table, if we are using our own write watch
// implementation. Used for WriteBarrierOp::SwitchToWriteWatch only.
uint8_t* write_watch_table;

// mapping table from region index to generation
uint8_t* region_to_generation_table;

// shift count - how many bits to shift right to obtain region index from address
uint8_t region_shr;

// whether to use the more precise but slower write barrier
bool region_use_bitwise_write_barrier;
};

struct EtwGCSettingsInfo
Expand Down
68 changes: 65 additions & 3 deletions src/coreclr/gc/gcpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,9 @@ enum bookkeeping_element
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
software_write_watch_table_element,
#endif
#ifdef USE_REGIONS
region_to_generation_table_element,
#endif //USE_REGIONS
seg_mapping_table_element,
#ifdef BACKGROUND_GC
mark_array_element,
Expand Down Expand Up @@ -1378,6 +1381,12 @@ class gc_heap
PER_HEAP
void set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num);
PER_HEAP
void set_region_sweep_in_plan (heap_segment* region);
PER_HEAP
void clear_region_sweep_in_plan (heap_segment* region);
PER_HEAP
void clear_region_demoted (heap_segment* region);
PER_HEAP
void decide_on_demotion_pin_surv (heap_segment* region);
PER_HEAP
void skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num);
Expand Down Expand Up @@ -1445,6 +1454,12 @@ class gc_heap
// This relocates the SIP regions and return the next non SIP region.
PER_HEAP
heap_segment* relocate_advance_to_non_sip (heap_segment* region);

PER_HEAP_ISOLATED
void verify_region_to_generation_map();

PER_HEAP_ISOLATED
void compute_gc_and_ephemeral_range (int condemned_gen_number, bool end_of_gc_p);
#ifdef STRESS_REGIONS
PER_HEAP
void pin_by_gc (uint8_t* object);
Expand Down Expand Up @@ -3083,6 +3098,8 @@ class gc_heap
#endif //BACKGROUND_GC

#ifdef USE_REGIONS
PER_HEAP_ISOLATED
bool is_in_gc_range (uint8_t* o);
// o is guaranteed to be in the heap range.
PER_HEAP_ISOLATED
bool is_in_condemned_gc (uint8_t* o);
Expand Down Expand Up @@ -3673,6 +3690,39 @@ class gc_heap
size_t* old_card_survived_per_region;
PER_HEAP_ISOLATED
size_t region_count;

// table mapping region number to generation
// there are actually two generation numbers per entry:
// - the region's current generation
// - the region's planned generation, i.e. after the GC
// and there are flags
// - whether the region is sweep in plan
// - and whether the region is demoted
// One byte of per-region state, laid out as:
//   bits 0-1 : current generation number
//   bits 2-5 : flags (only bits 2 and 3 are in use)
//   bits 6-7 : planned generation number
// For example, RI_GEN_2 | RI_PLAN_GEN_2 encodes to 0x82.
enum region_info : uint8_t
{
    // current generation number lives in the lowest 2 bits
    RI_GEN_0         = 0x0,
    RI_GEN_1         = 0x1,
    RI_GEN_2         = 0x2,
    RI_GEN_MASK      = 0x3,

    // 4 bits are reserved for flags; 2 of them are used so far
    RI_SIP           = 0x4,   // region is sweep in plan
    RI_DEMOTED       = 0x8,   // region is demoted

    // planned generation number lives in the top 2 bits;
    // shifting an entry right by RI_PLAN_GEN_SHR yields the plan gen
    RI_PLAN_GEN_SHR  = 0x6,
    RI_PLAN_GEN_0    = RI_GEN_0    << RI_PLAN_GEN_SHR,   // 0x00
    RI_PLAN_GEN_1    = RI_GEN_1    << RI_PLAN_GEN_SHR,   // 0x40
    RI_PLAN_GEN_2    = RI_GEN_2    << RI_PLAN_GEN_SHR,   // 0x80
    RI_PLAN_GEN_MASK = RI_GEN_MASK << RI_PLAN_GEN_SHR,   // 0xC0
};
PER_HEAP_ISOLATED
region_info* map_region_to_generation;
// same table as above, but skewed so that we can index
// directly with address >> min_segment_size_shr
PER_HEAP_ISOLATED
region_info* map_region_to_generation_skewed;
#endif //USE_REGIONS

#define max_oom_history_count 4
Expand Down Expand Up @@ -3723,7 +3773,13 @@ class gc_heap
PER_HEAP
void exit_gc_done_event_lock();

#ifndef USE_REGIONS
#ifdef USE_REGIONS
PER_HEAP_ISOLATED
VOLATILE(uint8_t*) ephemeral_low; //lowest ephemeral address

PER_HEAP_ISOLATED
VOLATILE(uint8_t*) ephemeral_high; //highest ephemeral address
#else //!USE_REGIONS
PER_HEAP
uint8_t* ephemeral_low; //lowest ephemeral address

Expand Down Expand Up @@ -4089,13 +4145,19 @@ class gc_heap
PER_HEAP
uint64_t time_bgc_last;

//#ifndef USE_REGIONS
#ifdef USE_REGIONS
PER_HEAP_ISOLATED
uint8_t* gc_low; // low end of the lowest region being condemned

PER_HEAP_ISOLATED
uint8_t* gc_high; // high end of the highest region being condemned
#else // USE_REGIONS
PER_HEAP
uint8_t* gc_low; // lowest address being condemned

PER_HEAP
uint8_t* gc_high; // highest address being condemned
//#endif //USE_REGIONS
#endif //USE_REGIONS

PER_HEAP
size_t mark_stack_tos;
Expand Down
85 changes: 85 additions & 0 deletions src/coreclr/vm/amd64/JitHelpers_Fast.asm
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ EXTERN g_ephemeral_high:QWORD
EXTERN g_lowest_address:QWORD
EXTERN g_highest_address:QWORD
EXTERN g_card_table:QWORD
EXTERN g_region_shr:BYTE
EXTERN g_region_use_bitwise_write_barrier:BYTE
EXTERN g_region_to_generation_table:QWORD

ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
EXTERN g_card_bundle_table:QWORD
Expand Down Expand Up @@ -135,6 +138,31 @@ endif
align 16
Exit:
REPRET

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

else
; JIT_WriteBarrier_PostGrow64

Expand Down Expand Up @@ -305,6 +333,60 @@ endif
cmp rcx, [g_ephemeral_high]
jnb Exit

; do the following checks only if we are allowed to trash rax
; otherwise we don't have enough registers
ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
mov rax, rcx

mov cl, [g_region_shr]
test cl, cl
je SkipCheck

; check if the source is in gen 2 - then it's not an ephemeral pointer
shr rax, cl
add rax, [g_region_to_generation_table]
cmp byte ptr [rax], 82h
je Exit

; check if the destination happens to be in gen 0
mov rax, rdi
shr rax, cl
add rax, [g_region_to_generation_table]
cmp byte ptr [rax], 0
je Exit
SkipCheck:

cmp [g_region_use_bitwise_write_barrier], 0
je CheckCardTableByte

; compute card table bit
mov rcx, rdi
mov al, 1
shr rcx, 8
and cl, 7
shl al, cl

; move current rdi value into rcx and then increment the pointers
mov rcx, rdi
add rsi, 8h
add rdi, 8h

; Check if we need to update the card table
; Calc pCardByte
shr rcx, 0Bh
add rcx, [g_card_table]

; Check if this card table bit is already set
test byte ptr [rcx], al
je SetCardTableBit
REPRET

SetCardTableBit:
lock or byte ptr [rcx], al
jmp CheckCardBundle
endif
CheckCardTableByte:

; move current rdi value into rcx and then increment the pointers
mov rcx, rdi
add rsi, 8h
Expand All @@ -322,6 +404,9 @@ endif

UpdateCardTable:
mov byte ptr [rcx], 0FFh

CheckCardBundle:

ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
; check if we need to update the card bundle table
; restore destination address from rdi - rdi has been incremented by 8 already
Expand Down
Loading