Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More precise writebarrier for regions #67389

Merged
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
242d361
Initial version using a byte[] lookup table for the generations, usin…
PeterSolMS Mar 11, 2022
dfc2cd8
Fix server GC issue.
PeterSolMS Mar 14, 2022
f6cd0cd
Merge branch 'main' into Optimize_mark_through_cards
PeterSolMS Mar 14, 2022
2d527b5
Introduce ephemeral range (ephemeral_low/ephemeral_high) as the globa…
PeterSolMS Mar 15, 2022
c9e9694
Snapshot.
PeterSolMS Mar 18, 2022
242101d
Changes to update ephemeral_low, ephemeral_high and the writebarrier.
PeterSolMS Mar 21, 2022
01c18fe
Fix issues:
PeterSolMS Mar 23, 2022
d5714e9
Fix issue where the card bundle bits at the beginning and end of regi…
PeterSolMS Mar 26, 2022
3fcca4e
More optimizations:
PeterSolMS Mar 31, 2022
08fbf50
Cleanup.
PeterSolMS Mar 31, 2022
7e864c7
Commit only the part of the map_region_to_generation table that is ne…
PeterSolMS Apr 4, 2022
5e7be14
Initial version of more precise write barrier helpers for x64 / Linux.
PeterSolMS Apr 6, 2022
aacd621
Attempt to fix OSX code alignment issues following pattern seen in ot…
PeterSolMS Apr 7, 2022
1c95773
Try replacing ugly .byte directives by jcc short.
PeterSolMS Apr 8, 2022
766df8f
Undo change trying "jcc short" - doesn't work. Disable more precise w…
PeterSolMS Apr 8, 2022
b9b53e8
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Apr 19, 2022
9c31f09
Revert code changes in JIT_ByRefWriteBarrier to check whether the fai…
PeterSolMS Apr 19, 2022
2d3bf3e
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Apr 21, 2022
30e1233
Add a write barrier type for regions that sets a whole byte instead o…
PeterSolMS Apr 24, 2022
93011aa
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS May 11, 2022
5c2e9f3
Add narrowing cast to fix build issue, remove temporary hack to force…
PeterSolMS May 13, 2022
2f6991f
Merge branch 'main' into More_precise_writebarrier_for_regions
PeterSolMS Jul 13, 2022
cd5cb8c
Generalize the lookup via the map_region_to_generation table to also …
PeterSolMS Jul 25, 2022
653db9c
Revert hack to enable regions for clrc.
PeterSolMS Jul 25, 2022
2823b22
Fix logic error in check_demotion_helper(_sip): child_object is alrea…
PeterSolMS Aug 1, 2022
cb6a1bd
Merge with main.
PeterSolMS Aug 3, 2022
ecf265b
Use conservative values for the ephemeral range so the write barrier …
PeterSolMS Aug 3, 2022
88b3313
Fix Linux build issue with Volatile<uint8_t*>.
PeterSolMS Aug 3, 2022
a840b9b
Fix build issue on Windows - had overlooked the fact that on Windows,…
PeterSolMS Aug 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 260 additions & 23 deletions src/coreclr/gc/gc.cpp

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/coreclr/gc/gcinterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ struct WriteBarrierParameters
// The new write watch table, if we are using our own write watch
// implementation. Used for WriteBarrierOp::SwitchToWriteWatch only.
uint8_t* write_watch_table;

// mapping table from region index to generation
uint8_t* region_to_generation_table;

// shift count - how many bits to shift right to obtain region index from address
uint8_t region_shr;
};

struct EtwGCSettingsInfo
Expand Down
28 changes: 27 additions & 1 deletion src/coreclr/gc/gcpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,9 @@ enum bookkeeping_element
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
software_write_watch_table_element,
#endif
#ifdef USE_REGIONS
region_to_generation_table_element,
#endif //USE_REGIONS
seg_mapping_table_element,
#ifdef BACKGROUND_GC
mark_array_element,
Expand Down Expand Up @@ -1441,6 +1444,12 @@ class gc_heap
// This relocates the SIP regions and return the next non SIP region.
PER_HEAP
heap_segment* relocate_advance_to_non_sip (heap_segment* region);

PER_HEAP_ISOLATED
void verify_region_to_generation_map();

PER_HEAP_ISOLATED
void compute_ephemeral_range();
#ifdef STRESS_REGIONS
PER_HEAP
void pin_by_gc (uint8_t* object);
Expand Down Expand Up @@ -3662,6 +3671,17 @@ class gc_heap
size_t* old_card_survived_per_region;
PER_HEAP_ISOLATED
size_t region_count;

// table mapping region number to generation
// there are actually two generation numbers per entry:
// - the region's current generation
// - the region's planned generation, i.e. after the GC
PER_HEAP_ISOLATED
uint8_t* map_region_to_generation;
// same table as above, but skewed so that we can index
// directly with address >> min_segment_size_shr
PER_HEAP_ISOLATED
uint8_t* map_region_to_generation_skewed;
#endif //USE_REGIONS

#define max_oom_history_count 4
Expand Down Expand Up @@ -3712,7 +3732,13 @@ class gc_heap
PER_HEAP
void exit_gc_done_event_lock();

#ifndef USE_REGIONS
#ifdef USE_REGIONS
PER_HEAP_ISOLATED
uint8_t* ephemeral_low; //lowest ephemeral address

PER_HEAP_ISOLATED
uint8_t* ephemeral_high; //highest ephemeral address
#else //!USE_REGIONS
PER_HEAP
uint8_t* ephemeral_low; //lowest ephemeral address

Expand Down
77 changes: 77 additions & 0 deletions src/coreclr/vm/amd64/JitHelpers_Fast.asm
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ EXTERN g_ephemeral_high:QWORD
EXTERN g_lowest_address:QWORD
EXTERN g_highest_address:QWORD
EXTERN g_card_table:QWORD
EXTERN g_region_shr:BYTE
EXTERN g_region_to_generation_table:QWORD

ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
EXTERN g_card_bundle_table:QWORD
Expand Down Expand Up @@ -140,6 +142,31 @@ endif
align 16
Exit:
REPRET

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE
NOP_3_BYTE

else
; JIT_WriteBarrier_PostGrow64

Expand Down Expand Up @@ -310,6 +337,55 @@ endif
cmp rcx, [g_ephemeral_high]
jnb Exit

; do the following checks only if we are allowed to trash rax
; otherwise we don't have enough registers
ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
mov rax, rcx

mov cl, [g_region_shr]
test cl, cl
je SkipCheck

; check if the source is in gen 2 - then it's not an ephemeral pointer
shr rax, cl
add rax, [g_region_to_generation_table]
cmp byte ptr [rax], 22h
je Exit

; check if the destination happens to be in gen 0
mov rax, rdi
shr rax, cl
add rax, [g_region_to_generation_table]
cmp byte ptr [rax], 0
je Exit
SkipCheck:

; compute card table bit
mov rcx, rdi
mov al, 1
shr rcx, 8
and cl, 7
shl al, cl

; move current rdi value into rcx and then increment the pointers
mov rcx, rdi
add rsi, 8h
add rdi, 8h

; Check if we need to update the card table
; Calc pCardByte
shr rcx, 0Bh
add rcx, [g_card_table]

; Check if this card table bit is already set
test byte ptr [rcx], al
je SetCardTableBit
REPRET

SetCardTableBit:
lock or byte ptr [rcx], al
else

; move current rdi value into rcx and then increment the pointers
mov rcx, rdi
add rsi, 8h
Expand All @@ -327,6 +403,7 @@ endif

UpdateCardTable:
mov byte ptr [rcx], 0FFh
endif
ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
; check if we need to update the card bundle table
; restore destination address from rdi - rdi has been incremented by 8 already
Expand Down
161 changes: 161 additions & 0 deletions src/coreclr/vm/amd64/JitHelpers_FastWriteBarriers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,81 @@ endif
ret
LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT

; JIT_WriteBarrier_Region64
;
; Write barrier variant that consults a per-region generation table before
; deciding whether a card has to be marked.
;
; In:      rcx = destination address, rdx = value being stored
; Trashes: rax, rcx, rdx, r8, r9 and the flags
;
; Every 0F0F0F0F0F0F0F0F0h immediate and every 16h shift count that directly
; follows a PATCH_LABEL is a placeholder operand. Presumably the runtime
; overwrites these with the real table addresses, range bounds and region
; shift count - TODO confirm against the code that consumes the patch labels.
; The nop padding below exists to keep those operands aligned, so do not
; reorder or resize instructions without re-checking the alignment.
LEAF_ENTRY JIT_WriteBarrier_Region64, _TEXT
align 8

; Do the move into the GC heap. It is correct to take an AV here, the EH code
; figures out that this came from a WriteBarrier and correctly maps it back
; to the managed method which called the WriteBarrier (see setup in
; InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rcx], rdx

; keep an unshifted copy of the destination address for the card computation
mov r8, rcx

; rax = base of the region-to-generation table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionToGeneration
mov rax, 0F0F0F0F0F0F0F0F0h

PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrDest
shr rcx, 16h ; compute region index

; Check whether the region we're storing into is gen 0 - nothing to do in this case
cmp byte ptr [rcx + rax], 0
jne NotGen0
REPRET

NOP_2_BYTE ; padding for alignment of constant

NotGen0:
; return if the stored value lies below the lower bound (placeholder immediate)
; NOTE(review): presumably the bound is the GC's ephemeral_low - confirm
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Lower
mov r9, 0F0F0F0F0F0F0F0F0h
cmp rdx, r9
jae NotLow
ret
NotLow:
; return if the stored value lies at or above the upper bound (placeholder immediate)
; NOTE(review): presumably the bound is the GC's ephemeral_high - confirm
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_Upper
mov r9, 0F0F0F0F0F0F0F0F0h
cmp rdx, r9
jb NotHigh
REPRET
NotHigh:
; Compare the table byte of the stored value's region (dl) with the table byte
; of the destination's region. A card is only needed when the former is
; unsigned-below the latter; otherwise return.
; NOTE(review): the jump target is spelled isOldToYoung while the label is
; IsOldToYoung - this relies on the assembler matching symbols
; case-insensitively; confirm the build's case-mapping setting.
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_RegionShrSrc
shr rdx, 16h ; compute region index
mov dl, [rdx + rax]
cmp dl, [rcx + rax]
jb isOldToYoung
REPRET
nop

IsOldToYoung:
; rax = base of the card table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardTable
mov rax, 0F0F0F0F0F0F0F0F0h

; r8  = destination >> 0Bh       : byte offset into the card table (one byte per 2 KB)
; ecx = (destination >> 8) & 7   : bit number within that byte (one bit per 256 bytes)
; dl  = 1 << ecx                 : mask for that bit
mov ecx, r8d
shr r8, 0Bh
shr ecx, 8
and ecx, 7
mov dl, 1
shl dl, cl
; only write to the card table when the bit is still clear
test byte ptr [r8 + rax], dl
je UpdateCardTable
REPRET

UpdateCardTable:
; locked read-modify-write so that the other bits of the card byte are preserved
lock or byte ptr [r8 + rax], dl
ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
; rax = base of the card bundle table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_Region64_Patch_Label_CardBundleTable
mov rax, 0F0F0F0F0F0F0F0F0h
; r8 already holds destination >> 0Bh, so this yields destination >> 15h:
; the byte offset into the card bundle table
shr r8, 0Ah
; skip the store when the bundle byte is already 0FFh
cmp byte ptr [r8 + rax], 0FFh
jne UpdateCardBundleTable
REPRET

UpdateCardBundleTable:
mov byte ptr [r8 + rax], 0FFh
endif
ret
LEAF_END_MARKED JIT_WriteBarrier_Region64, _TEXT

endif


Expand Down Expand Up @@ -410,6 +485,92 @@ endif
LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT

endif

; JIT_WriteBarrier_WriteWatch_Region64
;
; Write barrier variant that first records the store in the software write
; watch table and then consults a per-region generation table before deciding
; whether a card has to be marked.
;
; In:      rcx = destination address, rdx = value being stored
; Trashes: rax, rcx, rdx, r8, r9 and the flags
;
; Every 0F0F0F0F0F0F0F0F0h immediate and every 16h shift count that directly
; follows a PATCH_LABEL is a placeholder operand. Presumably the runtime
; overwrites these with the real table addresses, range bounds and region
; shift count - TODO confirm against the code that consumes the patch labels.
; The nop padding below exists to keep those operands aligned, so do not
; reorder or resize instructions without re-checking the alignment.
LEAF_ENTRY JIT_WriteBarrier_WriteWatch_Region64, _TEXT
align 8

; Do the move into the GC heap. It is correct to take an AV here, the EH code
; figures out that this came from a WriteBarrier and correctly maps it back
; to the managed method which called the WriteBarrier (see setup in
; InitializeExceptionHandling, vm\exceptionhandling.cpp).
mov [rcx], rdx

; Update the write watch table if necessary
; rax = write watch table base (placeholder immediate) + (destination >> 0Ch)
mov rax, rcx
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_WriteWatchTable
mov r8, 0F0F0F0F0F0F0F0F0h
shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift
add rax, r8
; keep an unshifted copy of the destination address for the card computation
mov r8, rcx
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrDest
shr rcx, 16h ; compute region index
; only write the write watch byte when it is still zero; the jump target is
; the patch label that marks the next instruction
cmp byte ptr [rax], 0h
jne JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration
mov byte ptr [rax], 0FFh

; rax = base of the region-to-generation table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionToGeneration
mov rax, 0F0F0F0F0F0F0F0F0h

; Check whether the region we're storing into is gen 0 - nothing to do in this case
cmp byte ptr [rcx + rax], 0
jne NotGen0
REPRET

NOP_2_BYTE ; padding for alignment of constant
NOP_2_BYTE ; padding for alignment of constant
NOP_2_BYTE ; padding for alignment of constant

NotGen0:
; return if the stored value lies below the lower bound (placeholder immediate)
; NOTE(review): presumably the bound is the GC's ephemeral_low - confirm
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Lower
mov r9, 0F0F0F0F0F0F0F0F0h
cmp rdx, r9
jae NotLow
ret
NotLow:
; return if the stored value lies at or above the upper bound (placeholder immediate)
; NOTE(review): presumably the bound is the GC's ephemeral_high - confirm
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_Upper
mov r9, 0F0F0F0F0F0F0F0F0h
cmp rdx, r9
jb NotHigh
REPRET
NotHigh:
; Compare the table byte of the stored value's region (dl) with the table byte
; of the destination's region. A card is only needed when the former is
; unsigned-below the latter; otherwise return.
; NOTE(review): the jump target is spelled isOldToYoung while the label is
; IsOldToYoung - this relies on the assembler matching symbols
; case-insensitively; confirm the build's case-mapping setting.
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_RegionShrSrc
shr rdx, 16h ; compute region index
mov dl, [rdx + rax]
cmp dl, [rcx + rax]
jb isOldToYoung
REPRET
nop

IsOldToYoung:
; rax = base of the card table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardTable
mov rax, 0F0F0F0F0F0F0F0F0h

; r8  = destination >> 0Bh       : byte offset into the card table (one byte per 2 KB)
; ecx = (destination >> 8) & 7   : bit number within that byte (one bit per 256 bytes)
; dl  = 1 << ecx                 : mask for that bit
mov ecx, r8d
shr r8, 0Bh
shr ecx, 8
and ecx, 7
mov dl, 1
shl dl, cl
; only write to the card table when the bit is still clear
test byte ptr [r8 + rax], dl
je UpdateCardTable
REPRET

UpdateCardTable:
; locked read-modify-write so that the other bits of the card byte are preserved
lock or byte ptr [r8 + rax], dl
ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
; rax = base of the card bundle table (placeholder immediate)
PATCH_LABEL JIT_WriteBarrier_WriteWatch_Region64_Patch_Label_CardBundleTable
mov rax, 0F0F0F0F0F0F0F0F0h
; r8 already holds destination >> 0Bh, so this yields destination >> 15h:
; the byte offset into the card bundle table
shr r8, 0Ah
; skip the store when the bundle byte is already 0FFh
cmp byte ptr [r8 + rax], 0FFh
jne UpdateCardBundleTable
REPRET

UpdateCardBundleTable:
mov byte ptr [r8 + rax], 0FFh
endif
ret
LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_Region64, _TEXT

endif


Expand Down
Loading