Skip to content

Commit

Permalink
Switch back to setting the type tag in LLVM IR, rather than in C.
Browse files Browse the repository at this point in the history
The tradeoff here is:
- the compiler gets to see the instruction that sets the type tag, so that instruction can participate in optimization
- but every allocation site grows by one instruction, which increases code size and compilation time.
  • Loading branch information
NHDaly committed Jun 30, 2023
1 parent b513082 commit 45f3f5e
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 9 deletions.
6 changes: 2 additions & 4 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,10 +1014,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
return val;
}
// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
JL_DLLEXPORT jl_value_t *jl_gc_big_alloc_typed(jl_ptls_t ptls, size_t sz, jl_value_t *type)
JL_DLLEXPORT jl_value_t *jl_gc_big_alloc_instrumented(jl_ptls_t ptls, size_t sz, jl_value_t *type)
{
jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
jl_set_typeof(val, type);
maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
return val;
}
Expand Down Expand Up @@ -1333,11 +1332,10 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
return val;
}
// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc_typed(jl_ptls_t ptls, int pool_offset,
JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc_instrumented(jl_ptls_t ptls, int pool_offset,
int osize, jl_value_t* type)
{
jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
jl_set_typeof(val, type);
maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
return val;
}
Expand Down
4 changes: 2 additions & 2 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
XX(jl_gc_alloc_3w) \
XX(jl_gc_alloc_typed) \
XX(jl_gc_big_alloc) \
XX(jl_gc_big_alloc_typed) \
XX(jl_gc_big_alloc_instrumented) \
XX(jl_gc_collect) \
XX(jl_gc_conservative_gc_support_enabled) \
XX(jl_gc_counted_calloc) \
Expand Down Expand Up @@ -186,7 +186,7 @@
XX(jl_gc_new_weakref_th) \
XX(jl_gc_num) \
XX(jl_gc_pool_alloc) \
XX(jl_gc_pool_alloc_typed) \
XX(jl_gc_pool_alloc_instrumented) \
XX(jl_gc_queue_multiroot) \
XX(jl_gc_queue_root) \
XX(jl_gc_safepoint) \
Expand Down
18 changes: 17 additions & 1 deletion src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2384,7 +2384,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {

// Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
// `julia.gc_alloc_obj` except it specializes the call based on the constant
// size of the object to allocate, to save one indirection.
// size of the object to allocate, to save one indirection, and doesn't set
// the type tag. (Note that if the size is not a constant, it will call
// gc_alloc_obj, and will redundantly set the tag.)
auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
Expand All @@ -2400,6 +2402,20 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
});
newI->takeName(CI);

// Now, finally, set the tag. We do this in IR instead of in the C alloc
// function, to provide possible optimization opportunities. (I think? TBH
// the most recent editor of this code is not entirely clear on why we
// prefer to set the tag in the generated code. Providing optimization
// opportunities is the most likely reason; the tradeoff is slightly
// larger code size and increased compilation time, compiling this
// instruction at every allocation site, rather than once in the C alloc
// function.)
auto &M = *builder.GetInsertBlock()->getModule();
StoreInst *store = builder.CreateAlignedStore(
tag, EmitTagPtr(builder, tag_type, T_size, newI), M.getDataLayout().getPointerABIAlignment(0));
store->setOrdering(AtomicOrdering::Unordered);
store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);

// Replace uses of the call to `julia.gc_alloc_obj` with the call to
// `julia.gc_alloc_bytes`.
CI->replaceAllUsesWith(newI);
Expand Down
4 changes: 2 additions & 2 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ namespace jl_intrinsics {
}

namespace jl_well_known {
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc_typed);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc_typed);
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc_instrumented);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc_instrumented);
static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);

Expand Down

0 comments on commit 45f3f5e

Please sign in to comment.