Skip to content

Commit

Permalink
Add stats to various code paths (#46407)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Sep 5, 2022
1 parent 64378db commit ff69a48
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 3 deletions.
14 changes: 14 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

// target support
#include <llvm/ADT/Triple.h>
#include <llvm/ADT/Statistic.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/IR/DataLayout.h>
Expand Down Expand Up @@ -63,6 +64,14 @@ using namespace llvm;
#include "jitlayers.h"
#include "julia_assert.h"

// NOTE(review): STATISTIC is presumably LLVM's macro from <llvm/ADT/Statistic.h>,
// which files each counter under the DEBUG_TYPE group defined here -- confirm.
#define DEBUG_TYPE "julia_aotcompile"

// Incremented once per jl_ci_cache_lookup call.
STATISTIC(CICacheLookups, "Number of codeinst cache lookups");
// Incremented once per jl_create_native_impl call.
STATISTIC(CreateNativeCalls, "Number of jl_create_native calls made");
// Accumulates emitted.size() in jl_create_native_impl.
STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native");
// Raised with updateMax(jl_array_len(methods)) on entry to jl_create_native_impl.
STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native");
// Accumulates gvars.size() in jl_create_native_impl.
STATISTIC(CreateNativeGlobals, "Number of globals compiled for jl_create_native");

template<class T> // for GlobalObject's
static T *addComdat(T *G)
{
Expand Down Expand Up @@ -215,6 +224,7 @@ static void makeSafeName(GlobalObject &G)

static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out)
{
++CICacheLookups;
jl_value_t *ci = cgparams.lookup(mi, world, world);
JL_GC_PROMISE_ROOTED(ci);
jl_code_instance_t *codeinst = NULL;
Expand Down Expand Up @@ -253,6 +263,8 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
extern "C" JL_DLLEXPORT
void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy)
{
++CreateNativeCalls;
CreateNativeMax.updateMax(jl_array_len(methods));
if (cgparams == NULL)
cgparams = &jl_default_cgparams;
jl_native_code_desc_t *data = new jl_native_code_desc_t;
Expand Down Expand Up @@ -334,6 +346,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
gvars.push_back(std::string(global.second->getName()));
data->jl_value_to_llvm[global.first] = gvars.size();
}
CreateNativeMethods += emitted.size();

// clones the contents of the module `m` to the shadow_output collector
// while examining and recording what kind of function pointer we have
Expand Down Expand Up @@ -376,6 +389,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
G->setLinkage(GlobalVariable::InternalLinkage);
data->jl_sysimg_gvars.push_back(G);
}
CreateNativeGlobals += gvars.size();

//Safe b/c context is locked by params
#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
Expand Down
46 changes: 44 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <stdint.h>

#include "llvm/IR/Mangler.h"
#include <llvm/ADT/Statistic.h>
#include <llvm/ADT/StringMap.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
Expand Down Expand Up @@ -52,6 +53,23 @@ using namespace llvm;
# include <llvm/ExecutionEngine/SectionMemoryManager.h>
#endif

#define DEBUG_TYPE "julia_jitlayers"

// Counters for the JIT code paths in this file. Each comment names the visible
// update site of the counter that follows it.

// Incremented once per jl_link_global call.
STATISTIC(LinkedGlobals, "Number of globals linked");
// Incremented in _jl_compile_codeinst after the emitted modules are added.
STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly");
// Raised with updateMax(emitted.size()) in _jl_compile_codeinst.
STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue");
// Accumulates emitted.size() - 1 in _jl_compile_codeinst.
STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled");
// Incremented in jl_generate_fptr_impl just before _jl_compile_codeinst.
STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled");
// Incremented in jl_generate_fptr_for_unspecialized_impl; the description must
// say "unspecialized" so it is distinguishable from SpecFPtrCount in the output.
STATISTIC(UnspecFPtrCount, "Number of unspecialized function pointers compiled");
// Incremented once per JuliaOJIT::addModule call.
STATISTIC(ModulesAdded, "Number of modules added to the JIT");
// Incremented once per JuliaOJIT::OptSelLayerT::emit call.
STATISTIC(ModulesOptimized, "Number of modules optimized by the JIT");
// Exactly one of OptO0..OptO3 is incremented per optimized module, selected by
// a switch on the optimization level.
STATISTIC(OptO0, "Number of modules optimized at level -O0");
STATISTIC(OptO1, "Number of modules optimized at level -O1");
STATISTIC(OptO2, "Number of modules optimized at level -O2");
STATISTIC(OptO3, "Number of modules optimized at level -O3");
// Incremented once per jl_merge_module call.
STATISTIC(ModulesMerged, "Number of modules merged");
// NOTE(review): incremented once on entry to JuliaOJIT::shareStrings, i.e. per
// call rather than per interned global -- confirm this matches the description.
STATISTIC(InternedGlobals, "Number of global constants interned in the string pool");

#ifdef _COMPILER_MSAN_ENABLED_
// TODO: This should not be necessary on ELF x86_64, but LLVM's implementation
// of the TLS relocations is currently broken, so enable this unconditionally.
Expand Down Expand Up @@ -104,8 +122,6 @@ static void *getTLSAddress(void *control)
}
#endif

#define DEBUG_TYPE "jitlayers"

// Snooping on which functions are being compiled, and how long it takes
extern "C" JL_DLLEXPORT
void jl_dump_compiles_impl(void *s)
Expand All @@ -124,6 +140,7 @@ static uint64_t getAddressForFunction(StringRef fname);

void jl_link_global(GlobalVariable *GV, void *addr)
{
++LinkedGlobals;
Constant *P = literal_static_pointer_val(addr, GV->getValueType());
GV->setInitializer(P);
if (jl_options.image_codegen) {
Expand Down Expand Up @@ -214,6 +231,9 @@ static jl_callptr_t _jl_compile_codeinst(
orc::ThreadSafeModule &M = std::get<0>(def.second);
jl_add_to_ee(M, NewExports);
}
++CompiledCodeinsts;
MaxWorkqueueSize.updateMax(emitted.size());
IndirectCodeinsts += emitted.size() - 1;
}
JL_TIMING(LLVM_MODULE_FINISH);

Expand Down Expand Up @@ -409,6 +429,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing);
}
}
++SpecFPtrCount;
_jl_compile_codeinst(codeinst, src, world, context);
if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL)
codeinst = NULL;
Expand Down Expand Up @@ -459,6 +480,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
src = (jl_code_info_t*)unspec->def->uninferred;
}
assert(src && jl_is_code_info(src));
++UnspecFPtrCount;
_jl_compile_codeinst(unspec, src, unspec->min_world, context);
if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
// if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
Expand Down Expand Up @@ -551,6 +573,7 @@ static auto countBasicBlocks(const Function &F)
}

void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
++ModulesOptimized;
size_t optlevel = SIZE_MAX;
TSM.withModuleDo([&](Module &M) {
if (jl_generating_output()) {
Expand Down Expand Up @@ -1051,6 +1074,22 @@ namespace {
}
}
});
switch (optlevel) {
case 0:
++OptO0;
break;
case 1:
++OptO1;
break;
case 2:
++OptO2;
break;
case 3:
++OptO3;
break;
default:
llvm_unreachable("optlevel is between 0 and 3!");
}
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
}
private:
Expand Down Expand Up @@ -1230,6 +1269,7 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)
void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
{
JL_TIMING(LLVM_MODULE_FINISH);
++ModulesAdded;
std::vector<std::string> NewExports;
TSM.withModuleDo([&](Module &M) {
jl_decorate_module(M);
Expand Down Expand Up @@ -1408,6 +1448,7 @@ JuliaOJIT *jl_ExecutionEngine;
// Comdat is also removed, since the JIT doesn't need it
void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTSM)
{
++ModulesMerged;
destTSM.withModuleDo([&](Module &dest) {
srcTSM.withModuleDo([&](Module &src) {
assert(&dest != &src && "Cannot merge module with itself!");
Expand Down Expand Up @@ -1521,6 +1562,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS
// making a copy per object file of output.
void JuliaOJIT::shareStrings(Module &M)
{
++InternedGlobals;
std::vector<GlobalVariable*> erase;
for (auto &GV : M.globals()) {
if (!GV.hasInitializer() || !GV.isConstant())
Expand Down
15 changes: 15 additions & 0 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "llvm-version.h"
#include "passes.h"

#include <llvm/ADT/Statistic.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IntrinsicInst.h>
Expand All @@ -18,6 +19,13 @@
#include "llvm-pass-helpers.h"

#define DEBUG_TYPE "final_gc_lowering"
// One counter per FinalLowerGC::lower* routine. Each is incremented as the first
// statement of the routine that lowers the intrinsic named in its description.
STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
STATISTIC(PushGCFrameCount, "Number of lowered pushGCFrameFunc intrinsics");
STATISTIC(PopGCFrameCount, "Number of lowered popGCFrameFunc intrinsics");
STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics");
STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
STATISTIC(QueueGCBindingCount, "Number of lowered queueGCBindingFunc intrinsics");

using namespace llvm;

Expand Down Expand Up @@ -66,6 +74,7 @@ struct FinalLowerGC: private JuliaPassContext {

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
{
++NewGCFrameCount;
assert(target->arg_size() == 1);
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);

Expand Down Expand Up @@ -107,6 +116,7 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)

void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
{
++PushGCFrameCount;
assert(target->arg_size() == 2);
auto gcframe = target->getArgOperand(0);
unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);
Expand Down Expand Up @@ -136,6 +146,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)

void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
{
++PopGCFrameCount;
assert(target->arg_size() == 1);
auto gcframe = target->getArgOperand(0);

Expand All @@ -155,6 +166,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)

Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
{
++GetGCFrameSlotCount;
assert(target->arg_size() == 2);
auto gcframe = target->getArgOperand(0);
auto index = target->getArgOperand(1);
Expand All @@ -174,20 +186,23 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)

// Lowers a queueGCRoot intrinsic call in place: bumps the QueueGCRootCount
// statistic, repoints the call at queueRootFunc, and hands back that same call
// instruction. The call is expected to carry exactly one argument; `F` is unused.
Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
{
    ++QueueGCRootCount;
    assert(target->arg_size() == 1);

    // Only the callee changes; the instruction and its operand are kept as-is.
    CallInst *const lowered = target;
    lowered->setCalledFunction(queueRootFunc);
    return lowered;
}

// Lowers a queueGCBinding intrinsic call in place: bumps the QueueGCBindingCount
// statistic, repoints the call at queueBindingFunc, and hands back that same call
// instruction. The call is expected to carry exactly one argument; `F` is unused.
Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F)
{
    ++QueueGCBindingCount;
    assert(target->arg_size() == 1);

    // Only the callee changes; the instruction and its operand are kept as-is.
    CallInst *const lowered = target;
    lowered->setCalledFunction(queueBindingFunc);
    return lowered;
}

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
++GCAllocBytesCount;
assert(target->arg_size() == 2);
auto sz = (size_t)cast<ConstantInt>(target->getArgOperand(1))->getZExtValue();
// This is strongly architecture and OS dependent
Expand Down
5 changes: 5 additions & 0 deletions src/llvm-lower-handlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <llvm-c/Types.h>

#include <llvm/ADT/DepthFirstIterator.h>
#include <llvm/ADT/Statistic.h>
#include <llvm/Analysis/CFG.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
Expand All @@ -27,6 +28,8 @@

#define DEBUG_TYPE "lower_handlers"
#undef DEBUG
// Raised with updateMax(MaxDepth) in lowerExcHandlers once the per-block depths
// have been recorded.
STATISTIC(MaxExceptionHandlerDepth, "Maximum nesting of exception handlers");
// Incremented by MaxDepth in lowerExcHandlers.
// NOTE(review): presumably one handler buffer is allocated per nesting level -- confirm.
STATISTIC(ExceptionHandlerBuffers, "Number of exception handler buffers inserted");

using namespace llvm;

Expand Down Expand Up @@ -156,6 +159,8 @@ static bool lowerExcHandlers(Function &F) {
/* Remember the depth at the BB boundary */
ExitDepth[BB] = Depth;
}
MaxExceptionHandlerDepth.updateMax(MaxDepth);
ExceptionHandlerBuffers += MaxDepth;

/* Step 2: EH Frame lowering */
// Allocate stack space for each handler. We allocate these as separate
Expand Down
6 changes: 5 additions & 1 deletion src/llvm-simdloop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ STATISTIC(SimdLoops, "Number of loops with SIMD instructions");
STATISTIC(IVDepInstructions, "Number of instructions marked ivdep");
STATISTIC(ReductionChains, "Number of reduction chains folded");
STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction chain");
STATISTIC(MaxChainLength, "Max length of reduction chain");
STATISTIC(AddChains, "Addition reduction chains");
STATISTIC(MulChains, "Multiply reduction chains");

Expand Down Expand Up @@ -119,11 +120,14 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L)
break;
}
++ReductionChains;
int length = 0;
for (chainVector::const_iterator K=chain.begin(); K!=chain.end(); ++K) {
LLVM_DEBUG(dbgs() << "LSL: marking " << **K << "\n");
(*K)->setFast(true);
++ReductionChainLength;
++length;
}
ReductionChainLength += length;
MaxChainLength.updateMax(length);
}

static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Function &)> GetLI)
Expand Down

2 comments on commit ff69a48

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected. A full report can be found here.

Please sign in to comment.