Skip to content

Commit

Permalink
Pool more JIT resources to reduce memory usage/contention (#44912)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Apr 12, 2022
1 parent 4c858f8 commit c0c60e8
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 113 deletions.
5 changes: 2 additions & 3 deletions doc/src/devdocs/locks.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@ The following are definitely leaf locks (level 1), and must not try to acquire a
> * gc_perm_lock
> * flisp
> * jl_in_stackwalk (Win32)
> * PM_mutex[i]
> * ContextPool::mutex
> * ResourcePool<?>::mutex
>
> > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool
> > likewise, orc::ThreadSafeContexts carry their own lock, the ContextPool::mutex just protects the pool
> > likewise, the ResourcePool<?>::mutexes just protect the associated resource pool
The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally:

Expand Down
185 changes: 112 additions & 73 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,58 +494,6 @@ static auto countBasicBlocks(const Function &F)
return std::distance(F.begin(), F.end());
}

// Runs this optimizer's pass manager over the module held by TSM and hands the
// (now optimized) module back wrapped in an Expected.
//
// When dump_llvm_opt_stream is set, a YAML record is written to it containing
// per-function statistics before and after optimization, the elapsed time in
// nanoseconds and the optimization level.
//
// R is part of the transform signature and is not used by this function.
OptimizerResultT JuliaOJIT::OptimizerT::operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
    TSM.withModuleDo([&](Module &M) {
        // Writes one YAML entry per defined function, skipping declarations
        // and "jfptr_" functions. Shared by the before and after dumps.
        auto dump_function_stats = [&M]() {
            for (auto &F : M.functions()) {
                if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
                    continue;
                }
                // Each function is printed as a YAML object with several attributes
                jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
                jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
                // countBasicBlocks() yields a signed iterator distance; convert it
                // explicitly so the vararg really has the type "%lu" expects.
                jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n",
                          static_cast<unsigned long>(countBasicBlocks(F)));
            }
        };

        uint64_t start_time = 0;
        if (dump_llvm_opt_stream != NULL) {
            // Print LLVM function statistics _before_ optimization
            // Print all the information about this invocation as a YAML object
            jl_printf(dump_llvm_opt_stream, "- \n");
            // We print the name and some statistics for each function in the module, both
            // before optimization and again afterwards.
            jl_printf(dump_llvm_opt_stream, " before: \n");
            dump_function_stats();

            // Start the clock only after the "before" dump so it is not counted.
            start_time = jl_hrtime();
        }

        JL_TIMING(LLVM_OPT);

        {
            //Lock around our pass manager
            std::lock_guard<std::mutex> lock(this->mutex);
            PM.run(M);
        }

        uint64_t end_time = 0;
        if (dump_llvm_opt_stream != NULL) {
            end_time = jl_hrtime();
            jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
            jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);

            // Print LLVM function statistics _after_ optimization
            jl_printf(dump_llvm_opt_stream, " after: \n");
            dump_function_stats();
        }
    });
    return Expected<orc::ThreadSafeModule>{std::move(TSM)};
}

void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
size_t optlevel = ~0ull;
TSM.withModuleDo([&](Module &M) {
Expand All @@ -570,7 +518,7 @@ void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsib
}
});
assert(optlevel != ~0ull && "Failed to select a valid optimization level!");
this->optimizers[optlevel].emit(std::move(R), std::move(TSM));
this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM));
}

void jl_register_jit_object(const object::ObjectFile &debugObj,
Expand Down Expand Up @@ -911,6 +859,106 @@ namespace {
.setCodeModel(TM.getCodeModel())
.setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
}

// Factory functor: holds a JITTargetMachineBuilder configured once at
// construction and produces a new TargetMachine from it on every call.
struct TMCreator {
    orc::JITTargetMachineBuilder JTMB;

    // Derives the builder from an existing TargetMachine and an optimization level.
    TMCreator(TargetMachine &TM, int optlevel)
        : JTMB(createJTMBFromTM(TM, optlevel))
    {
    }

    // Builds a fresh TargetMachine; cantFail() asserts that creation succeeded.
    std::unique_ptr<TargetMachine> operator()() {
        auto MaybeTM = JTMB.createTargetMachine();
        return cantFail(std::move(MaybeTM));
    }
};

// Factory functor that produces legacy pass managers populated for a fixed
// optimization level. It owns a private TargetMachine, which is passed to
// addPassesForOptLevel() each time a pass manager is created.
struct PMCreator {
    std::unique_ptr<TargetMachine> TM;
    int optlevel;

    // Creates a private TargetMachine derived from TM for the given optimization level.
    PMCreator(TargetMachine &TM, int optlevel)
        : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}

    // Copying builds a brand-new TargetMachine from the source's one.
    // NOTE(review): this dereferences other.TM, so copying a moved-from
    // PMCreator (whose TM is null) is not supported.
    PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {}

    // Moving only transfers the unique_ptr and copies an int, so it cannot throw.
    PMCreator(PMCreator &&other) noexcept : TM(std::move(other.TM)), optlevel(other.optlevel) {}

    // Member-wise swap; found by ADL and used by the copy-and-swap assignment below.
    friend void swap(PMCreator &self, PMCreator &other) noexcept {
        using std::swap;
        swap(self.TM, other.TM);
        swap(self.optlevel, other.optlevel);
    }

    // Unified copy/move assignment: the by-value parameter is copy- or
    // move-constructed by the caller and then swapped into *this.
    PMCreator &operator=(PMCreator other) {
        swap(*this, other);
        return *this;
    }

    // Builds a new pass manager and populates it for this optimization level.
    std::unique_ptr<legacy::PassManager> operator()() {
        auto PM = std::make_unique<legacy::PassManager>();
        addPassesForOptLevel(*PM, *TM, optlevel);
        return PM;
    }
};

struct OptimizerT {
OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}

OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
TSM.withModuleDo([&](Module &M) {
uint64_t start_time = 0;
if (dump_llvm_opt_stream != NULL) {
// Print LLVM function statistics _before_ optimization
// Print all the information about this invocation as a YAML object
jl_printf(dump_llvm_opt_stream, "- \n");
// We print the name and some statistics for each function in the module, both
// before optimization and again afterwards.
jl_printf(dump_llvm_opt_stream, " before: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
// Each function is printed as a YAML object with several attributes
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}

start_time = jl_hrtime();
}

JL_TIMING(LLVM_OPT);

//Run the optimization
(***PMs).run(M);

uint64_t end_time = 0;
if (dump_llvm_opt_stream != NULL) {
end_time = jl_hrtime();
jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time);
jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel);

// Print LLVM function statistics _after_ optimization
jl_printf(dump_llvm_opt_stream, " after: \n");
for (auto &F : M.functions()) {
if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
continue;
}
jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str());
jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount());
jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F));
}
}
});
return Expected<orc::ThreadSafeModule>{std::move(TSM)};
}
private:
int optlevel;
JuliaOJIT::ResourcePool<std::unique_ptr<legacy::PassManager>> PMs;
};

// IR compiler that borrows a TargetMachine from a resource pool for each
// module it compiles instead of owning a single one.
struct CompilerT : orc::IRCompileLayer::IRCompiler {

    // MO is forwarded to the IRCompiler base; the pool creates its
    // TargetMachines through a TMCreator bound to TM and optlevel.
    CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel)
        : orc::IRCompileLayer::IRCompiler(MO),
          TMs(TMCreator(TM, optlevel))
    {
    }

    // Compiles M to an object buffer using a pooled TargetMachine.
    Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override {
        // Bind the pool handle by reference so it is never moved; it goes out
        // of scope, handing the TargetMachine back, once the result is produced.
        auto &&PooledTM = *TMs;
        orc::SimpleCompiler Compile(**PooledTM);
        return Compile(M);
    }

    JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>> TMs;
};
}

llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
Expand All @@ -920,15 +968,14 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
return jl_data_layout;
}

// Builds the compile + optimize layer pair for one optimization level.
// The compile layer sits on top of BaseLayer and uses a CompilerT created for
// TM/optlevel; the optimize layer sits on top of the compile layer and uses an
// OptimizerT created for the same TM/optlevel.
JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
// OptimizeLayer reads from CompileLayer, so it relies on CompileLayer being
// declared (and therefore initialized) before it in PipelineT.
OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel)) {}

JuliaOJIT::JuliaOJIT()
: TM(createTargetMachine()),
DL(jl_create_datalayout(*TM)),
TMs{
cantFail(createJTMBFromTM(*TM, 0).createTargetMachine()),
cantFail(createJTMBFromTM(*TM, 1).createTargetMachine()),
cantFail(createJTMBFromTM(*TM, 2).createTargetMachine()),
cantFail(createJTMBFromTM(*TM, 3).createTargetMachine())
},
#if JL_LLVM_VERSION >= 130000
ES(cantFail(orc::SelfExecutorProcessControl::Create())),
#else
Expand All @@ -955,17 +1002,13 @@ JuliaOJIT::JuliaOJIT()
}
),
#endif
CompileLayer0(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 0))),
CompileLayer1(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 1))),
CompileLayer2(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 2))),
CompileLayer3(ES, ObjectLayer, std::make_unique<orc::ConcurrentIRCompiler>(createJTMBFromTM(*TM, 3))),
OptimizeLayers{
{ES, CompileLayer0, OptimizerT(PM0, PM_mutexes[0], 0)},
{ES, CompileLayer1, OptimizerT(PM1, PM_mutexes[1], 1)},
{ES, CompileLayer2, OptimizerT(PM2, PM_mutexes[2], 2)},
{ES, CompileLayer3, OptimizerT(PM3, PM_mutexes[3], 3)},
Pipelines{
std::make_unique<PipelineT>(ObjectLayer, *TM, 0),
std::make_unique<PipelineT>(ObjectLayer, *TM, 1),
std::make_unique<PipelineT>(ObjectLayer, *TM, 2),
std::make_unique<PipelineT>(ObjectLayer, *TM, 3),
},
OptSelLayer(OptimizeLayers)
OptSelLayer(Pipelines)
{
#ifdef JL_USE_JITLINK
# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
Expand All @@ -987,10 +1030,6 @@ JuliaOJIT::JuliaOJIT()
registerRTDyldJITObject(Object, LO, MemMgr);
});
#endif
addPassesForOptLevel(PM0, *TMs[0], 0);
addPassesForOptLevel(PM1, *TMs[1], 1);
addPassesForOptLevel(PM2, *TMs[2], 2);
addPassesForOptLevel(PM3, *TMs[3], 3);

// Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
// symbols in the program as well. The nullptr argument to the function
Expand Down
63 changes: 26 additions & 37 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,10 @@ class JuliaOJIT {
typedef orc::IRCompileLayer CompileLayerT;
typedef orc::IRTransformLayer OptimizeLayerT;
typedef object::OwningBinary<object::ObjectFile> OwningObj;
private:
template<typename ResourceT, size_t max = 0>
struct ResourcePool {
public:
ResourcePool(function_ref<ResourceT()> creator) : creator(std::move(creator)), mutex(std::make_unique<WNMutex>()) {}
ResourcePool(std::function<ResourceT()> creator) : creator(std::move(creator)), mutex(std::make_unique<WNMutex>()) {}
class OwningResource {
public:
OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {}
Expand All @@ -206,7 +205,7 @@ class JuliaOJIT {
OwningResource(OwningResource &&) = default;
OwningResource &operator=(OwningResource &&) = default;
~OwningResource() {
if (resource) pool.release_(std::move(*resource));
if (resource) pool.release(std::move(*resource));
}
ResourceT release() {
ResourceT res(std::move(*resource));
Expand Down Expand Up @@ -242,11 +241,15 @@ class JuliaOJIT {
llvm::Optional<ResourceT> resource;
};

OwningResource acquire() {
return OwningResource(*this, acquire_());
OwningResource operator*() {
return OwningResource(*this, acquire());
}

OwningResource get() {
return **this;
}

ResourceT acquire_() {
ResourceT acquire() {
std::unique_lock<std::mutex> lock(mutex->mutex);
if (!pool.empty()) {
return pool.pop_back_val();
Expand All @@ -259,13 +262,13 @@ class JuliaOJIT {
assert(!pool.empty() && "Expected resource pool to have a value!");
return pool.pop_back_val();
}
void release_(ResourceT &&resource) {
void release(ResourceT &&resource) {
std::lock_guard<std::mutex> lock(mutex->mutex);
pool.push_back(std::move(resource));
mutex->empty.notify_one();
}
private:
llvm::function_ref<ResourceT()> creator;
std::function<ResourceT()> creator;
size_t created = 0;
llvm::SmallVector<ResourceT, max == 0 ? 8 : max> pool;
struct WNMutex {
Expand All @@ -275,33 +278,31 @@ class JuliaOJIT {

std::unique_ptr<WNMutex> mutex;
};
struct OptimizerT {
OptimizerT(legacy::PassManager &PM, std::mutex &mutex, int optlevel) : optlevel(optlevel), PM(PM), mutex(mutex) {}

OptimizerResultT operator()(orc::ThreadSafeModule M, orc::MaterializationResponsibility &R);
private:
int optlevel;
legacy::PassManager &PM;
std::mutex &mutex;
struct PipelineT {
PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
CompileLayerT CompileLayer;
OptimizeLayerT OptimizeLayer;
};
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

struct OptSelLayerT : orc::IRLayer {

template<size_t N>
OptSelLayerT(OptimizeLayerT (&optimizers)[N]) : orc::IRLayer(optimizers[0].getExecutionSession(), optimizers[0].getManglingOptions()), optimizers(optimizers), count(N) {
OptSelLayerT(std::unique_ptr<PipelineT> (&optimizers)[N]) : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), optimizers[0]->OptimizeLayer.getManglingOptions()), optimizers(optimizers), count(N) {
static_assert(N > 0, "Expected array with at least one optimizer!");
}

void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;

private:
OptimizeLayerT *optimizers;
std::unique_ptr<PipelineT> *optimizers;
size_t count;
};

private:
// Custom object emission notification handler for the JuliaOJIT
template <typename ObjT, typename LoadResult>
void registerObject(const ObjT &Obj, const LoadResult &LO);

public:

JuliaOJIT();
Expand All @@ -321,13 +322,13 @@ class JuliaOJIT {
uint64_t getFunctionAddress(StringRef Name);
StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst);
auto getContext() {
return ContextPool.acquire();
return *ContextPool;
}
orc::ThreadSafeContext acquireContext() {
return ContextPool.acquire_();
return ContextPool.acquire();
}
void releaseContext(orc::ThreadSafeContext &&ctx) {
ContextPool.release_(std::move(ctx));
ContextPool.release(std::move(ctx));
}
const DataLayout& getDataLayout() const;
TargetMachine &getTargetMachine();
Expand All @@ -340,14 +341,6 @@ class JuliaOJIT {

std::unique_ptr<TargetMachine> TM;
DataLayout DL;
// Should be big enough that, in the common case, the
// object fits in its entirety
legacy::PassManager PM0; // per-optlevel pass managers
legacy::PassManager PM1;
legacy::PassManager PM2;
legacy::PassManager PM3;
std::mutex PM_mutexes[4];
std::unique_ptr<TargetMachine> TMs[4];

orc::ExecutionSession ES;
orc::JITDylib &GlobalJD;
Expand All @@ -359,11 +352,7 @@ class JuliaOJIT {
std::shared_ptr<RTDyldMemoryManager> MemMgr;
#endif
ObjLayerT ObjectLayer;
CompileLayerT CompileLayer0;
CompileLayerT CompileLayer1;
CompileLayerT CompileLayer2;
CompileLayerT CompileLayer3;
OptimizeLayerT OptimizeLayers[4];
std::unique_ptr<PipelineT> Pipelines[4];
OptSelLayerT OptSelLayer;

DenseMap<void*, std::string> ReverseLocalSymbolTable;
Expand Down

0 comments on commit c0c60e8

Please sign in to comment.