Skip to content

Commit

Permalink
Reduce llvm-gsymutil memory usage (llvm#91023)
Browse files Browse the repository at this point in the history
llvm-gsymutil eats a lot of RAM. On some large binaries, it causes OOM's on smaller hardware, consuming well over 64GB of RAM. This change frees line tables once we're done with them, and frees DWARFUnits's DIE's when we finish processing each DU, though they may get reconstituted if there are references from other DU's during processing. Once the conversion is complete, all DIE's are freed. The reduction in peak memory usage from these changes showed between 7-12% in my tests.

The double-checked locking around the creation & freeing of the data structures was tested on a 166 core system. I validated that it trivially malfunctioned without the locks (and with stupid reordering of the locks) and worked reliably with them.

---------

Co-authored-by: Kevin Frei <[email protected]>
  • Loading branch information
kevinfrei and kevinfrei authored Jul 2, 2024
1 parent e94a00c commit 60cd3eb
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 14 deletions.
23 changes: 20 additions & 3 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/RWMutex.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
Expand Down Expand Up @@ -257,6 +258,10 @@ class DWARFUnit {

std::shared_ptr<DWARFUnit> DWO;

mutable llvm::sys::RWMutex FreeDIEsMutex;
mutable llvm::sys::RWMutex ExtractCUDieMutex;
mutable llvm::sys::RWMutex ExtractNonCUDIEsMutex;

protected:
friend dwarf_linker::parallel::CompileUnit;

Expand Down Expand Up @@ -566,6 +571,9 @@ class DWARFUnit {

Error tryExtractDIEsIfNeeded(bool CUDieOnly);

/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie);

private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
Expand All @@ -577,13 +585,22 @@ class DWARFUnit {
/// hasn't already been done
void extractDIEsIfNeeded(bool CUDieOnly);

/// extracCUDieIfNeeded - Parse CU DIE if it hasn't already been done.
/// Only to be used from extractDIEsIfNeeded, which holds the correct locks.
bool extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie);

/// extractNonCUDIEsIfNeeded - Parses non-CU DIE's for a given CU if needed.
/// Only to be used from extractDIEsIfNeeded, which holds the correct locks.
Error extractNonCUDIEsIfNeeded(bool HasCUDie);

/// extractNonCUDIEsHelper - helper to be invoked *only* from inside
/// tryExtractDIEsIfNeeded, which holds the correct locks.
Error extractNonCUDIEsHelper();

/// extractDIEsToVector - Appends all parsed DIEs to a vector.
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
std::vector<DWARFDebugInfoEntry> &DIEs) const;

/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie);

/// parseDWO - Parses .dwo file for current compile unit. Returns true if
/// it was actually constructed.
/// The \p AlternativeLocation specifies an alternative location to get
Expand Down
81 changes: 71 additions & 10 deletions llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,21 +495,78 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
Context.getRecoverableErrorHandler()(std::move(e));
}

Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if ((CUDieOnly && !DieArray.empty()) ||
DieArray.size() > 1)
return Error::success(); // Already parsed.
static bool DoubleCheckedRWLocker(llvm::sys::RWMutex &Mutex,
const std::function<bool()> &reader,
const std::function<void()> &writer) {
{
llvm::sys::ScopedReader Lock(Mutex);
if (reader())
return true;
}
llvm::sys::ScopedWriter Lock(Mutex);
if (reader())
return true;
// If we get here, then the reader function returned false. This means that
// no one else is currently writing to this data structure and it's safe for
// us to write to it now. The scoped writer lock guarantees there are no
// other readers or writers at this point.
writer();
return false;
}

bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
// Helper to safely check if the Compile-Unit DIE has been extracted already.
// If not, then extract it, and return false, indicating that it was *not*
// already extracted.
bool DWARFUnit::extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie) {
return DoubleCheckedRWLocker(
ExtractCUDieMutex,
// Calculate if the CU DIE has been extracted already.
[&]() {
return ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1);
},
// Lambda to extract the CU DIE.
[&]() {
HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
});
}

if (DieArray.empty())
return Error::success();
// Helper to safely check if the non-Compile-Unit DIEs have been parsed
// already. If they haven't been parsed, go ahead and parse them.
Error DWARFUnit::extractNonCUDIEsIfNeeded(bool HasCUDie) {
Error Result = Error::success();
DoubleCheckedRWLocker(
ExtractNonCUDIEsMutex,
// Lambda to check if all DIEs have been extracted already.
[=]() { return (DieArray.empty() || HasCUDie); },
// Lambda to extract all the DIEs using the helper function
[&]() {
if (Error E = extractNonCUDIEsHelper()) {
// Consume the success placeholder and save the actual error
consumeError(std::move(Result));
Result = std::move(E);
}
});
return Result;
}

// If CU DIE was just parsed, copy several attribute values from it.
if (HasCUDie)
Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
// Acquire the FreeDIEsMutex lock (in read-mode) to prevent the Compile Unit
// DIE from being freed by a thread calling clearDIEs() after the CU DIE was
// parsed, but before the rest of the DIEs are parsed, as there are no other
// locks held during that brief period.
llvm::sys::ScopedReader FreeLock(FreeDIEsMutex);
bool HasCUDie = false;
if (extractCUDieIfNeeded(CUDieOnly, HasCUDie))
return Error::success();
// Right here is where the above-mentioned race condition exists.
return extractNonCUDIEsIfNeeded(HasCUDie);
}

// Helper used from the tryExtractDIEsIfNeeded function: it must already have
// acquired the ExtractNonCUDIEsMutex for writing.
Error DWARFUnit::extractNonCUDIEsHelper() {
// If CU DIE was just parsed, copy several attribute values from it.
DWARFDie UnitDie(this, &DieArray[0]);
if (std::optional<uint64_t> DWOId =
toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Expand Down Expand Up @@ -653,6 +710,10 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
}

void DWARFUnit::clearDIEs(bool KeepCUDie) {
// We need to acquire the FreeDIEsMutex lock in write-mode, because we are
// going to free the DIEs, when other threads might be trying to create them.
llvm::sys::ScopedWriter FreeLock(FreeDIEsMutex);

// Do not use resize() + shrink_to_fit() to free memory occupied by dies.
// shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
// It depends on the implementation whether the request is fulfilled.
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Out, CUI, Die);
// Release the line table, once we're done.
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
Expand All @@ -612,11 +617,16 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
std::string storage;
raw_string_ostream StrStream(storage);
OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
handleDie(ThreadOut, CUI, Die);
// Release the line table once we're done.
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false);
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
if (Out.GetOS()) {
Expand All @@ -629,6 +639,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
}
pool.wait();
}
// Now get rid of all the DIEs that may have been recreated
for (const auto &CU : DICtx.compile_units())
CU->clearDIEs(/*KeepCUDie=*/false);
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();
Expand Down

0 comments on commit 60cd3eb

Please sign in to comment.