From 60cd3eb880fe48d192a58c64a1e38e875fc65377 Mon Sep 17 00:00:00 2001 From: Kevin Frei Date: Tue, 2 Jul 2024 10:14:26 -0700 Subject: [PATCH] Reduce llvm-gsymutil memory usage (#91023) llvm-gsymutil eats a lot of RAM. On some large binaries, it causes OOM's on smaller hardware, consuming well over 64GB of RAM. This change frees line tables once we're done with them, and frees DWARFUnits's DIE's when we finish processing each DU, though they may get reconstituted if there are references from other DU's during processing. Once the conversion is complete, all DIE's are freed. The reduction in peak memory usage from these changes showed between 7-12% in my tests. The double-checked locking around the creation & freeing of the data structures was tested on a 166 core system. I validated that it trivially malfunctioned without the locks (and with stupid reordering of the locks) and worked reliably with them. --------- Co-authored-by: Kevin Frei --- llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 23 +++++- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 81 ++++++++++++++++--- llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 15 +++- 3 files changed, 105 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 80c27aea893123..26ef7db718dd54 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -22,6 +22,7 @@ #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/RWMutex.h" #include #include #include @@ -257,6 +258,10 @@ class DWARFUnit { std::shared_ptr DWO; + mutable llvm::sys::RWMutex FreeDIEsMutex; + mutable llvm::sys::RWMutex ExtractCUDieMutex; + mutable llvm::sys::RWMutex ExtractNonCUDIEsMutex; + protected: friend dwarf_linker::parallel::CompileUnit; @@ -566,6 +571,9 @@ class DWARFUnit { Error tryExtractDIEsIfNeeded(bool CUDieOnly); + /// clearDIEs - Clear parsed DIEs to keep memory usage low. + void clearDIEs(bool KeepCUDie); + private: /// Size in bytes of the .debug_info data associated with this compile unit. size_t getDebugInfoSize() const { @@ -577,13 +585,22 @@ class DWARFUnit { /// hasn't already been done void extractDIEsIfNeeded(bool CUDieOnly); + /// extracCUDieIfNeeded - Parse CU DIE if it hasn't already been done. + /// Only to be used from extractDIEsIfNeeded, which holds the correct locks. + bool extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie); + + /// extractNonCUDIEsIfNeeded - Parses non-CU DIE's for a given CU if needed. + /// Only to be used from extractDIEsIfNeeded, which holds the correct locks. + Error extractNonCUDIEsIfNeeded(bool HasCUDie); + + /// extractNonCUDIEsHelper - helper to be invoked *only* from inside + /// tryExtractDIEsIfNeeded, which holds the correct locks. + Error extractNonCUDIEsHelper(); + /// extractDIEsToVector - Appends all parsed DIEs to a vector. void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, std::vector &DIEs) const; - /// clearDIEs - Clear parsed DIEs to keep memory usage low. - void clearDIEs(bool KeepCUDie); - /// parseDWO - Parses .dwo file for current compile unit. Returns true if /// it was actually constructed. /// The \p AlternativeLocation specifies an alternative location to get diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index bdd04b00f557bd..2760cef7edfdbc 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -495,21 +495,78 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { Context.getRecoverableErrorHandler()(std::move(e)); } -Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { - if ((CUDieOnly && !DieArray.empty()) || - DieArray.size() > 1) - return Error::success(); // Already parsed. +static bool DoubleCheckedRWLocker(llvm::sys::RWMutex &Mutex, + const std::function &reader, + const std::function &writer) { + { + llvm::sys::ScopedReader Lock(Mutex); + if (reader()) + return true; + } + llvm::sys::ScopedWriter Lock(Mutex); + if (reader()) + return true; + // If we get here, then the reader function returned false. This means that + // no one else is currently writing to this data structure and it's safe for + // us to write to it now. The scoped writer lock guarantees there are no + // other readers or writers at this point. + writer(); + return false; +} - bool HasCUDie = !DieArray.empty(); - extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray); +// Helper to safely check if the Compile-Unit DIE has been extracted already. +// If not, then extract it, and return false, indicating that it was *not* +// already extracted. +bool DWARFUnit::extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie) { + return DoubleCheckedRWLocker( + ExtractCUDieMutex, + // Calculate if the CU DIE has been extracted already. + [&]() { + return ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1); + }, + // Lambda to extract the CU DIE. + [&]() { + HasCUDie = !DieArray.empty(); + extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray); + }); +} - if (DieArray.empty()) - return Error::success(); +// Helper to safely check if the non-Compile-Unit DIEs have been parsed +// already. If they haven't been parsed, go ahead and parse them. +Error DWARFUnit::extractNonCUDIEsIfNeeded(bool HasCUDie) { + Error Result = Error::success(); + DoubleCheckedRWLocker( + ExtractNonCUDIEsMutex, + // Lambda to check if all DIEs have been extracted already. + [=]() { return (DieArray.empty() || HasCUDie); }, + // Lambda to extract all the DIEs using the helper function + [&]() { + if (Error E = extractNonCUDIEsHelper()) { + // Consume the success placeholder and save the actual error + consumeError(std::move(Result)); + Result = std::move(E); + } + }); + return Result; +} - // If CU DIE was just parsed, copy several attribute values from it. - if (HasCUDie) +Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { + // Acquire the FreeDIEsMutex lock (in read-mode) to prevent the Compile Unit + // DIE from being freed by a thread calling clearDIEs() after the CU DIE was + // parsed, but before the rest of the DIEs are parsed, as there are no other + // locks held during that brief period. + llvm::sys::ScopedReader FreeLock(FreeDIEsMutex); + bool HasCUDie = false; + if (extractCUDieIfNeeded(CUDieOnly, HasCUDie)) return Error::success(); + // Right here is where the above-mentioned race condition exists. + return extractNonCUDIEsIfNeeded(HasCUDie); +} +// Helper used from the tryExtractDIEsIfNeeded function: it must already have +// acquired the ExtractNonCUDIEsMutex for writing. +Error DWARFUnit::extractNonCUDIEsHelper() { + // If CU DIE was just parsed, copy several attribute values from it. DWARFDie UnitDie(this, &DieArray[0]); if (std::optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) @@ -653,6 +710,10 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) { } void DWARFUnit::clearDIEs(bool KeepCUDie) { + // We need to acquire the FreeDIEsMutex lock in write-mode, because we are + // going to free the DIEs, when other threads might be trying to create them. + llvm::sys::ScopedWriter FreeLock(FreeDIEsMutex); + // Do not use resize() + shrink_to_fit() to free memory occupied by dies. // shrink_to_fit() is a *non-binding* request to reduce capacity() to size(). // It depends on the implementation whether the request is fulfilled. diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp index 601686fdd3dd51..e1b30648b6a773 100644 --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -587,6 +587,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { DWARFDie Die = getDie(*CU); CUInfo CUI(DICtx, dyn_cast(CU.get())); handleDie(Out, CUI, Die); + // Release the line table, once we're done. + DICtx.clearLineTableForUnit(CU.get()); + // Free any DIEs that were allocated by the DWARF parser. + // If/when they're needed by other CU's, they'll be recreated. + CU->clearDIEs(/*KeepCUDie=*/false); } } else { // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up @@ -612,11 +617,16 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { DWARFDie Die = getDie(*CU); if (Die) { CUInfo CUI(DICtx, dyn_cast(CU.get())); - pool.async([this, CUI, &LogMutex, &Out, Die]() mutable { + pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable { std::string storage; raw_string_ostream StrStream(storage); OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr); handleDie(ThreadOut, CUI, Die); + // Release the line table once we're done. + DICtx.clearLineTableForUnit(CU.get()); + // Free any DIEs that were allocated by the DWARF parser. + // If/when they're needed by other CU's, they'll be recreated. + CU->clearDIEs(/*KeepCUDie=*/false); // Print ThreadLogStorage lines into an actual stream under a lock std::lock_guard guard(LogMutex); if (Out.GetOS()) { @@ -629,6 +639,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { } pool.wait(); } + // Now get rid of all the DIEs that may have been recreated + for (const auto &CU : DICtx.compile_units()) + CU->clearDIEs(/*KeepCUDie=*/false); size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; return Error::success();