Skip to content

Commit

Permalink
[native] Add metrics for memory pushback mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
tanjialiang authored and xiaoxmeng committed Aug 13, 2024
1 parent 9db7e1b commit e782f02
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 45 deletions.
68 changes: 39 additions & 29 deletions presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

#include "presto_cpp/main/PeriodicMemoryChecker.h"
#include "presto_cpp/main/common/Configs.h"
#include "presto_cpp/main/common/Counters.h"
#include "presto_cpp/main/common/Utils.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/common/caching/AsyncDataCache.h"
#include "velox/common/memory/Memory.h"
#include "velox/common/time/Timer.h"

namespace facebook::presto {
PeriodicMemoryChecker::PeriodicMemoryChecker(Config config)
Expand Down Expand Up @@ -154,6 +157,7 @@ void PeriodicMemoryChecker::maybeDumpHeap() {
}

void PeriodicMemoryChecker::pushbackMemory() {
RECORD_METRIC_VALUE(kCounterMemoryPushbackCount);
const uint64_t currentMemBytes = systemUsedMemoryBytes();
VELOX_CHECK(config_.systemMemPushbackEnabled);
LOG(WARNING) << "System used memory " << velox::succinctBytes(currentMemBytes)
Expand All @@ -165,40 +169,46 @@ void PeriodicMemoryChecker::pushbackMemory() {
const uint64_t bytesToShrink = currentMemBytes - targetMemBytes;
VELOX_CHECK_GT(bytesToShrink, 0);

auto* cache = velox::cache::AsyncDataCache::getInstance();
auto systemConfig = SystemConfig::instance();
auto freedBytes = cache != nullptr ? cache->shrink(bytesToShrink) : 0;
if (freedBytes < bytesToShrink) {
try {
auto* memoryManager = velox::memory::memoryManager();
freedBytes += velox::memory::AllocationTraits::pageBytes(
memoryManager->allocator()->unmap(
velox::memory::AllocationTraits::numPages(
bytesToShrink - freedBytes)));
if (freedBytes < bytesToShrink &&
systemConfig->systemMemPushBackAbortEnabled()) {
memoryManager->shrinkPools(
bytesToShrink - freedBytes,
/*allowSpill=*/false,
/*allowAbort=*/true);
uint64_t latencyMs{0};
uint64_t freedBytes{0};
{
velox::MicrosecondTimer timer(&latencyMs);
auto* cache = velox::cache::AsyncDataCache::getInstance();
auto systemConfig = SystemConfig::instance();
freedBytes = cache != nullptr ? cache->shrink(bytesToShrink) : 0;
if (freedBytes < bytesToShrink) {
try {
auto* memoryManager = velox::memory::memoryManager();
freedBytes += velox::memory::AllocationTraits::pageBytes(
memoryManager->allocator()->unmap(
velox::memory::AllocationTraits::numPages(
bytesToShrink - freedBytes)));
if (freedBytes < bytesToShrink &&
systemConfig->systemMemPushBackAbortEnabled()) {
memoryManager->shrinkPools(
bytesToShrink - freedBytes,
/*allowSpill=*/false,
/*allowAbort=*/true);

// Try to shrink from cache again as aborted query might hold cache
// reference.
if (cache != nullptr) {
freedBytes += cache->shrink(bytesToShrink - freedBytes);
}
if (freedBytes < bytesToShrink) {
freedBytes += velox::memory::AllocationTraits::pageBytes(
memoryManager->allocator()->unmap(
velox::memory::AllocationTraits::numPages(
bytesToShrink - freedBytes)));
// Try to shrink from cache again as aborted query might hold cache
// reference.
if (cache != nullptr) {
freedBytes += cache->shrink(bytesToShrink - freedBytes);
}
if (freedBytes < bytesToShrink) {
freedBytes += velox::memory::AllocationTraits::pageBytes(
memoryManager->allocator()->unmap(
velox::memory::AllocationTraits::numPages(
bytesToShrink - freedBytes)));
}
}
} catch (const velox::VeloxException& ex) {
LOG(ERROR) << ex.what();
}
} catch (const velox::VeloxException& ex) {
LOG(ERROR) << ex.what();
}
}

RECORD_HISTOGRAM_METRIC_VALUE(
kCounterMemoryPushbackLatencyMs, latencyMs * 1000);
LOG(INFO) << "Shrunk " << velox::succinctBytes(freedBytes);
}
} // namespace facebook::presto
15 changes: 7 additions & 8 deletions presto-native-execution/presto_cpp/main/PeriodicTaskManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "presto_cpp/main/PrestoExchangeSource.h"
#include "presto_cpp/main/PrestoServer.h"
#include "presto_cpp/main/common/Counters.h"
#include "presto_cpp/main/http/HttpClient.h"
#include "presto_cpp/main/http/filters/HttpEndpointLatencyFilter.h"
#include "velox/common/base/PeriodicStatsReporter.h"
#include "velox/common/base/StatsReporter.h"
Expand Down Expand Up @@ -286,25 +285,25 @@ class HiveConnectorStatsReporter {
std::shared_ptr<velox::connector::hive::HiveConnector> connector)
: connector_(std::move(connector)),
numElementsMetricName_(fmt::format(
kCounterHiveFileHandleCacheNumElementsFormat,
kCounterHiveFileHandleCacheNumElementsFormat.toString(),
connector_->connectorId())),
pinnedSizeMetricName_(fmt::format(
kCounterHiveFileHandleCachePinnedSizeFormat,
kCounterHiveFileHandleCachePinnedSizeFormat.toString(),
connector_->connectorId())),
curSizeMetricName_(fmt::format(
kCounterHiveFileHandleCacheCurSizeFormat,
kCounterHiveFileHandleCacheCurSizeFormat.toString(),
connector_->connectorId())),
numAccumulativeHitsMetricName_(fmt::format(
kCounterHiveFileHandleCacheNumAccumulativeHitsFormat,
kCounterHiveFileHandleCacheNumAccumulativeHitsFormat.toString(),
connector_->connectorId())),
numAccumulativeLookupsMetricName_(fmt::format(
kCounterHiveFileHandleCacheNumAccumulativeLookupsFormat,
kCounterHiveFileHandleCacheNumAccumulativeLookupsFormat.toString(),
connector_->connectorId())),
numHitsMetricName_(fmt::format(
kCounterHiveFileHandleCacheNumHitsFormat,
kCounterHiveFileHandleCacheNumHitsFormat.toString(),
connector_->connectorId())),
numLookupsMetricName_(fmt::format(
kCounterHiveFileHandleCacheNumLookupsFormat,
kCounterHiveFileHandleCacheNumLookupsFormat.toString(),
connector_->connectorId())) {
DEFINE_METRIC(numElementsMetricName_, velox::StatType::AVG);
DEFINE_METRIC(pinnedSizeMetricName_, velox::StatType::AVG);
Expand Down
3 changes: 3 additions & 0 deletions presto-native-execution/presto_cpp/main/common/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ void registerPrestoMetrics() {
95,
99,
100);
DEFINE_METRIC(kCounterMemoryPushbackCount, facebook::velox::StatType::COUNT);
DEFINE_HISTOGRAM_METRIC(
kCounterMemoryPushbackLatencyMs, 10'000, 0, 100'000, 50, 90, 99, 100);

// NOTE: Metrics type exporting for file handle cache counters are in
// PeriodicTaskManager because they have dynamic names. The following counters
Expand Down
28 changes: 20 additions & 8 deletions presto-native-execution/presto_cpp/main/common/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,21 +136,33 @@ constexpr folly::StringPiece kCounterOsNumForcedContextSwitches{
"presto_cpp.os_num_forced_context_switches"};

/// ================== HiveConnector Counters ==================

/// Format template strings use 'constexpr std::string_view' to be 'fmt::format'
/// compatible.
constexpr std::string_view kCounterHiveFileHandleCacheNumElementsFormat{
constexpr folly::StringPiece kCounterHiveFileHandleCacheNumElementsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_elements"};
constexpr std::string_view kCounterHiveFileHandleCachePinnedSizeFormat{
constexpr folly::StringPiece kCounterHiveFileHandleCachePinnedSizeFormat{
"presto_cpp.{}.hive_file_handle_cache_pinned_size"};
constexpr std::string_view kCounterHiveFileHandleCacheCurSizeFormat{
constexpr folly::StringPiece kCounterHiveFileHandleCacheCurSizeFormat{
"presto_cpp.{}.hive_file_handle_cache_cur_size"};
constexpr std::string_view kCounterHiveFileHandleCacheNumAccumulativeHitsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_accumulative_hits"};
constexpr std::string_view
constexpr folly::StringPiece
kCounterHiveFileHandleCacheNumAccumulativeHitsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_accumulative_hits"};
constexpr folly::StringPiece
kCounterHiveFileHandleCacheNumAccumulativeLookupsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_accumulative_lookups"};
constexpr std::string_view kCounterHiveFileHandleCacheNumHitsFormat{
constexpr folly::StringPiece kCounterHiveFileHandleCacheNumHitsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_hits"};
constexpr std::string_view kCounterHiveFileHandleCacheNumLookupsFormat{
constexpr folly::StringPiece kCounterHiveFileHandleCacheNumLookupsFormat{
"presto_cpp.{}.hive_file_handle_cache_num_lookups"};

/// ================== Memory Pushback Counters =================

/// Number of times memory pushback mechanism is triggered.
constexpr folly::StringPiece kCounterMemoryPushbackCount{
"presto_cpp.memory_pushback_count"};
/// Latency distribution of each memory pushback run in range of [0, 100s] and
/// reports P50, P90, P99, and P100.
constexpr folly::StringPiece kCounterMemoryPushbackLatencyMs{
"presto_cpp.memory_pushback_latency_ms"};
} // namespace facebook::presto

0 comments on commit e782f02

Please sign in to comment.