Skip to content

Commit

Permalink
Merge pull request #11768 from dotnwat/space-management
Browse files: browse the repository at this point in the history
[storage] space management bug fixes and logging improvements
  • Loading branch information
dotnwat authored Jun 29, 2023
2 parents 0f05eb0 + 9f60e73 commit d761026
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
30 changes: 27 additions & 3 deletions src/v/resource_mgmt/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ ss::future<> disk_space_manager::run_loop() {
* upcall to start the monitor loop when it appears that we are getting
* close to an important threshold.
*/
constexpr auto frequency = std::chrono::seconds(5);
constexpr auto frequency = std::chrono::seconds(20);

while (!_gate.is_closed()) {
try {
Expand Down Expand Up @@ -126,6 +126,13 @@ set_partition_retention_offsets(cluster::partition_manager& pm, size_t target) {
partitions.push_back(p.second);
}

vlog(
rlog.info,
"Attempting to recover {} from {} remote partitions on core {}",
human::bytes(target),
partitions.size(),
ss::this_shard_id());

size_t partitions_total = 0;
for (const auto& p : partitions) {
if (partitions_total >= target) {
Expand All @@ -137,6 +144,13 @@ set_partition_retention_offsets(cluster::partition_manager& pm, size_t target) {
auto gate = log->gate().hold();

auto segments = log->cloud_gc_eligible_segments();

vlog(
rlog.info,
"Remote partition {} reports {} reclaimable segments",
p->ntp(),
segments.size());

if (segments.empty()) {
continue;
}
Expand All @@ -146,6 +160,14 @@ set_partition_retention_offsets(cluster::partition_manager& pm, size_t target) {
for (const auto& seg : segments) {
auto usage = co_await seg->persistent_size();
log_total += usage.total();
offset = seg->offsets().committed_offset;
vlog(
rlog.info,
"Collecting segment {}:{}-{} estimated to recover {}",
p->ntp(),
seg->offsets().base_offset(),
seg->offsets().committed_offset(),
human::bytes(usage.total()));
if (log_total >= target) {
break;
}
Expand All @@ -154,10 +176,12 @@ set_partition_retention_offsets(cluster::partition_manager& pm, size_t target) {
vlog(
rlog.info,
"Setting retention offset override {} estimated reclaim of {} for "
"cloud topic {}",
"cloud topic {}. Total reclaim {} of target {}.",
offset,
log_total,
p->ntp());
p->ntp(),
partitions_total,
target);

log->set_cloud_gc_offset(offset);
partitions_total += log_total;
Expand Down
2 changes: 1 addition & 1 deletion src/v/storage/disk_log_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2249,7 +2249,7 @@ disk_log_impl::cloud_gc_eligible_segments() {
if (seg->offsets().committed_offset <= max_collectible) {
segments.push_back(seg);
}
if (--remaining > 0) {
if (--remaining <= 0) {
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion tests/rptest/tests/full_disk_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,4 +593,4 @@ def target_size_reached():
return total < (15 * 2**20 + 2 * self.log_segment_size)

# give it plenty of time. on debug it is hella slow
wait_until(target_size_reached, timeout_sec=240, backoff_sec=5)
wait_until(target_size_reached, timeout_sec=120, backoff_sec=5)

0 comments on commit d761026

Please sign in to comment.