Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Commit

Permalink
More efficient scans for sweep priority (#3410)
Browse files Browse the repository at this point in the history
  • Loading branch information
sandorw authored Jul 25, 2018
1 parent d9f97ab commit 1d159bc
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@
package com.palantir.atlasdb.keyvalue.impl;

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
Expand Down Expand Up @@ -70,7 +68,7 @@
public class SweepStatsKeyValueService extends ForwardingKeyValueService {

private static final Logger log = LoggerFactory.getLogger(SweepStatsKeyValueService.class);
private static final int CLEAR_WEIGHT = 1 << 14;
private static final int CLEAR_WEIGHT = 1 << 14; // 16384
private static final long FLUSH_DELAY_SECONDS = 42;

// This is gross and won't work if someone starts namespacing sweep differently
Expand All @@ -84,8 +82,7 @@ public class SweepStatsKeyValueService extends ForwardingKeyValueService {

private final Multiset<TableReference> writesByTable = ConcurrentHashMultiset.create();

private final Set<TableReference> clearedTables = Collections.newSetFromMap(
new ConcurrentHashMap<TableReference, Boolean>());
private final Set<TableReference> clearedTables = Sets.newConcurrentHashSet();

private final AtomicInteger totalModifications = new AtomicInteger();
private final AtomicLong totalModificationsSize = new AtomicLong();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ private static Schema generateSchema() {
// was last swept.
column("cells_examined", "e", ValueType.VAR_LONG);
conflictHandler(ConflictHandler.IGNORE_ALL);
rangeScanAllowed();
}});

schema.validate();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1054,20 +1054,69 @@ public Iterator<Map.Entry<SweepPriorityRow, SweepPriorityNamedColumnValue<?>>> g
});
}

public BatchingVisitableView<SweepPriorityRowResult> getAllRowsUnordered() {
return getAllRowsUnordered(allColumns);
}

public BatchingVisitableView<SweepPriorityRowResult> getAllRowsUnordered(ColumnSelection columns) {
return BatchingVisitables.transform(t.getRange(tableRef, RangeRequest.builder().retainColumns(columns).build()),
new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
public BatchingVisitableView<SweepPriorityRowResult> getRange(RangeRequest range) {
if (range.getColumnNames().isEmpty()) {
range = range.getBuilder().retainColumns(allColumns).build();
}
return BatchingVisitables.transform(t.getRange(tableRef, range), new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
@Override
public SweepPriorityRowResult apply(RowResult<byte[]> input) {
return SweepPriorityRowResult.of(input);
}
});
}

@Deprecated
public IterableView<BatchingVisitable<SweepPriorityRowResult>> getRanges(Iterable<RangeRequest> ranges) {
Iterable<BatchingVisitable<RowResult<byte[]>>> rangeResults = t.getRanges(tableRef, ranges);
return IterableView.of(rangeResults).transform(
new Function<BatchingVisitable<RowResult<byte[]>>, BatchingVisitable<SweepPriorityRowResult>>() {
@Override
public BatchingVisitable<SweepPriorityRowResult> apply(BatchingVisitable<RowResult<byte[]>> visitable) {
return BatchingVisitables.transform(visitable, new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
@Override
public SweepPriorityRowResult apply(RowResult<byte[]> row) {
return SweepPriorityRowResult.of(row);
}
});
}
});
}

public <T> Stream<T> getRanges(Iterable<RangeRequest> ranges,
int concurrencyLevel,
BiFunction<RangeRequest, BatchingVisitable<SweepPriorityRowResult>, T> visitableProcessor) {
return t.getRanges(tableRef, ranges, concurrencyLevel,
(rangeRequest, visitable) -> visitableProcessor.apply(rangeRequest, BatchingVisitables.transform(visitable, SweepPriorityRowResult::of)));
}

public <T> Stream<T> getRanges(Iterable<RangeRequest> ranges,
BiFunction<RangeRequest, BatchingVisitable<SweepPriorityRowResult>, T> visitableProcessor) {
return t.getRanges(tableRef, ranges,
(rangeRequest, visitable) -> visitableProcessor.apply(rangeRequest, BatchingVisitables.transform(visitable, SweepPriorityRowResult::of)));
}

public Stream<BatchingVisitable<SweepPriorityRowResult>> getRangesLazy(Iterable<RangeRequest> ranges) {
Stream<BatchingVisitable<RowResult<byte[]>>> rangeResults = t.getRangesLazy(tableRef, ranges);
return rangeResults.map(visitable -> BatchingVisitables.transform(visitable, SweepPriorityRowResult::of));
}

public void deleteRange(RangeRequest range) {
deleteRanges(ImmutableSet.of(range));
}

public void deleteRanges(Iterable<RangeRequest> ranges) {
BatchingVisitables.concat(getRanges(ranges))
.transform(SweepPriorityRowResult.getRowNameFun())
.batchAccept(1000, new AbortingVisitor<List<SweepPriorityRow>, RuntimeException>() {
@Override
public boolean visit(List<SweepPriorityRow> rows) {
delete(rows);
return true;
}
});
}

@Override
public List<String> findConstraintFailures(Map<Cell, byte[]> writes,
ConstraintCheckingTransaction transaction,
Expand Down Expand Up @@ -1165,5 +1214,5 @@ public List<String> findConstraintFailuresNoRead(Map<Cell, byte[]> writes,
* {@link UnsignedBytes}
* {@link ValueType}
*/
static String __CLASS_HASH = "TJJRvx5uQ/E1NQVzUnggYQ==";
static String __CLASS_HASH = "SQZh8Tiuo9hisseHEHNpnA==";
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,23 @@
import java.util.HashMap;
import java.util.Map;

import com.palantir.atlasdb.keyvalue.api.RangeRequest;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityNamedColumn;
import com.palantir.atlasdb.schema.generated.SweepTableFactory;
import com.palantir.atlasdb.transaction.api.Transaction;

class SweepHistoryProvider {

private static final int READ_BATCH_SIZE = 100;

Map<String, Long> getHistory(Transaction tx) {
Map<String, Long> tableToLastTimeSwept = new HashMap<>();
SweepPriorityTable sweepPriorityTable = SweepTableFactory.of().getSweepPriorityTable(tx);
sweepPriorityTable.getAllRowsUnordered(SweepPriorityTable.getColumnSelection(
SweepPriorityTable.SweepPriorityNamedColumn.LAST_SWEEP_TIME))
sweepPriorityTable.getRange(RangeRequest.builder()
.retainColumns(SweepPriorityTable.getColumnSelection(SweepPriorityNamedColumn.LAST_SWEEP_TIME))
.batchHint(READ_BATCH_SIZE)
.build())
.forEach(row -> {
Long lastSweepTime = row.getLastSweepTime();
String tableName = row.getRowName().getFullTableName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@
import java.util.OptionalLong;

import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import com.palantir.async.initializer.AsyncInitializer;
import com.palantir.atlasdb.keyvalue.api.KeyValueService;
import com.palantir.atlasdb.keyvalue.api.RangeRequest;
import com.palantir.atlasdb.keyvalue.api.TableReference;
import com.palantir.atlasdb.schema.SweepSchema;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityNamedColumn;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityRow;
import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityRowResult;
import com.palantir.atlasdb.schema.generated.SweepTableFactory;
import com.palantir.atlasdb.table.description.Schemas;
import com.palantir.atlasdb.transaction.api.Transaction;
Expand All @@ -52,6 +56,8 @@ protected String getInitializingClassName() {

}

private static final int READ_BATCH_SIZE = 100;

private final KeyValueService kvs;
private final SweepTableFactory sweepTableFactory;
private InitializingWrapper wrapper = new InitializingWrapper();
Expand Down Expand Up @@ -101,7 +107,18 @@ private void tryInitialize() {

private List<SweepPriority> loadPriorities(Transaction tx) {
SweepPriorityTable table = sweepTableFactory.getSweepPriorityTable(tx);
return table.getAllRowsUnordered().transform(SweepPriorityStoreImpl::hydrate).immutableCopy();

// Load a single column first for each row. This is a much more efficient query on Cassandra
// than the full table scan that occurs otherwise.
List<SweepPriorityRowResult> rows = table.getRange(RangeRequest.builder()
.retainColumns(SweepPriorityTable.getColumnSelection(SweepPriorityNamedColumn.CELLS_DELETED))
.batchHint(READ_BATCH_SIZE)
.build())
.immutableCopy();

// Fetch all columns for the above rows directly
return Lists.transform(table.getRows(Lists.transform(rows, SweepPriorityRowResult::getRowName)),
SweepPriorityStoreImpl::hydrate);
}

private static SweepPriority hydrate(SweepPriorityTable.SweepPriorityRowResult rr) {
Expand Down
4 changes: 4 additions & 0 deletions docs/source/release_notes/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ develop
The reported outcomes for targeted sweep are: ``SUCCESS``, ``NOTHING_TO_SWEEP``, ``DISABLED``, ``NOT_ENOUGH_DB_NODES_ONLINE``, and ``ERROR``.
(`Pull Request <https://github.com/palantir/atlasdb/pull/3399>`__)

* - |improved|
- Changed the range scan behavior for the sweep priority table so that reads scan less data in Cassandra.
(`Pull Request <https://github.com/palantir/atlasdb/pull/3410>`__)

=======
v0.97.0
=======
Expand Down

0 comments on commit 1d159bc

Please sign in to comment.