More efficient scans for sweep priority (#3410)

palantir · Jul 25, 2018 · 1d159bc · 1d159bc
1 parent d9f97ab
commit 1d159bc
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 16 deletions.
diff --git a/...db-client/src/main/java/com/palantir/atlasdb/keyvalue/impl/SweepStatsKeyValueService.java b/...db-client/src/main/java/com/palantir/atlasdb/keyvalue/impl/SweepStatsKeyValueService.java
@@ -16,11 +16,9 @@
 package com.palantir.atlasdb.keyvalue.impl;
 
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -70,7 +68,7 @@
 public class SweepStatsKeyValueService extends ForwardingKeyValueService {
 
     private static final Logger log = LoggerFactory.getLogger(SweepStatsKeyValueService.class);
-    private static final int CLEAR_WEIGHT = 1 << 14;
+    private static final int CLEAR_WEIGHT = 1 << 14; // 16384
     private static final long FLUSH_DELAY_SECONDS = 42;
 
     // This is gross and won't work if someone starts namespacing sweep differently
@@ -84,8 +82,7 @@ public class SweepStatsKeyValueService extends ForwardingKeyValueService {
 
     private final Multiset<TableReference> writesByTable = ConcurrentHashMultiset.create();
 
-    private final Set<TableReference> clearedTables = Collections.newSetFromMap(
-            new ConcurrentHashMap<TableReference, Boolean>());
+    private final Set<TableReference> clearedTables = Sets.newConcurrentHashSet();
 
     private final AtomicInteger totalModifications = new AtomicInteger();
     private final AtomicLong totalModificationsSize = new AtomicLong();

diff --git a/atlasdb-client/src/main/java/com/palantir/atlasdb/schema/SweepSchema.java b/atlasdb-client/src/main/java/com/palantir/atlasdb/schema/SweepSchema.java
@@ -63,6 +63,7 @@ private static Schema generateSchema() {
                 // was last swept.
                 column("cells_examined", "e", ValueType.VAR_LONG);
             conflictHandler(ConflictHandler.IGNORE_ALL);
+            rangeScanAllowed();
         }});
 
         schema.validate();

diff --git a/atlasdb-client/src/main/java/com/palantir/atlasdb/schema/generated/SweepPriorityTable.java b/atlasdb-client/src/main/java/com/palantir/atlasdb/schema/generated/SweepPriorityTable.java
@@ -1054,20 +1054,69 @@ public Iterator<Map.Entry<SweepPriorityRow, SweepPriorityNamedColumnValue<?>>> g
         });
     }
 
-    public BatchingVisitableView<SweepPriorityRowResult> getAllRowsUnordered() {
-        return getAllRowsUnordered(allColumns);
-    }
-
-    public BatchingVisitableView<SweepPriorityRowResult> getAllRowsUnordered(ColumnSelection columns) {
-        return BatchingVisitables.transform(t.getRange(tableRef, RangeRequest.builder().retainColumns(columns).build()),
-                new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
+    public BatchingVisitableView<SweepPriorityRowResult> getRange(RangeRequest range) {
+        if (range.getColumnNames().isEmpty()) {
+            range = range.getBuilder().retainColumns(allColumns).build();
+        }
+        return BatchingVisitables.transform(t.getRange(tableRef, range), new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
             @Override
             public SweepPriorityRowResult apply(RowResult<byte[]> input) {
                 return SweepPriorityRowResult.of(input);
             }
         });
     }
 
+    @Deprecated
+    public IterableView<BatchingVisitable<SweepPriorityRowResult>> getRanges(Iterable<RangeRequest> ranges) {
+        Iterable<BatchingVisitable<RowResult<byte[]>>> rangeResults = t.getRanges(tableRef, ranges);
+        return IterableView.of(rangeResults).transform(
+                new Function<BatchingVisitable<RowResult<byte[]>>, BatchingVisitable<SweepPriorityRowResult>>() {
+            @Override
+            public BatchingVisitable<SweepPriorityRowResult> apply(BatchingVisitable<RowResult<byte[]>> visitable) {
+                return BatchingVisitables.transform(visitable, new Function<RowResult<byte[]>, SweepPriorityRowResult>() {
+                    @Override
+                    public SweepPriorityRowResult apply(RowResult<byte[]> row) {
+                        return SweepPriorityRowResult.of(row);
+                    }
+                });
+            }
+        });
+    }
+
+    public <T> Stream<T> getRanges(Iterable<RangeRequest> ranges,
+                                   int concurrencyLevel,
+                                   BiFunction<RangeRequest, BatchingVisitable<SweepPriorityRowResult>, T> visitableProcessor) {
+        return t.getRanges(tableRef, ranges, concurrencyLevel,
+                (rangeRequest, visitable) -> visitableProcessor.apply(rangeRequest, BatchingVisitables.transform(visitable, SweepPriorityRowResult::of)));
+    }
+
+    public <T> Stream<T> getRanges(Iterable<RangeRequest> ranges,
+                                   BiFunction<RangeRequest, BatchingVisitable<SweepPriorityRowResult>, T> visitableProcessor) {
+        return t.getRanges(tableRef, ranges,
+                (rangeRequest, visitable) -> visitableProcessor.apply(rangeRequest, BatchingVisitables.transform(visitable, SweepPriorityRowResult::of)));
+    }
+
+    public Stream<BatchingVisitable<SweepPriorityRowResult>> getRangesLazy(Iterable<RangeRequest> ranges) {
+        Stream<BatchingVisitable<RowResult<byte[]>>> rangeResults = t.getRangesLazy(tableRef, ranges);
+        return rangeResults.map(visitable -> BatchingVisitables.transform(visitable, SweepPriorityRowResult::of));
+    }
+
+    public void deleteRange(RangeRequest range) {
+        deleteRanges(ImmutableSet.of(range));
+    }
+
+    public void deleteRanges(Iterable<RangeRequest> ranges) {
+        BatchingVisitables.concat(getRanges(ranges))
+                          .transform(SweepPriorityRowResult.getRowNameFun())
+                          .batchAccept(1000, new AbortingVisitor<List<SweepPriorityRow>, RuntimeException>() {
+            @Override
+            public boolean visit(List<SweepPriorityRow> rows) {
+                delete(rows);
+                return true;
+            }
+        });
+    }
+
     @Override
     public List<String> findConstraintFailures(Map<Cell, byte[]> writes,
                                                ConstraintCheckingTransaction transaction,
@@ -1165,5 +1214,5 @@ public List<String> findConstraintFailuresNoRead(Map<Cell, byte[]> writes,
      * {@link UnsignedBytes}
      * {@link ValueType}
      */
-    static String __CLASS_HASH = "TJJRvx5uQ/E1NQVzUnggYQ==";
+    static String __CLASS_HASH = "SQZh8Tiuo9hisseHEHNpnA==";
 }
diff --git a/atlasdb-impl-shared/src/main/java/com/palantir/atlasdb/compact/SweepHistoryProvider.java b/atlasdb-impl-shared/src/main/java/com/palantir/atlasdb/compact/SweepHistoryProvider.java
@@ -19,16 +19,23 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import com.palantir.atlasdb.keyvalue.api.RangeRequest;
 import com.palantir.atlasdb.schema.generated.SweepPriorityTable;
+import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityNamedColumn;
 import com.palantir.atlasdb.schema.generated.SweepTableFactory;
 import com.palantir.atlasdb.transaction.api.Transaction;
 
 class SweepHistoryProvider {
+
+    private static final int READ_BATCH_SIZE = 100;
+
     Map<String, Long> getHistory(Transaction tx) {
         Map<String, Long> tableToLastTimeSwept = new HashMap<>();
         SweepPriorityTable sweepPriorityTable = SweepTableFactory.of().getSweepPriorityTable(tx);
-        sweepPriorityTable.getAllRowsUnordered(SweepPriorityTable.getColumnSelection(
-                SweepPriorityTable.SweepPriorityNamedColumn.LAST_SWEEP_TIME))
+        sweepPriorityTable.getRange(RangeRequest.builder()
+                .retainColumns(SweepPriorityTable.getColumnSelection(SweepPriorityNamedColumn.LAST_SWEEP_TIME))
+                .batchHint(READ_BATCH_SIZE)
+                .build())
                 .forEach(row -> {
                     Long lastSweepTime = row.getLastSweepTime();
                     String tableName = row.getRowName().getFullTableName();

diff --git a/...impl-shared/src/main/java/com/palantir/atlasdb/sweep/priority/SweepPriorityStoreImpl.java b/...impl-shared/src/main/java/com/palantir/atlasdb/sweep/priority/SweepPriorityStoreImpl.java
@@ -20,12 +20,16 @@
 import java.util.OptionalLong;
 
 import com.google.common.collect.Collections2;
+import com.google.common.collect.Lists;
 import com.palantir.async.initializer.AsyncInitializer;
 import com.palantir.atlasdb.keyvalue.api.KeyValueService;
+import com.palantir.atlasdb.keyvalue.api.RangeRequest;
 import com.palantir.atlasdb.keyvalue.api.TableReference;
 import com.palantir.atlasdb.schema.SweepSchema;
 import com.palantir.atlasdb.schema.generated.SweepPriorityTable;
+import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityNamedColumn;
 import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityRow;
+import com.palantir.atlasdb.schema.generated.SweepPriorityTable.SweepPriorityRowResult;
 import com.palantir.atlasdb.schema.generated.SweepTableFactory;
 import com.palantir.atlasdb.table.description.Schemas;
 import com.palantir.atlasdb.transaction.api.Transaction;
@@ -52,6 +56,8 @@ protected String getInitializingClassName() {
 
     }
 
+    private static final int READ_BATCH_SIZE = 100;
+
     private final KeyValueService kvs;
     private final SweepTableFactory sweepTableFactory;
     private InitializingWrapper wrapper = new InitializingWrapper();
@@ -101,7 +107,18 @@ private void tryInitialize() {
 
     private List<SweepPriority> loadPriorities(Transaction tx) {
         SweepPriorityTable table = sweepTableFactory.getSweepPriorityTable(tx);
-        return table.getAllRowsUnordered().transform(SweepPriorityStoreImpl::hydrate).immutableCopy();
+
+        // Load a single column first for each row. This is a much more efficient query on Cassandra
+        // than the full table scan that occurs otherwise.
+        List<SweepPriorityRowResult> rows = table.getRange(RangeRequest.builder()
+                .retainColumns(SweepPriorityTable.getColumnSelection(SweepPriorityNamedColumn.CELLS_DELETED))
+                .batchHint(READ_BATCH_SIZE)
+                .build())
+                .immutableCopy();
+
+        // Fetch all columns for the above rows directly
+        return Lists.transform(table.getRows(Lists.transform(rows, SweepPriorityRowResult::getRowName)),
+                SweepPriorityStoreImpl::hydrate);
     }
 
     private static SweepPriority hydrate(SweepPriorityTable.SweepPriorityRowResult rr) {

diff --git a/docs/source/release_notes/release-notes.rst b/docs/source/release_notes/release-notes.rst
@@ -55,6 +55,10 @@ develop
            The reported outcomes for targeted sweep are: ``SUCCESS``, ``NOTHING_TO_SWEEP``, ``DISABLED``, ``NOT_ENOUGH_DB_NODES_ONLINE``, and ``ERROR``.
            (`Pull Request <https://github.com/palantir/atlasdb/pull/3399>`__)
 
+    *    - |improved|
+         - Changed the range scan behavior for the sweep priority table so that reads scan less data in Cassandra.
+           (`Pull Request <https://github.com/palantir/atlasdb/pull/3410>`__)
+
 =======
 v0.97.0
 =======