[PDS-10{3841,4895}] Part 1: Revert Automated Stream Store Cleanup (#4434)

* Revert streamy change
* Update test
* Regenerate Schemas
* Add generated changelog entries
* Ignore test
jeremyk-91 authored Nov 22, 2019
1 parent 1d9d15b commit 2c2fc3c
Showing 8 changed files with 27 additions and 88 deletions.
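
Before the file-by-file diff, a condensed view of the behaviour change. Across the renderer and every generated cleanup task below, the revert removes the "unreferenced stream" branch: metadata rows are now deleted only when their status is not STORED. A minimal sketch of the before/after decision, using plain collections in place of the generated AtlasDB table types (all names here are illustrative, not the generated API):

```java
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

final class StreamCleanupSketch {
    enum Status { STORING, STORED, FAILED } // illustrative subset of stream statuses

    // Before this commit: delete a stream if its status is not STORED, or if its
    // index row has no remaining references (an empty column iterator).
    // After this commit: only the status check remains.
    static Set<Long> idsToDelete(
            Map<Long, Status> metadataByStreamId,
            Map<Long, Iterator<byte[]>> referenceIteratorByStreamId,
            boolean cleanUpUnreferencedStreams) {
        Set<Long> streamsWithNoReferences = new HashSet<>();
        referenceIteratorByStreamId.forEach((id, references) -> {
            if (!references.hasNext()) {
                streamsWithNoReferences.add(id);
            }
        });

        Set<Long> toDelete = new HashSet<>();
        for (Map.Entry<Long, Status> entry : metadataByStreamId.entrySet()) {
            boolean unreferenced = cleanUpUnreferencedStreams
                    && streamsWithNoReferences.contains(entry.getKey());
            if (entry.getValue() != Status.STORED || unreferenced) {
                toDelete.add(entry.getKey());
            }
        }
        return toDelete;
    }

    private StreamCleanupSketch() {}
}
```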
@@ -694,25 +694,10 @@ private void cellsCleanedUp() {
 line("for (Cell cell : cells) {"); {
 line("rows.add(", StreamMetadataRow, ".BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));");
 } line("}");
-line(StreamIndexTable, " indexTable = tables.get", StreamIndexTable, "(t);");
-line("Set<", StreamIndexRow, "> indexRows = rows.stream()");
-line(" .map(", StreamMetadataRow, "::getId)");
-line(" .map(", StreamIndexRow, "::of)");
-line(" .collect(Collectors.toSet());");
-line("Map<", StreamIndexRow, ", Iterator<", StreamIndexColumnValue, ">> referenceIteratorByStream");
-line(" = indexTable.getRowsColumnRangeIterator(indexRows,");
-line(" BatchColumnRangeSelection.create(PtBytes.EMPTY_BYTE_ARRAY, PtBytes.EMPTY_BYTE_ARRAY, 1));");
-line("Set<", StreamMetadataRow, "> streamsWithNoReferences");
-line(" = KeyedStream.stream(referenceIteratorByStream)");
-line(" .filter(valueIterator -> !valueIterator.hasNext())");
-line(" .keys() // (authorized)"); // required for large internal product
-line(" .map(", StreamIndexRow, "::getId)");
-line(" .map(", StreamMetadataRow, "::of)");
-line(" .collect(Collectors.toSet());");
 line("Map<", StreamMetadataRow, ", StreamMetadata> currentMetadata = metaTable.getMetadatas(rows);");
 line("Set<", StreamId, "> toDelete = Sets.newHashSet();");
 line("for (Map.Entry<", StreamMetadataRow, ", StreamMetadata> e : currentMetadata.entrySet()) {"); {
-line("if (e.getValue().getStatus() != Status.STORED || streamsWithNoReferences.contains(e.getKey())) {"); {
+line("if (e.getValue().getStatus() != Status.STORED) {"); {
 line("toDelete.add(e.getKey().getId());");
 } line("}");
 } line("}");
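
For orientation: the hunk above is not the runtime cleanup code but the generator that emits it; each line(...) call appends one line of generated Java, with variables such as StreamMetadataRow and StreamIndexRow holding per-schema class names. That is why the identical revert reappears verbatim in each generated cleanup task below. A minimal sketch of such an emitter, assuming a simple StringBuilder-backed helper (the real StreamStoreRenderer API may differ):

```java
// Minimal sketch of a line()-style code emitter; assumes a plain
// StringBuilder backing, which may differ from the real renderer.
public final class LineEmitter {
    private final StringBuilder out = new StringBuilder();

    // Concatenates the fragments and appends them as one line of generated code.
    public void line(String... fragments) {
        for (String fragment : fragments) {
            out.append(fragment);
        }
        out.append('\n');
    }

    public String render() {
        return out.toString();
    }

    public static void main(String[] args) {
        LineEmitter gen = new LineEmitter();
        String streamMetadataRow = "SnapshotsStreamMetadataRow"; // hypothetical per-schema class name
        gen.line("for (Cell cell : cells) {"); {
            gen.line("rows.add(", streamMetadataRow, ".BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));");
        } gen.line("}");
        System.out.print(gen.render());
    }
}
```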
@@ -32,25 +32,10 @@ public boolean cellsCleanedUp(Transaction t, Set<Cell> cells) {
 for (Cell cell : cells) {
 rows.add(SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow.BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));
 }
-SnapshotsStreamIdxTable indexTable = tables.getSnapshotsStreamIdxTable(t);
-Set<SnapshotsStreamIdxTable.SnapshotsStreamIdxRow> indexRows = rows.stream()
- .map(SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow::getId)
- .map(SnapshotsStreamIdxTable.SnapshotsStreamIdxRow::of)
- .collect(Collectors.toSet());
-Map<SnapshotsStreamIdxTable.SnapshotsStreamIdxRow, Iterator<SnapshotsStreamIdxTable.SnapshotsStreamIdxColumnValue>> referenceIteratorByStream
- = indexTable.getRowsColumnRangeIterator(indexRows,
- BatchColumnRangeSelection.create(PtBytes.EMPTY_BYTE_ARRAY, PtBytes.EMPTY_BYTE_ARRAY, 1));
-Set<SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow> streamsWithNoReferences
- = KeyedStream.stream(referenceIteratorByStream)
- .filter(valueIterator -> !valueIterator.hasNext())
- .keys() // (authorized)
- .map(SnapshotsStreamIdxTable.SnapshotsStreamIdxRow::getId)
- .map(SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow::of)
- .collect(Collectors.toSet());
 Map<SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow, StreamMetadata> currentMetadata = metaTable.getMetadatas(rows);
 Set<Long> toDelete = Sets.newHashSet();
 for (Map.Entry<SnapshotsStreamMetadataTable.SnapshotsStreamMetadataRow, StreamMetadata> e : currentMetadata.entrySet()) {
-if (e.getValue().getStatus() != Status.STORED || streamsWithNoReferences.contains(e.getKey())) {
+if (e.getValue().getStatus() != Status.STORED) {
 toDelete.add(e.getKey().getId());
 }
 }
@@ -32,25 +32,10 @@ public boolean cellsCleanedUp(Transaction t, Set<Cell> cells) {
 for (Cell cell : cells) {
 rows.add(DataStreamMetadataTable.DataStreamMetadataRow.BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));
 }
-DataStreamIdxTable indexTable = tables.getDataStreamIdxTable(t);
-Set<DataStreamIdxTable.DataStreamIdxRow> indexRows = rows.stream()
- .map(DataStreamMetadataTable.DataStreamMetadataRow::getId)
- .map(DataStreamIdxTable.DataStreamIdxRow::of)
- .collect(Collectors.toSet());
-Map<DataStreamIdxTable.DataStreamIdxRow, Iterator<DataStreamIdxTable.DataStreamIdxColumnValue>> referenceIteratorByStream
- = indexTable.getRowsColumnRangeIterator(indexRows,
- BatchColumnRangeSelection.create(PtBytes.EMPTY_BYTE_ARRAY, PtBytes.EMPTY_BYTE_ARRAY, 1));
-Set<DataStreamMetadataTable.DataStreamMetadataRow> streamsWithNoReferences
- = KeyedStream.stream(referenceIteratorByStream)
- .filter(valueIterator -> !valueIterator.hasNext())
- .keys() // (authorized)
- .map(DataStreamIdxTable.DataStreamIdxRow::getId)
- .map(DataStreamMetadataTable.DataStreamMetadataRow::of)
- .collect(Collectors.toSet());
 Map<DataStreamMetadataTable.DataStreamMetadataRow, StreamMetadata> currentMetadata = metaTable.getMetadatas(rows);
 Set<Long> toDelete = Sets.newHashSet();
 for (Map.Entry<DataStreamMetadataTable.DataStreamMetadataRow, StreamMetadata> e : currentMetadata.entrySet()) {
-if (e.getValue().getStatus() != Status.STORED || streamsWithNoReferences.contains(e.getKey())) {
+if (e.getValue().getStatus() != Status.STORED) {
 toDelete.add(e.getKey().getId());
 }
 }
@@ -32,25 +32,10 @@ public boolean cellsCleanedUp(Transaction t, Set<Cell> cells) {
 for (Cell cell : cells) {
 rows.add(HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow.BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));
 }
-HotspottyDataStreamIdxTable indexTable = tables.getHotspottyDataStreamIdxTable(t);
-Set<HotspottyDataStreamIdxTable.HotspottyDataStreamIdxRow> indexRows = rows.stream()
- .map(HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow::getId)
- .map(HotspottyDataStreamIdxTable.HotspottyDataStreamIdxRow::of)
- .collect(Collectors.toSet());
-Map<HotspottyDataStreamIdxTable.HotspottyDataStreamIdxRow, Iterator<HotspottyDataStreamIdxTable.HotspottyDataStreamIdxColumnValue>> referenceIteratorByStream
- = indexTable.getRowsColumnRangeIterator(indexRows,
- BatchColumnRangeSelection.create(PtBytes.EMPTY_BYTE_ARRAY, PtBytes.EMPTY_BYTE_ARRAY, 1));
-Set<HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow> streamsWithNoReferences
- = KeyedStream.stream(referenceIteratorByStream)
- .filter(valueIterator -> !valueIterator.hasNext())
- .keys() // (authorized)
- .map(HotspottyDataStreamIdxTable.HotspottyDataStreamIdxRow::getId)
- .map(HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow::of)
- .collect(Collectors.toSet());
 Map<HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow, StreamMetadata> currentMetadata = metaTable.getMetadatas(rows);
 Set<Long> toDelete = Sets.newHashSet();
 for (Map.Entry<HotspottyDataStreamMetadataTable.HotspottyDataStreamMetadataRow, StreamMetadata> e : currentMetadata.entrySet()) {
-if (e.getValue().getStatus() != Status.STORED || streamsWithNoReferences.contains(e.getKey())) {
+if (e.getValue().getStatus() != Status.STORED) {
 toDelete.add(e.getKey().getId());
 }
 }
@@ -23,6 +23,7 @@
 import org.awaitility.Awaitility;
 import org.junit.After;
 import org.junit.Assert;
+import org.junit.Ignore;
 import org.junit.Test;

 import com.palantir.atlasdb.keyvalue.api.Namespace;
@@ -94,17 +95,26 @@ public void targetedSweepLargeStreamsTest() {
 }

 @Test
-public void targetedSweepCleanupUnmarkedStreamsTest() {
+@Ignore // TODO (jkong): This is obviously not the desired behaviour, but we are doing this for safety.
+public void targetedSweepCleansUpUnmarkedStreamsTest() {
 todoClient.storeUnmarkedSnapshot("snap");
 todoClient.storeUnmarkedSnapshot("crackle");
 todoClient.storeUnmarkedSnapshot("pop");
 todoClient.runIterationOfTargetedSweep();

 // Nothing can be deleted from Index because it wasn't written. There should be 3 entries in the other tables
 // (hash, metadata and value), one per stream, all of which should be cleaned up.
 assertDeleted(0, 3, 3, 3);
 }

+@Test
+public void targetedSweepCurrentlyDoesNotCleanupUnmarkedStreamsTest() {
+todoClient.storeUnmarkedSnapshot("snap");
+todoClient.storeUnmarkedSnapshot("crackle");
+todoClient.storeUnmarkedSnapshot("pop");
+todoClient.runIterationOfTargetedSweep();
+
+assertDeleted(0, 0, 0, 0);
+}
+
 private void assertDeleted(long idx, long hash, long meta, long val) {
 Assert.assertThat(todoClient.numberOfCellsDeleted(INDEX_TABLE), equalTo(idx));
 Assert.assertThat(todoClient.numberOfCellsDeleted(HASH_TABLE), equalTo(hash));
8 changes: 8 additions & 0 deletions changelog/@unreleased/pr-4434.v2.yml
@@ -0,0 +1,8 @@
+type: manualTask
+manualTask:
+  description: |-
+    Stream stores now no longer automatically clean up streams that were stored but never marked as used. We have noticed an increasing incidence of said cleaning being more aggressive than we anticipated.
+    Users that use stream stores *must* regenerate their schemas as part of upgrading to this version.
+  links:
+  - https://github.com/palantir/atlasdb/pull/4434
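
The manual task exists because the cleanup tasks above are generated sources: the revert only reaches a user's service once its schemas are re-rendered. A hedged sketch of a typical AtlasDB schema-regeneration main (the schema name, the stream-store definition, and the output path are illustrative, and the exact builder methods may vary by AtlasDB version):

```java
import java.io.File;

import com.palantir.atlasdb.keyvalue.api.Namespace;
import com.palantir.atlasdb.schema.stream.StreamStoreDefinitionBuilder;
import com.palantir.atlasdb.table.description.Schema;
import com.palantir.atlasdb.table.description.ValueType;

public final class ExampleSchemaRenderer {
    public static void main(String[] args) throws Exception {
        // Illustrative schema declaring a stream store; not from this commit.
        Schema schema = new Schema("Example", "com.example.generated", Namespace.DEFAULT_NAMESPACE);
        schema.addStreamStoreDefinition(
                new StreamStoreDefinitionBuilder("data", "Data", ValueType.VAR_LONG).build());
        // Re-render the generated sources (stream store, index table, cleanup tasks).
        schema.renderTables(new File("src/main/java"));
    }
}
```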
@@ -32,25 +32,10 @@ public boolean cellsCleanedUp(Transaction t, Set<Cell> cells) {
 for (Cell cell : cells) {
 rows.add(UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow.BYTES_HYDRATOR.hydrateFromBytes(cell.getRowName()));
 }
-UserPhotosStreamIdxTable indexTable = tables.getUserPhotosStreamIdxTable(t);
-Set<UserPhotosStreamIdxTable.UserPhotosStreamIdxRow> indexRows = rows.stream()
- .map(UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow::getId)
- .map(UserPhotosStreamIdxTable.UserPhotosStreamIdxRow::of)
- .collect(Collectors.toSet());
-Map<UserPhotosStreamIdxTable.UserPhotosStreamIdxRow, Iterator<UserPhotosStreamIdxTable.UserPhotosStreamIdxColumnValue>> referenceIteratorByStream
- = indexTable.getRowsColumnRangeIterator(indexRows,
- BatchColumnRangeSelection.create(PtBytes.EMPTY_BYTE_ARRAY, PtBytes.EMPTY_BYTE_ARRAY, 1));
-Set<UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow> streamsWithNoReferences
- = KeyedStream.stream(referenceIteratorByStream)
- .filter(valueIterator -> !valueIterator.hasNext())
- .keys() // (authorized)
- .map(UserPhotosStreamIdxTable.UserPhotosStreamIdxRow::getId)
- .map(UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow::of)
- .collect(Collectors.toSet());
 Map<UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow, StreamMetadata> currentMetadata = metaTable.getMetadatas(rows);
 Set<Long> toDelete = Sets.newHashSet();
 for (Map.Entry<UserPhotosStreamMetadataTable.UserPhotosStreamMetadataRow, StreamMetadata> e : currentMetadata.entrySet()) {
-if (e.getValue().getStatus() != Status.STORED || streamsWithNoReferences.contains(e.getKey())) {
+if (e.getValue().getStatus() != Status.STORED) {
 toDelete.add(e.getKey().getId());
 }
 }
@@ -60,7 +60,6 @@
 import com.palantir.atlasdb.transaction.api.TransactionTask;
 import com.palantir.atlasdb.transaction.impl.TxTask;
 import com.palantir.common.base.Throwables;
-import com.palantir.common.compression.LZ4CompressingInputStream;
 import com.palantir.common.compression.StreamCompression;
 import com.palantir.common.io.ConcatenatedInputStream;
 import com.palantir.util.AssertUtils;
@@ -70,8 +69,6 @@
 import com.palantir.util.file.DeleteOnCloseFileInputStream;
 import com.palantir.util.file.TempFileUtils;

-import net.jpountz.lz4.LZ4BlockInputStream;
-
 @Generated("com.palantir.atlasdb.table.description.render.StreamStoreRenderer")
 @SuppressWarnings("all")
 public final class UserPhotosStreamStore extends AbstractPersistentStreamStore {
@@ -412,8 +409,6 @@ protected void touchMetadataWhileMarkingUsedForConflicts(Transaction t, Iterable
 * {@link ImmutableSet}
 * {@link InputStream}
 * {@link Ints}
-* {@link LZ4BlockInputStream}
-* {@link LZ4CompressingInputStream}
 * {@link List}
 * {@link Lists}
 * {@link Logger}
@@ -434,6 +429,7 @@ protected void touchMetadataWhileMarkingUsedForConflicts(Transaction t, Iterable
 * {@link Sha256Hash}
 * {@link Status}
 * {@link StreamCleanedException}
+* {@link StreamCompression}
 * {@link StreamMetadata}
 * {@link StreamStorePersistenceConfiguration}
 * {@link Supplier}
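
One side effect of regeneration is visible in the imports above: the generated store no longer names concrete LZ4 stream classes and instead refers to the StreamCompression abstraction. As a purely illustrative sketch of that design (plain JDK types, not the AtlasDB API), hiding the codec behind an interface keeps generated code stable when the compression strategy changes:

```java
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

// Illustrative only: a codec abstraction in the spirit of StreamCompression,
// built on JDK types rather than the AtlasDB/LZ4 classes.
interface CompressionCodec {
    InputStream decompress(InputStream compressed) throws IOException;
}

final class GzipCodec implements CompressionCodec {
    @Override
    public InputStream decompress(InputStream compressed) throws IOException {
        return new GZIPInputStream(compressed); // concrete codec hidden from callers
    }
}
```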
