Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: add the file count of specific deletes in the snapshot summary #4677

Merged
merged 1 commit into from
May 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions core/src/main/java/org/apache/iceberg/SnapshotSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public class SnapshotSummary {
// Snapshot summary property keys for data file and delete file counts.
public static final String DELETED_FILES_PROP = "deleted-data-files";
public static final String TOTAL_DATA_FILES_PROP = "total-data-files";
public static final String ADDED_DELETE_FILES_PROP = "added-delete-files";
// Per-type delete file counts, split into equality ("eq") and position ("pos") delete files.
// NOTE(review): the two "added" constants below are named ADD_* while their values start with
// "added-" and the aggregate constant above is ADDED_DELETE_FILES_PROP; the names are public,
// so the mismatch is only called out here rather than renamed.
public static final String ADD_EQ_DELETE_FILES_PROP = "added-eq-delete-files";
public static final String REMOVED_EQ_DELETE_FILES_PROP = "removed-eq-delete-files";
public static final String ADD_POS_DELETE_FILES_PROP = "added-pos-delete-files";
public static final String REMOVED_POS_DELETE_FILES_PROP = "removed-pos-delete-files";
// Aggregate delete file counts across both delete file types.
public static final String REMOVED_DELETE_FILES_PROP = "removed-delete-files";
public static final String TOTAL_DELETE_FILES_PROP = "total-delete-files";
public static final String ADDED_RECORDS_PROP = "added-records";
Expand Down Expand Up @@ -207,6 +211,10 @@ private static class UpdateMetrics {
private long removedSize = 0L;
private int addedFiles = 0;
private int removedFiles = 0;
private int addedEqDeleteFiles = 0;
private int removedEqDeleteFiles = 0;
private int addedPosDeleteFiles = 0;
private int removedPosDeleteFiles = 0;
private int addedDeleteFiles = 0;
private int removedDeleteFiles = 0;
private long addedRecords = 0L;
Expand All @@ -222,6 +230,10 @@ void clear() {
this.removedSize = 0L;
this.addedFiles = 0;
this.removedFiles = 0;
this.addedEqDeleteFiles = 0;
this.removedEqDeleteFiles = 0;
this.addedPosDeleteFiles = 0;
this.removedPosDeleteFiles = 0;
this.addedDeleteFiles = 0;
this.removedDeleteFiles = 0;
this.addedRecords = 0L;
Expand All @@ -236,6 +248,10 @@ void clear() {
void addTo(ImmutableMap.Builder<String, String> builder) {
setIf(addedFiles > 0, builder, ADDED_FILES_PROP, addedFiles);
setIf(removedFiles > 0, builder, DELETED_FILES_PROP, removedFiles);
setIf(addedEqDeleteFiles > 0, builder, ADD_EQ_DELETE_FILES_PROP, addedEqDeleteFiles);
setIf(removedEqDeleteFiles > 0, builder, REMOVED_EQ_DELETE_FILES_PROP, removedEqDeleteFiles);
setIf(addedPosDeleteFiles > 0, builder, ADD_POS_DELETE_FILES_PROP, addedPosDeleteFiles);
setIf(removedPosDeleteFiles > 0, builder, REMOVED_POS_DELETE_FILES_PROP, removedPosDeleteFiles);
setIf(addedDeleteFiles > 0, builder, ADDED_DELETE_FILES_PROP, addedDeleteFiles);
setIf(removedDeleteFiles > 0, builder, REMOVED_DELETE_FILES_PROP, removedDeleteFiles);
Comment on lines 255 to 256
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[question] Since we are now tracking added / removed delete files at a more granular level (i.e., position delete files added / removed and equality delete files added / removed), these aggregate metrics can be re-created from the granular ones. Should we remove or deprecate them from the snapshot summary?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deprecating things is quite a bit of work for little gain here. I'd just continue to update them.

setIf(addedRecords > 0, builder, ADDED_RECORDS_PROP, addedRecords);
Expand All @@ -260,10 +276,12 @@ void addedFile(ContentFile<?> file) {
break;
case POSITION_DELETES:
this.addedDeleteFiles += 1;
this.addedPosDeleteFiles += 1;
this.addedPosDeletes += file.recordCount();
break;
case EQUALITY_DELETES:
this.addedDeleteFiles += 1;
this.addedEqDeleteFiles += 1;
this.addedEqDeletes += file.recordCount();
break;
default:
Expand All @@ -280,10 +298,12 @@ void removedFile(ContentFile<?> file) {
break;
case POSITION_DELETES:
this.removedDeleteFiles += 1;
this.removedPosDeleteFiles += 1;
this.removedPosDeletes += file.recordCount();
break;
case EQUALITY_DELETES:
this.removedDeleteFiles += 1;
this.removedEqDeleteFiles += 1;
this.removedEqDeletes += file.recordCount();
break;
default:
Expand All @@ -310,6 +330,10 @@ void addedManifest(ManifestFile manifest) {
void merge(UpdateMetrics other) {
this.addedFiles += other.addedFiles;
this.removedFiles += other.removedFiles;
this.addedEqDeleteFiles += other.addedEqDeleteFiles;
this.removedEqDeleteFiles += other.removedEqDeleteFiles;
this.addedPosDeleteFiles += other.addedPosDeleteFiles;
this.removedPosDeleteFiles += other.removedPosDeleteFiles;
this.addedDeleteFiles += other.addedDeleteFiles;
this.removedDeleteFiles += other.removedDeleteFiles;
this.addedSize += other.addedSize;
Expand Down
17 changes: 17 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestSnapshotSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,21 @@ public void testFileSizeSummary() {
Assert.assertEquals("20", summary.get(SnapshotSummary.REMOVED_FILE_SIZE_PROP));
Assert.assertEquals("10", summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
}

/**
 * Checks that a row delta adding two delete files reports one added equality delete file and one
 * added position delete file in the snapshot summary.
 *
 * <p>The check is skipped (the test passes without asserting anything) when the table format
 * version is 1.
 */
@Test
public void testFileSizeSummaryWithDeletes() {
  if (formatVersion != 1) {
    // Commit both delete files in a single row delta so they share one snapshot summary.
    // NOTE(review): presumably one fixture is a position delete file and the other an equality
    // delete file, since exactly one of each is expected below -- confirm against the fixtures.
    table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
    table.refresh();

    Map<String, String> snapshotSummary = table.currentSnapshot().summary();
    String addedEqDeleteFiles = snapshotSummary.get(SnapshotSummary.ADD_EQ_DELETE_FILES_PROP);
    String addedPosDeleteFiles = snapshotSummary.get(SnapshotSummary.ADD_POS_DELETE_FILES_PROP);

    // Summary values are stored as strings, so compare against "1" rather than the int 1.
    Assert.assertEquals("1", addedEqDeleteFiles);
    Assert.assertEquals("1", addedPosDeleteFiles);
  }
}
}