-
Notifications
You must be signed in to change notification settings - Fork 14k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[KAFKA-8522] Streamline tombstone and transaction marker removal #7884
Changes from 5 commits
fdf7095
f7dcaac
2ab16af
d36f776
3b2193b
dcc2f65
a9d8c4d
dd9ca28
1587462
a25187b
f469515
15d9d8a
a3bc996
e00f37c
331ebad
4b12ebb
2fc90d3
041e867
9df0e70
613d17d
69b5240
ad4ff10
8c9b50d
de5d0a1
5b43e43
f665275
e570f6c
3c96d55
a78c563
e287b49
ee67247
5bedf9c
bd3e18f
4515e7d
60d72d0
92530fe
8baa416
6d011ed
6cc19fe
8e6f9a2
d6dd028
9335c36
3541ea2
a88dc20
bc3f867
6a1b3da
d7491c1
06d9ff5
bed40ab
54cb56b
81ba24c
cadee72
90877fc
e694f13
a9316ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -431,6 +431,16 @@ public LegacyRecord outerRecord() { | |
return record; | ||
} | ||
|
||
@Override | ||
public long deleteHorizonMs() { | ||
return RecordBatch.NO_TIMESTAMP; | ||
} | ||
|
||
@Override | ||
public boolean deleteHorizonSet() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can we use |
||
return false; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) | ||
|
@@ -468,6 +478,16 @@ public long offset() { | |
return buffer.getLong(OFFSET_OFFSET); | ||
} | ||
|
||
@Override | ||
public long deleteHorizonMs() { | ||
return RecordBatch.NO_TIMESTAMP; | ||
} | ||
|
||
@Override | ||
public boolean deleteHorizonSet() { | ||
return false; | ||
} | ||
|
||
@Override | ||
public LegacyRecord outerRecord() { | ||
return record; | ||
|
@@ -557,6 +577,16 @@ public long baseOffset() { | |
return loadFullBatch().baseOffset(); | ||
} | ||
|
||
@Override | ||
public long deleteHorizonMs() { | ||
return RecordBatch.NO_TIMESTAMP; | ||
} | ||
|
||
@Override | ||
public boolean deleteHorizonSet() { | ||
return false; | ||
} | ||
|
||
@Override | ||
public long lastOffset() { | ||
return offset; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,11 +89,15 @@ | |
* by the broker and is preserved after compaction. Additionally, the MaxTimestamp of an empty batch always retains | ||
* the previous value prior to becoming empty. | ||
* | ||
* The delete horizon flag for the sixth bit is used to determine if the first timestamp of the batch had been set to | ||
* the time for which tombstones / transaction markers needs to be removed. If it is true, then the first timestamp is | ||
* the delete horizon, otherwise, it is merely the first timestamp of the record batch. | ||
* | ||
* The current attributes are given below: | ||
* | ||
* ------------------------------------------------------------------------------------------------- | ||
* | Unused (6-15) | Control (5) | Transactional (4) | Timestamp Type (3) | Compression Type (0-2) | | ||
* ------------------------------------------------------------------------------------------------- | ||
* --------------------------------------------------------------------------------------------------------------------------- | ||
* | Unused (7-15) | Delete Horizon Flag (6) | Control (5) | Transactional (4) | Timestamp Type (3) | Compression Type (0-2) | | ||
* --------------------------------------------------------------------------------------------------------------------------- | ||
*/ | ||
public class DefaultRecordBatch extends AbstractRecordBatch implements MutableRecordBatch { | ||
static final int BASE_OFFSET_OFFSET = 0; | ||
|
@@ -128,6 +132,7 @@ public class DefaultRecordBatch extends AbstractRecordBatch implements MutableRe | |
private static final byte COMPRESSION_CODEC_MASK = 0x07; | ||
private static final byte TRANSACTIONAL_FLAG_MASK = 0x10; | ||
private static final int CONTROL_FLAG_MASK = 0x20; | ||
private static final byte DELETE_HORIZON_FLAG_MASK = 0x40; | ||
private static final byte TIMESTAMP_TYPE_MASK = 0x08; | ||
|
||
private static final int MAX_SKIP_BUFFER_SIZE = 2048; | ||
|
@@ -155,10 +160,15 @@ public void ensureValid() { | |
} | ||
|
||
/** | ||
* Get the timestamp of the first record in this batch. It is always the create time of the record even if the | ||
* Get the timestamp of the first record in this batch. It is usually the create time of the record even if the | ||
* timestamp type of the batch is log append time. | ||
* | ||
* @return The first timestamp or {@link RecordBatch#NO_TIMESTAMP} if the batch is empty | ||
* There is the possibility that the first timestamp had been set to the delete horizon of the batch, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm.. It seems a bit brittle to rely on documentation for this. I'm considering if we should change names to better reflect this. For example, maybe we should call this |
||
* in which case, the delete horizon will be returned instead. | ||
* | ||
* @return The first timestamp if the batch's delete horizon has not been set | ||
* The delete horizon if the batch's delete horizon has been set | ||
* {@link RecordBatch#NO_TIMESTAMP} if the batch is empty | ||
*/ | ||
public long firstTimestamp() { | ||
return buffer.getLong(FIRST_TIMESTAMP_OFFSET); | ||
|
@@ -245,6 +255,18 @@ public boolean isTransactional() { | |
return (attributes() & TRANSACTIONAL_FLAG_MASK) > 0; | ||
} | ||
|
||
@Override | ||
public boolean deleteHorizonSet() { | ||
return (attributes() & DELETE_HORIZON_FLAG_MASK) > 0; | ||
} | ||
|
||
@Override | ||
public long deleteHorizonMs() { | ||
if (deleteHorizonSet()) | ||
return firstTimestamp(); | ||
return RecordBatch.NO_TIMESTAMP; | ||
} | ||
|
||
@Override | ||
public boolean isControlBatch() { | ||
return (attributes() & CONTROL_FLAG_MASK) > 0; | ||
|
@@ -360,7 +382,7 @@ public void setMaxTimestamp(TimestampType timestampType, long maxTimestamp) { | |
if (timestampType() == timestampType && currentMaxTimestamp == maxTimestamp) | ||
return; | ||
|
||
byte attributes = computeAttributes(compressionType(), timestampType, isTransactional(), isControlBatch()); | ||
byte attributes = computeAttributes(compressionType(), timestampType, isTransactional(), isControlBatch(), deleteHorizonSet()); | ||
buffer.putShort(ATTRIBUTES_OFFSET, attributes); | ||
buffer.putLong(MAX_TIMESTAMP_OFFSET, maxTimestamp); | ||
long crc = computeChecksum(); | ||
|
@@ -407,7 +429,7 @@ public int hashCode() { | |
} | ||
|
||
private static byte computeAttributes(CompressionType type, TimestampType timestampType, | ||
boolean isTransactional, boolean isControl) { | ||
boolean isTransactional, boolean isControl, boolean isDeleteHorizonSet) { | ||
if (timestampType == TimestampType.NO_TIMESTAMP_TYPE) | ||
throw new IllegalArgumentException("Timestamp type must be provided to compute attributes for message " + | ||
"format v2 and above"); | ||
|
@@ -419,6 +441,8 @@ private static byte computeAttributes(CompressionType type, TimestampType timest | |
attributes |= COMPRESSION_CODEC_MASK & type.id; | ||
if (timestampType == TimestampType.LOG_APPEND_TIME) | ||
attributes |= TIMESTAMP_TYPE_MASK; | ||
if (isDeleteHorizonSet) | ||
attributes |= DELETE_HORIZON_FLAG_MASK; | ||
return attributes; | ||
} | ||
|
||
|
@@ -435,9 +459,49 @@ public static void writeEmptyHeader(ByteBuffer buffer, | |
boolean isTransactional, | ||
boolean isControlRecord) { | ||
int offsetDelta = (int) (lastOffset - baseOffset); | ||
writeHeader(buffer, baseOffset, offsetDelta, DefaultRecordBatch.RECORD_BATCH_OVERHEAD, magic, | ||
CompressionType.NONE, timestampType, RecordBatch.NO_TIMESTAMP, timestamp, producerId, | ||
producerEpoch, baseSequence, isTransactional, isControlRecord, false, partitionLeaderEpoch, 0); | ||
} | ||
|
||
public static void writeEmptyHeader(ByteBuffer buffer, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method seems unused? |
||
byte magic, | ||
long producerId, | ||
short producerEpoch, | ||
int baseSequence, | ||
long baseOffset, | ||
long lastOffset, | ||
int partitionLeaderEpoch, | ||
TimestampType timestampType, | ||
long timestamp, | ||
boolean isTransactional, | ||
boolean isControlRecord, | ||
boolean isDeleteHorizonSet) { | ||
int offsetDelta = (int) (lastOffset - baseOffset); | ||
writeHeader(buffer, baseOffset, offsetDelta, DefaultRecordBatch.RECORD_BATCH_OVERHEAD, magic, | ||
CompressionType.NONE, timestampType, RecordBatch.NO_TIMESTAMP, timestamp, producerId, | ||
producerEpoch, baseSequence, isTransactional, isControlRecord, partitionLeaderEpoch, 0); | ||
producerEpoch, baseSequence, isTransactional, isControlRecord, isDeleteHorizonSet, partitionLeaderEpoch, 0); | ||
} | ||
|
||
static void writeHeader(ByteBuffer buffer, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method seems unused? |
||
long baseOffset, | ||
int lastOffsetDelta, | ||
int sizeInBytes, | ||
byte magic, | ||
CompressionType compressionType, | ||
TimestampType timestampType, | ||
long firstTimestamp, | ||
long maxTimestamp, | ||
long producerId, | ||
short epoch, | ||
int sequence, | ||
boolean isTransactional, | ||
boolean isControlBatch, | ||
int partitionLeaderEpoch, | ||
int numRecords) { | ||
writeHeader(buffer, baseOffset, lastOffsetDelta, sizeInBytes, magic, compressionType, | ||
timestampType, firstTimestamp, maxTimestamp, producerId, epoch, sequence, | ||
isTransactional, isControlBatch, false, partitionLeaderEpoch, numRecords); | ||
} | ||
|
||
static void writeHeader(ByteBuffer buffer, | ||
|
@@ -454,14 +518,15 @@ static void writeHeader(ByteBuffer buffer, | |
int sequence, | ||
boolean isTransactional, | ||
boolean isControlBatch, | ||
boolean isDeleteHorizonSet, | ||
int partitionLeaderEpoch, | ||
int numRecords) { | ||
if (magic < RecordBatch.CURRENT_MAGIC_VALUE) | ||
throw new IllegalArgumentException("Invalid magic value " + magic); | ||
if (firstTimestamp < 0 && firstTimestamp != NO_TIMESTAMP) | ||
throw new IllegalArgumentException("Invalid message timestamp " + firstTimestamp); | ||
|
||
short attributes = computeAttributes(compressionType, timestampType, isTransactional, isControlBatch); | ||
short attributes = computeAttributes(compressionType, timestampType, isTransactional, isControlBatch, isDeleteHorizonSet); | ||
|
||
int position = buffer.position(); | ||
buffer.putLong(position + BASE_OFFSET_OFFSET, baseOffset); | ||
|
@@ -699,6 +764,18 @@ public boolean isTransactional() { | |
return loadBatchHeader().isTransactional(); | ||
} | ||
|
||
@Override | ||
public boolean deleteHorizonSet() { | ||
return loadBatchHeader().deleteHorizonSet(); | ||
} | ||
|
||
@Override | ||
public long deleteHorizonMs() { | ||
if (deleteHorizonSet()) | ||
return super.loadBatchHeader().deleteHorizonMs(); | ||
return RecordBatch.NO_TIMESTAMP; | ||
} | ||
|
||
@Override | ||
public boolean isControlBatch() { | ||
return loadBatchHeader().isControlBatch(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why don't we move these into
AbstractLegacyRecordBatch
?