Skip to content

Commit

Permalink
Re-structure document ID generation favoring _id inverted index com…
Browse files Browse the repository at this point in the history
…pression (#104683)

This implementation restructures auto-generated document IDs to maximize compression within Lucene's terms dictionary. The key insight is placing stable or slowly-changing components at the start of the ID - the most significant bytes of the timestamp change very gradually (the first byte shifts only every 35 years, the second every 50 days). This careful ordering means that large sequences of IDs generated close in time will share common prefixes, allowing Lucene's Finite State Transducer (FST) to store terms more compactly.

To maintain uniqueness while preserving these compression benefits, the ID combines three elements: a timestamp that ensures time-based ordering, the coordinator's MAC address for cluster-wide uniqueness, and a sequence number for handling high-throughput scenarios. The timestamp handling is particularly robust, using atomic operations to prevent backwards movement even if the system clock shifts.

For high-volume indices generating millions of documents, this optimization can lead to substantial storage savings while maintaining strict guarantees about ID uniqueness and ordering.
  • Loading branch information
salvatore-campagna authored Nov 12, 2024
1 parent 7039a1d commit 778ab8f
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 55 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/104683.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 104683
summary: "Feature: re-structure document ID generation favoring _id inverted index compression"
area: Logs
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ public void testBulkWithWriteIndexAndRouting() {
// allowing the auto-generated timestamp to externally be set would allow making the index inconsistent with duplicate docs
public void testExternallySetAutoGeneratedTimestamp() {
IndexRequest indexRequest = new IndexRequest("index1").source(Collections.singletonMap("foo", "baz"));
indexRequest.autoGenerateId();
if (randomBoolean()) {
indexRequest.autoGenerateId();
} else {
indexRequest.autoGenerateTimeBasedId();
}
if (randomBoolean()) {
indexRequest.id("test");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.Supplier;

import static org.elasticsearch.action.ValidateActions.addValidationError;
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
Expand All @@ -76,6 +77,9 @@ public class IndexRequest extends ReplicatedWriteRequest<IndexRequest> implement
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(IndexRequest.class);
private static final TransportVersion PIPELINES_HAVE_RUN_FIELD_ADDED = TransportVersions.V_8_10_X;

private static final Supplier<String> ID_GENERATOR = UUIDs::base64UUID;
private static final Supplier<String> K_SORTED_TIME_BASED_ID_GENERATOR = UUIDs::base64TimeBasedKOrderedUUID;

/**
* Max length of the source document to include into string()
*
Expand Down Expand Up @@ -692,10 +696,18 @@ public void process(IndexRouting indexRouting) {
* request compatible with the append-only optimization.
*/
public void autoGenerateId() {
assert id == null;
assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
assert ifSeqNo == UNASSIGNED_SEQ_NO;
assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
assertBeforeGeneratingId();
autoGenerateTimestamp();
id(ID_GENERATOR.get());
}

public void autoGenerateTimeBasedId() {
assertBeforeGeneratingId();
autoGenerateTimestamp();
id(K_SORTED_TIME_BASED_ID_GENERATOR.get());
}

private void autoGenerateTimestamp() {
/*
* Set the auto generated timestamp so the append only optimization
* can quickly test if this request *must* be unique without reaching
Expand All @@ -704,8 +716,13 @@ public void autoGenerateId() {
* never work before 1970, but that's ok. It's after 1970.
*/
autoGeneratedTimestamp = Math.max(0, System.currentTimeMillis());
String uid = UUIDs.base64UUID();
id(uid);
}

private void assertBeforeGeneratingId() {
assert id == null;
assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
assert ifSeqNo == UNASSIGNED_SEQ_NO;
assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
import org.elasticsearch.transport.Transports;
Expand Down Expand Up @@ -147,11 +149,15 @@ public void checkIndexSplitAllowed() {}

private abstract static class IdAndRoutingOnly extends IndexRouting {
private final boolean routingRequired;
private final IndexVersion creationVersion;
private final IndexMode indexMode;

IdAndRoutingOnly(IndexMetadata metadata) {
super(metadata);
this.creationVersion = metadata.getCreationVersion();
MappingMetadata mapping = metadata.mapping();
this.routingRequired = mapping == null ? false : mapping.routingRequired();
this.indexMode = metadata.getIndexMode();
}

protected abstract int shardId(String id, @Nullable String routing);
Expand All @@ -161,7 +167,11 @@ public void process(IndexRequest indexRequest) {
// generate id if not already provided
final String id = indexRequest.id();
if (id == null) {
indexRequest.autoGenerateId();
if (creationVersion.onOrAfter(IndexVersions.TIME_BASED_K_ORDERED_DOC_ID) && indexMode == IndexMode.LOGSDB) {
indexRequest.autoGenerateTimeBasedId();
} else {
indexRequest.autoGenerateId();
}
} else if (id.isEmpty()) {
throw new IllegalArgumentException("if _id is specified it must not be empty");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.common;

import java.nio.ByteBuffer;
import java.util.Base64;

/**
* Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing.
* <p>
* This method produces a time-based UUID where slowly changing components like the timestamp appear first,
* improving prefix-sharing and compression during indexing. It ensures uniqueness across nodes by incorporating
* a timestamp, a MAC address, and a sequence ID.
* <p>
* <b>Timestamp:</b> Represents the current time in milliseconds, ensuring ordering and uniqueness.
* <br>
* <b>MAC Address:</b> Ensures uniqueness across different coordinators.
* <br>
* <b>Sequence ID:</b> Differentiates UUIDs generated within the same millisecond, ensuring uniqueness even at high throughput.
* <p>
* The result is a compact base64-encoded string, optimized for efficient compression of the _id field in an inverted index.
*/
public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();

@Override
public String getBase64UUID() {
final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF;

// Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
// Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward.
// If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented
// to ensure strict ordering.
long timestamp = this.lastTimestamp.accumulateAndGet(
currentTimeMillis(),
sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
);

final byte[] uuidBytes = new byte[15];
final ByteBuffer buffer = ByteBuffer.wrap(uuidBytes);

buffer.put((byte) (timestamp >>> 40)); // changes every 35 years
buffer.put((byte) (timestamp >>> 32)); // changes every ~50 days
buffer.put((byte) (timestamp >>> 24)); // changes every ~4.5h
buffer.put((byte) (timestamp >>> 16)); // changes every ~65 secs

// MAC address of the coordinator might change if there are many coordinators in the cluster
// and the indexing api does not necessarily target the same coordinator.
byte[] macAddress = macAddress();
assert macAddress.length == 6;
buffer.put(macAddress, 0, macAddress.length);

buffer.put((byte) (sequenceId >>> 16));

// From hereinafter everything is almost like random and does not compress well
// due to unlikely prefix-sharing
buffer.put((byte) (timestamp >>> 8));
buffer.put((byte) (sequenceId >>> 8));
buffer.put((byte) timestamp);
buffer.put((byte) sequenceId);

assert buffer.position() == uuidBytes.length;

return BASE_64_NO_PADDING.encodeToString(uuidBytes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ class TimeBasedUUIDGenerator implements UUIDGenerator {

// We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
// backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
private final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());

// Used to ensure clock moves forward:
private final AtomicLong lastTimestamp = new AtomicLong(0);
protected final AtomicLong lastTimestamp = new AtomicLong(0);

private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();

Expand Down
10 changes: 10 additions & 0 deletions server/src/main/java/org/elasticsearch/common/UUIDs.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
public class UUIDs {

private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();

private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();

/**
Expand All @@ -33,6 +35,14 @@ public static String base64UUID() {
return TIME_UUID_GENERATOR.getBase64UUID();
}

public static String base64TimeBasedKOrderedUUID() {
return TIME_BASED_K_ORDERED_GENERATOR.getBase64UUID();
}

public static String base64TimeBasedUUID() {
return TIME_UUID_GENERATOR.getBase64UUID();
}

/**
* The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0);
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_00_0, Version.LUCENE_10_0_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ public void testAutoGenerateId() {
assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
}

public void testAutoGenerateTimeBasedId() {
IndexRequest request = new IndexRequest("index");
request.autoGenerateTimeBasedId();
assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
}

public void testIndexResponse() {
ShardId shardId = new ShardId(randomAlphaOfLengthBetween(3, 10), randomAlphaOfLengthBetween(3, 10), randomIntBetween(0, 1000));
String id = randomAlphaOfLengthBetween(3, 10);
Expand Down
Loading

0 comments on commit 778ab8f

Please sign in to comment.