From 7aa07f1a2a34c2bdf73863f51485616d598bc7e7 Mon Sep 17 00:00:00 2001
From: Mikhail Berezovskiy
Date: Thu, 21 Nov 2024 14:04:42 -0500
Subject: [PATCH 01/10] Explicit HTTP content copy/retain (#116115)

---
 .../forbidden/es-server-signatures.txt        |    2 +
 docs/changelog/116115.yaml                    |    5 +
 .../Netty4IncrementalRequestHandlingIT.java   |    5 -
 .../netty4/Netty4TrashingAllocatorIT.java     |  122 ++
 .../http/netty4/Netty4HttpRequest.java        |   34 -
 .../transport/netty4/Netty4Utils.java         |    2 +-
 .../transport/netty4/NettyAllocator.java      |  113 +-
 .../transport/netty4/WrappedByteBuf.java      | 1036 +++++++++++++++++
 .../transport/netty4/NettyAllocatorTests.java |  106 ++
 .../common/bytes/BytesReference.java          |   23 +
 .../java/org/elasticsearch/http/HttpBody.java |    9 +-
 .../org/elasticsearch/http/HttpRequest.java   |    6 -
 .../org/elasticsearch/http/HttpTracer.java    |    2 +-
 .../elasticsearch/rest/BaseRestHandler.java   |    9 +-
 .../elasticsearch/rest/FilterRestHandler.java |    5 -
 .../elasticsearch/rest/RestController.java    |    4 -
 .../org/elasticsearch/rest/RestHandler.java   |   12 -
 .../org/elasticsearch/rest/RestRequest.java   |   61 +-
 .../elasticsearch/rest/RestRequestFilter.java |    7 +-
 .../rest/action/document/RestBulkAction.java  |   16 +-
 .../rest/action/document/RestIndexAction.java |   19 +-
 .../rest/action/search/RestSearchAction.java  |    4 -
 .../common/bytes/BytesArrayTests.java         |    6 +
 .../elasticsearch/http/TestHttpRequest.java   |    5 -
 .../rest/RestControllerTests.java             |    5 -
 .../test/rest/FakeRestRequest.java            |    5 -
 .../EnterpriseSearchBaseRestHandler.java      |    2 +-
 .../logstash/rest/RestPutPipelineAction.java  |    2 +-
 .../xpack/security/audit/AuditUtil.java       |    5 +-
 .../rest/action/SecurityBaseRestHandler.java  |    2 +-
 .../audit/logfile/LoggingAuditTrailTests.java |    8 +-
 31 files changed, 1505 insertions(+), 137 deletions(-)
 create mode 100644 docs/changelog/116115.yaml
 create mode 100644 modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java
 create mode 100644 modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/WrappedByteBuf.java
 create mode 100644 modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java

diff --git a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt
index a9da7995c2b36..68b97050ea012 100644
--- a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt
+++ b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt
@@ -167,3 +167,5 @@ org.elasticsearch.cluster.SnapshotDeletionsInProgress$Entry#<init>(java.lang.Str
 @defaultMessage Use a Thread constructor with a name, anonymous threads are more difficult to debug
 java.lang.Thread#<init>(java.lang.Runnable)
 java.lang.Thread#<init>(java.lang.ThreadGroup, java.lang.Runnable)
+
+org.elasticsearch.common.bytes.BytesReference#copyBytes(org.elasticsearch.common.bytes.BytesReference) @ This method is subject to removal. Copying bytes is prone to performance regressions and unnecessary allocations.
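Note: with BytesReference#copyBytes forbidden, a REST handler that needs the request body to outlive the synchronous part of request handling is expected to retain the pooled buffer and release it when done, rather than copy it. Below is a minimal sketch of that retain/release pattern, condensed from the RestBulkAction and RestIndexAction changes later in this patch; MyAction and MyRequest are hypothetical placeholders, not part of this change.

    // Inside a hypothetical BaseRestHandler subclass (MyAction/MyRequest are illustrative only):
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
        // Take a direct reference to the pooled network buffer instead of copying it.
        ReleasableBytesReference content = request.requiredReleasableContent();
        MyRequest actionRequest = MyRequest.parse(content, request.getXContentType()); // may hold slices of `content`
        return channel -> {
            // The HTTP layer drops its own reference once the synchronous steps finish,
            // so take an extra reference for the duration of the async action ...
            content.mustIncRef();
            client.execute(
                MyAction.INSTANCE,
                actionRequest,
                // ... and release it exactly once when the listener completes, on success or failure.
                ActionListener.releaseAfter(new RestToXContentListener<>(channel), content)
            );
        };
    }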
diff --git a/docs/changelog/116115.yaml b/docs/changelog/116115.yaml
new file mode 100644
index 0000000000000..33e1735c20ca4
--- /dev/null
+++ b/docs/changelog/116115.yaml
@@ -0,0 +1,5 @@
+pr: 116115
+summary: Allow http unsafe buffers by default
+area: Network
+type: enhancement
+issues: []
diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java
index 3095139ca4685..4bb27af4bd0f5 100644
--- a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java
+++ b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4IncrementalRequestHandlingIT.java
@@ -699,11 +699,6 @@ public Collection<RestHandler> getRestHandlers(
         Predicate<NodeFeature> clusterSupportsFeature
     ) {
         return List.of(new BaseRestHandler() {
-            @Override
-            public boolean allowsUnsafeBuffers() {
-                return true;
-            }
-
             @Override
             public String getName() {
                 return ROUTE;
diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java
new file mode 100644
index 0000000000000..18c91068ff4f9
--- /dev/null
+++ b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.http.netty4;
+
+import io.netty.handler.codec.http.HttpResponseStatus;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.ESNetty4IntegTestCase;
+import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.settings.ClusterSettings;
+import org.elasticsearch.common.settings.IndexScopedSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.settings.SettingsFilter;
+import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.features.NodeFeature;
+import org.elasticsearch.http.HttpServerTransport;
+import org.elasticsearch.plugins.ActionPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestController;
+import org.elasticsearch.rest.RestHandler;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.RestResponse;
+import org.elasticsearch.rest.RestStatus;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.function.Predicate;
+import java.util.function.Supplier;
+
+public class Netty4TrashingAllocatorIT extends ESNetty4IntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return CollectionUtils.concatLists(List.of(Handler.class), super.nodePlugins());
+    }
+
+    @Override
+    protected boolean addMockHttpTransport() {
+        return false;
+    }
+
+    public void testTrashContent() throws InterruptedException {
+        try (var client = new Netty4HttpClient()) {
+            var addr = randomFrom(internalCluster().getInstance(HttpServerTransport.class).boundAddress().boundAddresses()).address();
+            var content = randomAlphaOfLength(between(1024, 2048));
+            var responses = client.post(addr, List.of(new Tuple<>(Handler.ROUTE, content)));
+            assertEquals(HttpResponseStatus.OK, responses.stream().findFirst().get().status());
+        }
+    }
+
+    public static class Handler extends Plugin implements ActionPlugin {
+        static final String ROUTE = "/_test/trashing-alloc";
+
+        @Override
+        public Collection<RestHandler> getRestHandlers(
+            Settings settings,
+            NamedWriteableRegistry namedWriteableRegistry,
+            RestController restController,
+            ClusterSettings clusterSettings,
+            IndexScopedSettings indexScopedSettings,
+            SettingsFilter settingsFilter,
+            IndexNameExpressionResolver indexNameExpressionResolver,
+            Supplier<DiscoveryNodes> nodesInCluster,
+            Predicate<NodeFeature> clusterSupportsFeature
+        ) {
+            return List.of(new BaseRestHandler() {
+                @Override
+                public String getName() {
+                    return ROUTE;
+                }
+
+                @Override
+                public List<Route> routes() {
+                    return List.of(new Route(RestRequest.Method.POST, ROUTE));
+                }
+
+                @Override
+                protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
+                    var content = request.releasableContent();
+                    var iter = content.iterator();
+                    return (chan) -> {
+                        request.getHttpRequest().release();
+                        assertFalse(content.hasReferences());
+                        BytesRef br;
+                        while ((br = iter.next()) != null) {
+                            for (int i = br.offset; i < br.offset + br.length; i++) {
+                                if (br.bytes[i] != 0) {
+                                    fail(
+                                        new AssertionError(
+                                            "buffer is not trashed, off="
+                                                + br.offset
+                                                + " len="
+                                                + br.length
+                                                + " pos="
+                                                + i
+                                                + " ind="
+                                                + (i - br.offset)
+                                        )
+                                    );
+                                }
+                            }
+                        }
+                        chan.sendResponse(new RestResponse(RestStatus.OK,
"")); + }; + } + }); + } + } +} diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequest.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequest.java index a1aa211814520..2662ddf7e1440 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequest.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequest.java @@ -9,7 +9,6 @@ package org.elasticsearch.http.netty4; -import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.DefaultFullHttpRequest; import io.netty.handler.codec.http.EmptyHttpHeaders; @@ -128,39 +127,6 @@ public void release() { } } - @Override - public HttpRequest releaseAndCopy() { - assert released.get() == false; - if (pooled == false) { - return this; - } - try { - final ByteBuf copiedContent = Unpooled.copiedBuffer(request.content()); - HttpBody newContent; - if (content.isStream()) { - newContent = content; - } else { - newContent = Netty4Utils.fullHttpBodyFrom(copiedContent); - } - return new Netty4HttpRequest( - sequence, - new DefaultFullHttpRequest( - request.protocolVersion(), - request.method(), - request.uri(), - copiedContent, - request.headers(), - request.trailingHeaders() - ), - new AtomicBoolean(false), - false, - newContent - ); - } finally { - release(); - } - } - @Override public final Map> getHeaders() { return headers; diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java index 459b6c77be8c3..81b4fd3fbb9ee 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Utils.java @@ -179,7 +179,7 @@ public boolean hasReferences() { } public static HttpBody.Full fullHttpBodyFrom(final ByteBuf buf) { - return new HttpBody.ByteRefHttpBody(toBytesReference(buf)); + return new HttpBody.ByteRefHttpBody(toReleasableBytesReference(buf)); } public static Recycler createRecycler(Settings settings) { diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java index ab38b5f0c4c8c..1eb7e13889338 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java @@ -24,9 +24,11 @@ import org.elasticsearch.common.recycler.Recycler; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.core.Assertions; import org.elasticsearch.core.Booleans; import org.elasticsearch.monitor.jvm.JvmInfo; +import java.util.Arrays; import java.util.concurrent.atomic.AtomicBoolean; public class NettyAllocator { @@ -44,8 +46,9 @@ public class NettyAllocator { private static final String USE_NETTY_DEFAULT_CHUNK = "es.unsafe.use_netty_default_chunk_and_page_size"; static { + ByteBufAllocator allocator; if (Booleans.parseBoolean(System.getProperty(USE_NETTY_DEFAULT), false)) { - ALLOCATOR = ByteBufAllocator.DEFAULT; + allocator = ByteBufAllocator.DEFAULT; SUGGESTED_MAX_ALLOCATION_SIZE = 1024 * 1024; DESCRIPTION = "[name=netty_default, suggested_max_allocation_size=" + 
ByteSizeValue.ofBytes(SUGGESTED_MAX_ALLOCATION_SIZE)
@@ -127,7 +130,12 @@ public class NettyAllocator {
                     + g1gcRegionSize
                     + "}]";
             }
-            ALLOCATOR = new NoDirectBuffers(delegate);
+            allocator = new NoDirectBuffers(delegate);
+        }
+        if (Assertions.ENABLED) {
+            ALLOCATOR = new TrashingByteBufAllocator(allocator);
+        } else {
+            ALLOCATOR = allocator;
         }
 
         RECYCLER = new Recycler<>() {
@@ -353,4 +361,105 @@ public ByteBufAllocator getDelegate() {
             return delegate;
         }
     }
+
+    static class TrashingByteBuf extends WrappedByteBuf {
+
+        private boolean trashed = false;
+
+        protected TrashingByteBuf(ByteBuf buf) {
+            super(buf);
+        }
+
+        @Override
+        public boolean release() {
+            if (refCnt() == 1) {
+                // see [NOTE on racy trashContent() calls]
+                trashContent();
+            }
+            return super.release();
+        }
+
+        @Override
+        public boolean release(int decrement) {
+            if (refCnt() == decrement && refCnt() > 0) {
+                // see [NOTE on racy trashContent() calls]
+                trashContent();
+            }
+            return super.release(decrement);
+        }
+
+        // [NOTE on racy trashContent() calls]: We trash the buffer content _before_ reducing the ref
+        // count to zero, which looks racy because in principle a concurrent caller could come along
+        // and successfully retain() this buffer to keep it alive after it's been trashed. Such a
+        // caller would sometimes get an IllegalReferenceCountException, of course, but that's something
+        // it could handle - see for instance org.elasticsearch.transport.netty4.Netty4Utils.ByteBufRefCounted.tryIncRef.
+        // Yet in practice this should never happen: we only ever retain() these buffers while we
+        // know them to be alive (i.e. via RefCounted#mustIncRef or its moral equivalents), so it'd
+        // be a bug for a caller to retain() a buffer whose ref count is heading to zero and whose
+        // contents we've already decided to trash.
+ private void trashContent() { + if (trashed == false) { + trashed = true; + TrashingByteBufAllocator.trashBuffer(buf); + } + } + } + + static class TrashingCompositeByteBuf extends CompositeByteBuf { + + TrashingCompositeByteBuf(ByteBufAllocator alloc, boolean direct, int maxNumComponents) { + super(alloc, direct, maxNumComponents); + } + + @Override + protected void deallocate() { + TrashingByteBufAllocator.trashBuffer(this); + super.deallocate(); + } + } + + static class TrashingByteBufAllocator extends NoDirectBuffers { + + static int DEFAULT_MAX_COMPONENTS = 16; + + static void trashBuffer(ByteBuf buf) { + for (var nioBuf : buf.nioBuffers()) { + if (nioBuf.hasArray()) { + var from = nioBuf.arrayOffset() + nioBuf.position(); + var to = from + nioBuf.remaining(); + Arrays.fill(nioBuf.array(), from, to, (byte) 0); + } + } + } + + TrashingByteBufAllocator(ByteBufAllocator delegate) { + super(delegate); + } + + @Override + public ByteBuf heapBuffer() { + return new TrashingByteBuf(super.heapBuffer()); + } + + @Override + public ByteBuf heapBuffer(int initialCapacity) { + return new TrashingByteBuf(super.heapBuffer(initialCapacity)); + } + + @Override + public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) { + return new TrashingByteBuf(super.heapBuffer(initialCapacity, maxCapacity)); + } + + @Override + public CompositeByteBuf compositeHeapBuffer() { + return new TrashingCompositeByteBuf(this, false, DEFAULT_MAX_COMPONENTS); + } + + @Override + public CompositeByteBuf compositeHeapBuffer(int maxNumComponents) { + return new TrashingCompositeByteBuf(this, false, maxNumComponents); + } + + } } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/WrappedByteBuf.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/WrappedByteBuf.java new file mode 100644 index 0000000000000..50841cec000f1 --- /dev/null +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/WrappedByteBuf.java @@ -0,0 +1,1036 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.transport.netty4; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.util.ByteProcessor; +import io.netty.util.internal.ObjectUtil; +import io.netty.util.internal.StringUtil; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.channels.GatheringByteChannel; +import java.nio.channels.ScatteringByteChannel; +import java.nio.charset.Charset; + +/** + * A copy of Netty's WrappedByteBuf. 
+ */ +class WrappedByteBuf extends ByteBuf { + + protected final ByteBuf buf; + + protected WrappedByteBuf(ByteBuf buf) { + this.buf = ObjectUtil.checkNotNull(buf, "buf"); + } + + @Override + public final boolean hasMemoryAddress() { + return buf.hasMemoryAddress(); + } + + @Override + public boolean isContiguous() { + return buf.isContiguous(); + } + + @Override + public final long memoryAddress() { + return buf.memoryAddress(); + } + + @Override + public final int capacity() { + return buf.capacity(); + } + + @Override + public ByteBuf capacity(int newCapacity) { + buf.capacity(newCapacity); + return this; + } + + @Override + public final int maxCapacity() { + return buf.maxCapacity(); + } + + @Override + public final ByteBufAllocator alloc() { + return buf.alloc(); + } + + @Override + public final ByteOrder order() { + return buf.order(); + } + + @Override + public ByteBuf order(ByteOrder endianness) { + return buf.order(endianness); + } + + @Override + public final ByteBuf unwrap() { + return buf; + } + + @Override + public ByteBuf asReadOnly() { + return buf.asReadOnly(); + } + + @Override + public boolean isReadOnly() { + return buf.isReadOnly(); + } + + @Override + public final boolean isDirect() { + return buf.isDirect(); + } + + @Override + public final int readerIndex() { + return buf.readerIndex(); + } + + @Override + public final ByteBuf readerIndex(int readerIndex) { + buf.readerIndex(readerIndex); + return this; + } + + @Override + public final int writerIndex() { + return buf.writerIndex(); + } + + @Override + public final ByteBuf writerIndex(int writerIndex) { + buf.writerIndex(writerIndex); + return this; + } + + @Override + public ByteBuf setIndex(int readerIndex, int writerIndex) { + buf.setIndex(readerIndex, writerIndex); + return this; + } + + @Override + public final int readableBytes() { + return buf.readableBytes(); + } + + @Override + public final int writableBytes() { + return buf.writableBytes(); + } + + @Override + public final int maxWritableBytes() { + return buf.maxWritableBytes(); + } + + @Override + public int maxFastWritableBytes() { + return buf.maxFastWritableBytes(); + } + + @Override + public final boolean isReadable() { + return buf.isReadable(); + } + + @Override + public final boolean isWritable() { + return buf.isWritable(); + } + + @Override + public final ByteBuf clear() { + buf.clear(); + return this; + } + + @Override + public final ByteBuf markReaderIndex() { + buf.markReaderIndex(); + return this; + } + + @Override + public final ByteBuf resetReaderIndex() { + buf.resetReaderIndex(); + return this; + } + + @Override + public final ByteBuf markWriterIndex() { + buf.markWriterIndex(); + return this; + } + + @Override + public final ByteBuf resetWriterIndex() { + buf.resetWriterIndex(); + return this; + } + + @Override + public ByteBuf discardReadBytes() { + buf.discardReadBytes(); + return this; + } + + @Override + public ByteBuf discardSomeReadBytes() { + buf.discardSomeReadBytes(); + return this; + } + + @Override + public ByteBuf ensureWritable(int minWritableBytes) { + buf.ensureWritable(minWritableBytes); + return this; + } + + @Override + public int ensureWritable(int minWritableBytes, boolean force) { + return buf.ensureWritable(minWritableBytes, force); + } + + @Override + public boolean getBoolean(int index) { + return buf.getBoolean(index); + } + + @Override + public byte getByte(int index) { + return buf.getByte(index); + } + + @Override + public short getUnsignedByte(int index) { + return buf.getUnsignedByte(index); + } + + 
@Override + public short getShort(int index) { + return buf.getShort(index); + } + + @Override + public short getShortLE(int index) { + return buf.getShortLE(index); + } + + @Override + public int getUnsignedShort(int index) { + return buf.getUnsignedShort(index); + } + + @Override + public int getUnsignedShortLE(int index) { + return buf.getUnsignedShortLE(index); + } + + @Override + public int getMedium(int index) { + return buf.getMedium(index); + } + + @Override + public int getMediumLE(int index) { + return buf.getMediumLE(index); + } + + @Override + public int getUnsignedMedium(int index) { + return buf.getUnsignedMedium(index); + } + + @Override + public int getUnsignedMediumLE(int index) { + return buf.getUnsignedMediumLE(index); + } + + @Override + public int getInt(int index) { + return buf.getInt(index); + } + + @Override + public int getIntLE(int index) { + return buf.getIntLE(index); + } + + @Override + public long getUnsignedInt(int index) { + return buf.getUnsignedInt(index); + } + + @Override + public long getUnsignedIntLE(int index) { + return buf.getUnsignedIntLE(index); + } + + @Override + public long getLong(int index) { + return buf.getLong(index); + } + + @Override + public long getLongLE(int index) { + return buf.getLongLE(index); + } + + @Override + public char getChar(int index) { + return buf.getChar(index); + } + + @Override + public float getFloat(int index) { + return buf.getFloat(index); + } + + @Override + public double getDouble(int index) { + return buf.getDouble(index); + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst) { + buf.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int length) { + buf.getBytes(index, dst, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { + buf.getBytes(index, dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, byte[] dst) { + buf.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { + buf.getBytes(index, dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuffer dst) { + buf.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { + buf.getBytes(index, out, length); + return this; + } + + @Override + public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { + return buf.getBytes(index, out, length); + } + + @Override + public int getBytes(int index, FileChannel out, long position, int length) throws IOException { + return buf.getBytes(index, out, position, length); + } + + @Override + public CharSequence getCharSequence(int index, int length, Charset charset) { + return buf.getCharSequence(index, length, charset); + } + + @Override + public ByteBuf setBoolean(int index, boolean value) { + buf.setBoolean(index, value); + return this; + } + + @Override + public ByteBuf setByte(int index, int value) { + buf.setByte(index, value); + return this; + } + + @Override + public ByteBuf setShort(int index, int value) { + buf.setShort(index, value); + return this; + } + + @Override + public ByteBuf setShortLE(int index, int value) { + buf.setShortLE(index, value); + return this; + } + + @Override + public ByteBuf setMedium(int index, int value) { + buf.setMedium(index, value); + return this; + } + + @Override + public 
ByteBuf setMediumLE(int index, int value) { + buf.setMediumLE(index, value); + return this; + } + + @Override + public ByteBuf setInt(int index, int value) { + buf.setInt(index, value); + return this; + } + + @Override + public ByteBuf setIntLE(int index, int value) { + buf.setIntLE(index, value); + return this; + } + + @Override + public ByteBuf setLong(int index, long value) { + buf.setLong(index, value); + return this; + } + + @Override + public ByteBuf setLongLE(int index, long value) { + buf.setLongLE(index, value); + return this; + } + + @Override + public ByteBuf setChar(int index, int value) { + buf.setChar(index, value); + return this; + } + + @Override + public ByteBuf setFloat(int index, float value) { + buf.setFloat(index, value); + return this; + } + + @Override + public ByteBuf setDouble(int index, double value) { + buf.setDouble(index, value); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src) { + buf.setBytes(index, src); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src, int length) { + buf.setBytes(index, src, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { + buf.setBytes(index, src, srcIndex, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, byte[] src) { + buf.setBytes(index, src); + return this; + } + + @Override + public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { + buf.setBytes(index, src, srcIndex, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuffer src) { + buf.setBytes(index, src); + return this; + } + + @Override + public int setBytes(int index, InputStream in, int length) throws IOException { + return buf.setBytes(index, in, length); + } + + @Override + public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException { + return buf.setBytes(index, in, length); + } + + @Override + public int setBytes(int index, FileChannel in, long position, int length) throws IOException { + return buf.setBytes(index, in, position, length); + } + + @Override + public ByteBuf setZero(int index, int length) { + buf.setZero(index, length); + return this; + } + + @Override + public int setCharSequence(int index, CharSequence sequence, Charset charset) { + return buf.setCharSequence(index, sequence, charset); + } + + @Override + public boolean readBoolean() { + return buf.readBoolean(); + } + + @Override + public byte readByte() { + return buf.readByte(); + } + + @Override + public short readUnsignedByte() { + return buf.readUnsignedByte(); + } + + @Override + public short readShort() { + return buf.readShort(); + } + + @Override + public short readShortLE() { + return buf.readShortLE(); + } + + @Override + public int readUnsignedShort() { + return buf.readUnsignedShort(); + } + + @Override + public int readUnsignedShortLE() { + return buf.readUnsignedShortLE(); + } + + @Override + public int readMedium() { + return buf.readMedium(); + } + + @Override + public int readMediumLE() { + return buf.readMediumLE(); + } + + @Override + public int readUnsignedMedium() { + return buf.readUnsignedMedium(); + } + + @Override + public int readUnsignedMediumLE() { + return buf.readUnsignedMediumLE(); + } + + @Override + public int readInt() { + return buf.readInt(); + } + + @Override + public int readIntLE() { + return buf.readIntLE(); + } + + @Override + public long readUnsignedInt() { + return buf.readUnsignedInt(); + } + + @Override + public long 
readUnsignedIntLE() { + return buf.readUnsignedIntLE(); + } + + @Override + public long readLong() { + return buf.readLong(); + } + + @Override + public long readLongLE() { + return buf.readLongLE(); + } + + @Override + public char readChar() { + return buf.readChar(); + } + + @Override + public float readFloat() { + return buf.readFloat(); + } + + @Override + public double readDouble() { + return buf.readDouble(); + } + + @Override + public ByteBuf readBytes(int length) { + return buf.readBytes(length); + } + + @Override + public ByteBuf readSlice(int length) { + return buf.readSlice(length); + } + + @Override + public ByteBuf readRetainedSlice(int length) { + return buf.readRetainedSlice(length); + } + + @Override + public ByteBuf readBytes(ByteBuf dst) { + buf.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int length) { + buf.readBytes(dst, length); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int dstIndex, int length) { + buf.readBytes(dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf readBytes(byte[] dst) { + buf.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(byte[] dst, int dstIndex, int length) { + buf.readBytes(dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuffer dst) { + buf.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(OutputStream out, int length) throws IOException { + buf.readBytes(out, length); + return this; + } + + @Override + public int readBytes(GatheringByteChannel out, int length) throws IOException { + return buf.readBytes(out, length); + } + + @Override + public int readBytes(FileChannel out, long position, int length) throws IOException { + return buf.readBytes(out, position, length); + } + + @Override + public CharSequence readCharSequence(int length, Charset charset) { + return buf.readCharSequence(length, charset); + } + + @Override + public ByteBuf skipBytes(int length) { + buf.skipBytes(length); + return this; + } + + @Override + public ByteBuf writeBoolean(boolean value) { + buf.writeBoolean(value); + return this; + } + + @Override + public ByteBuf writeByte(int value) { + buf.writeByte(value); + return this; + } + + @Override + public ByteBuf writeShort(int value) { + buf.writeShort(value); + return this; + } + + @Override + public ByteBuf writeShortLE(int value) { + buf.writeShortLE(value); + return this; + } + + @Override + public ByteBuf writeMedium(int value) { + buf.writeMedium(value); + return this; + } + + @Override + public ByteBuf writeMediumLE(int value) { + buf.writeMediumLE(value); + return this; + } + + @Override + public ByteBuf writeInt(int value) { + buf.writeInt(value); + return this; + } + + @Override + public ByteBuf writeIntLE(int value) { + buf.writeIntLE(value); + return this; + } + + @Override + public ByteBuf writeLong(long value) { + buf.writeLong(value); + return this; + } + + @Override + public ByteBuf writeLongLE(long value) { + buf.writeLongLE(value); + return this; + } + + @Override + public ByteBuf writeChar(int value) { + buf.writeChar(value); + return this; + } + + @Override + public ByteBuf writeFloat(float value) { + buf.writeFloat(value); + return this; + } + + @Override + public ByteBuf writeDouble(double value) { + buf.writeDouble(value); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src) { + buf.writeBytes(src); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int length) { + 
buf.writeBytes(src, length); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int srcIndex, int length) { + buf.writeBytes(src, srcIndex, length); + return this; + } + + @Override + public ByteBuf writeBytes(byte[] src) { + buf.writeBytes(src); + return this; + } + + @Override + public ByteBuf writeBytes(byte[] src, int srcIndex, int length) { + buf.writeBytes(src, srcIndex, length); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuffer src) { + buf.writeBytes(src); + return this; + } + + @Override + public int writeBytes(InputStream in, int length) throws IOException { + return buf.writeBytes(in, length); + } + + @Override + public int writeBytes(ScatteringByteChannel in, int length) throws IOException { + return buf.writeBytes(in, length); + } + + @Override + public int writeBytes(FileChannel in, long position, int length) throws IOException { + return buf.writeBytes(in, position, length); + } + + @Override + public ByteBuf writeZero(int length) { + buf.writeZero(length); + return this; + } + + @Override + public int writeCharSequence(CharSequence sequence, Charset charset) { + return buf.writeCharSequence(sequence, charset); + } + + @Override + public int indexOf(int fromIndex, int toIndex, byte value) { + return buf.indexOf(fromIndex, toIndex, value); + } + + @Override + public int bytesBefore(byte value) { + return buf.bytesBefore(value); + } + + @Override + public int bytesBefore(int length, byte value) { + return buf.bytesBefore(length, value); + } + + @Override + public int bytesBefore(int index, int length, byte value) { + return buf.bytesBefore(index, length, value); + } + + @Override + public int forEachByte(ByteProcessor processor) { + return buf.forEachByte(processor); + } + + @Override + public int forEachByte(int index, int length, ByteProcessor processor) { + return buf.forEachByte(index, length, processor); + } + + @Override + public int forEachByteDesc(ByteProcessor processor) { + return buf.forEachByteDesc(processor); + } + + @Override + public int forEachByteDesc(int index, int length, ByteProcessor processor) { + return buf.forEachByteDesc(index, length, processor); + } + + @Override + public ByteBuf copy() { + return buf.copy(); + } + + @Override + public ByteBuf copy(int index, int length) { + return buf.copy(index, length); + } + + @Override + public ByteBuf slice() { + return buf.slice(); + } + + @Override + public ByteBuf retainedSlice() { + return buf.retainedSlice(); + } + + @Override + public ByteBuf slice(int index, int length) { + return buf.slice(index, length); + } + + @Override + public ByteBuf retainedSlice(int index, int length) { + return buf.retainedSlice(index, length); + } + + @Override + public ByteBuf duplicate() { + return buf.duplicate(); + } + + @Override + public ByteBuf retainedDuplicate() { + return buf.retainedDuplicate(); + } + + @Override + public int nioBufferCount() { + return buf.nioBufferCount(); + } + + @Override + public ByteBuffer nioBuffer() { + return buf.nioBuffer(); + } + + @Override + public ByteBuffer nioBuffer(int index, int length) { + return buf.nioBuffer(index, length); + } + + @Override + public ByteBuffer[] nioBuffers() { + return buf.nioBuffers(); + } + + @Override + public ByteBuffer[] nioBuffers(int index, int length) { + return buf.nioBuffers(index, length); + } + + @Override + public ByteBuffer internalNioBuffer(int index, int length) { + return buf.internalNioBuffer(index, length); + } + + @Override + public boolean hasArray() { + return buf.hasArray(); + } + + @Override + 
public byte[] array() { + return buf.array(); + } + + @Override + public int arrayOffset() { + return buf.arrayOffset(); + } + + @Override + public String toString(Charset charset) { + return buf.toString(charset); + } + + @Override + public String toString(int index, int length, Charset charset) { + return buf.toString(index, length, charset); + } + + @Override + public int hashCode() { + return buf.hashCode(); + } + + @Override + @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") + public boolean equals(Object obj) { + return buf.equals(obj); + } + + @Override + public int compareTo(ByteBuf buffer) { + return buf.compareTo(buffer); + } + + @Override + public String toString() { + return StringUtil.simpleClassName(this) + '(' + buf.toString() + ')'; + } + + @Override + public ByteBuf retain(int increment) { + buf.retain(increment); + return this; + } + + @Override + public ByteBuf retain() { + buf.retain(); + return this; + } + + @Override + public ByteBuf touch() { + buf.touch(); + return this; + } + + @Override + public ByteBuf touch(Object hint) { + buf.touch(hint); + return this; + } + + @Override + public final boolean isReadable(int size) { + return buf.isReadable(size); + } + + @Override + public final boolean isWritable(int size) { + return buf.isWritable(size); + } + + @Override + public final int refCnt() { + return buf.refCnt(); + } + + @Override + public boolean release() { + return buf.release(); + } + + @Override + public boolean release(int decrement) { + return buf.release(decrement); + } + +} diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java new file mode 100644 index 0000000000000..a76eb9fa4875b --- /dev/null +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.transport.netty4; + +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.Unpooled; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; + +import static org.elasticsearch.transport.netty4.NettyAllocator.TrashingByteBuf; +import static org.elasticsearch.transport.netty4.NettyAllocator.TrashingByteBufAllocator; + +public class NettyAllocatorTests extends ESTestCase { + + static void assertBufferTrashed(BytesReference bytesRef) throws IOException { + var iter = bytesRef.iterator(); + BytesRef br; + while ((br = iter.next()) != null) { + for (var i = br.offset; i < br.offset + br.length; i++) { + assertEquals("off=" + br.offset + " len=" + br.length + " i=" + i, 0, br.bytes[i]); + } + } + } + + public void testTrashArrayByteBuf() { + var arr = randomByteArrayOfLength(between(1024, 2048)); + var buf = Unpooled.wrappedBuffer(arr); + var tBuf = new TrashingByteBuf(buf); + tBuf.release(); + var emptyArr = new byte[arr.length]; + assertArrayEquals(emptyArr, arr); + } + + public void testNioBufsTrashingByteBuf() { + var arrCnt = between(1, 16); + var byteArrs = new byte[arrCnt][]; + var byteBufs = new ByteBuffer[arrCnt]; + for (var i = 0; i < arrCnt; i++) { + byteArrs[i] = randomByteArrayOfLength(between(1024, 2048)); + byteBufs[i] = ByteBuffer.wrap(byteArrs[i]); + } + var buf = Unpooled.wrappedBuffer(byteBufs); + var tBuf = new TrashingByteBuf(buf); + tBuf.release(); + for (int i = 0; i < arrCnt; i++) { + for (int j = 0; j < byteArrs[i].length; j++) { + assertEquals(0, byteArrs[i][j]); + } + } + } + + public void testNioBufOffsetTrashingByteBuf() { + var arr = randomByteArrayOfLength(1024); + var off = 1; + var len = arr.length - 2; + arr[0] = 1; + arr[arr.length - 1] = 1; + var buf = Unpooled.wrappedBuffer(arr, off, len); + var tBuf = new TrashingByteBuf(buf); + tBuf.release(); + assertEquals(1, arr[0]); + assertEquals(1, arr[arr.length - 1]); + for (int i = 1; i < arr.length - 1; i++) { + assertEquals("at index " + i, 0, arr[i]); + } + } + + public void testTrashingByteBufAllocator() throws IOException { + var alloc = new TrashingByteBufAllocator(ByteBufAllocator.DEFAULT); + var size = between(1024 * 1024, 10 * 1024 * 1024); + + // use 3 different heap allocation methods + for (var buf : List.of(alloc.heapBuffer(), alloc.heapBuffer(1024), alloc.heapBuffer(1024, size))) { + buf.writeBytes(randomByteArrayOfLength(size)); + var bytesRef = Netty4Utils.toBytesReference(buf); + buf.release(); + assertBufferTrashed(bytesRef); + } + } + + public void testTrashingCompositeByteBuf() throws IOException { + var alloc = new TrashingByteBufAllocator(ByteBufAllocator.DEFAULT); + var compBuf = alloc.compositeHeapBuffer(); + for (var i = 0; i < between(1, 10); i++) { + var buf = alloc.heapBuffer().writeBytes(randomByteArrayOfLength(between(1024, 8192))); + compBuf.addComponent(true, buf); + } + var bytesRef = Netty4Utils.toBytesReference(compBuf); + compBuf.release(); + assertBufferTrashed(bytesRef); + } + +} diff --git a/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java b/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java index ddcfc1ea7eed8..51e6512072e41 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java @@ -74,6 +74,29 @@ static ByteBuffer[] 
toByteBuffers(BytesReference reference) {
         }
     }
 
+    /**
+     * Allocates a new buffer and copies bytes from the given BytesReference.
+     *
+     * @deprecated copying bytes is prone to performance regressions and unnecessary allocations.
+     * This method exists to serve very few places that struggle to handle reference counted buffers.
+     */
+    @Deprecated(forRemoval = true)
+    static BytesReference copyBytes(BytesReference bytesReference) {
+        byte[] arr = new byte[bytesReference.length()];
+        int offset = 0;
+        final BytesRefIterator iterator = bytesReference.iterator();
+        try {
+            BytesRef slice;
+            while ((slice = iterator.next()) != null) {
+                System.arraycopy(slice.bytes, slice.offset, arr, offset, slice.length);
+                offset += slice.length;
+            }
+            return new BytesArray(arr);
+        } catch (IOException e) {
+            throw new AssertionError(e);
+        }
+    }
+
     /**
      * Returns BytesReference composed of the provided ByteBuffers.
      */
diff --git a/server/src/main/java/org/elasticsearch/http/HttpBody.java b/server/src/main/java/org/elasticsearch/http/HttpBody.java
index a10487502ed3c..6571125677fab 100644
--- a/server/src/main/java/org/elasticsearch/http/HttpBody.java
+++ b/server/src/main/java/org/elasticsearch/http/HttpBody.java
@@ -9,7 +9,6 @@
 
 package org.elasticsearch.http;
 
-import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.bytes.ReleasableBytesReference;
 import org.elasticsearch.core.Nullable;
@@ -21,11 +20,11 @@ public sealed interface HttpBody extends Releasable permits HttpBody.Full, HttpBody.Stream {
 
     static Full fromBytesReference(BytesReference bytesRef) {
-        return new ByteRefHttpBody(bytesRef);
+        return new ByteRefHttpBody(ReleasableBytesReference.wrap(bytesRef));
     }
 
     static Full empty() {
-        return new ByteRefHttpBody(BytesArray.EMPTY);
+        return new ByteRefHttpBody(ReleasableBytesReference.empty());
     }
 
     default boolean isFull() {
@@ -56,7 +55,7 @@ default Stream asStream() {
      * Full content represents a complete http body content that can be accessed immediately.
      */
     non-sealed interface Full extends HttpBody {
-        BytesReference bytes();
+        ReleasableBytesReference bytes();
 
         @Override
         default void close() {}
@@ -114,5 +113,5 @@ interface ChunkHandler extends Releasable {
         default void close() {}
     }
 
-    record ByteRefHttpBody(BytesReference bytes) implements Full {}
+    record ByteRefHttpBody(ReleasableBytesReference bytes) implements Full {}
 }
diff --git a/server/src/main/java/org/elasticsearch/http/HttpRequest.java b/server/src/main/java/org/elasticsearch/http/HttpRequest.java
index ca6e51f2cec08..b4b1bb84433c9 100644
--- a/server/src/main/java/org/elasticsearch/http/HttpRequest.java
+++ b/server/src/main/java/org/elasticsearch/http/HttpRequest.java
@@ -52,10 +52,4 @@ enum HttpVersion {
      */
     void release();
 
-    /**
-     * If this instances uses any pooled resources, creates a copy of this instance that does not use any pooled resources and releases
-     * any resources associated with this instance. If the instance does not use any shared resources, returns itself.
- * @return a safe unpooled http request - */ - HttpRequest releaseAndCopy(); } diff --git a/server/src/main/java/org/elasticsearch/http/HttpTracer.java b/server/src/main/java/org/elasticsearch/http/HttpTracer.java index 3d8360e6ee3fa..d6daf11c0539a 100644 --- a/server/src/main/java/org/elasticsearch/http/HttpTracer.java +++ b/server/src/main/java/org/elasticsearch/http/HttpTracer.java @@ -94,7 +94,7 @@ HttpTracer maybeLogRequest(RestRequest restRequest, @Nullable Exception e) { private void logFullContent(RestRequest restRequest) { try (var stream = HttpBodyTracer.getBodyOutputStream(restRequest.getRequestId(), HttpBodyTracer.Type.REQUEST)) { - restRequest.content().writeTo(stream); + restRequest.releasableContent().writeTo(stream); } catch (Exception e2) { assert false : e2; // no real IO here } diff --git a/server/src/main/java/org/elasticsearch/rest/BaseRestHandler.java b/server/src/main/java/org/elasticsearch/rest/BaseRestHandler.java index f1b59ed14cefb..4564a37dacf4a 100644 --- a/server/src/main/java/org/elasticsearch/rest/BaseRestHandler.java +++ b/server/src/main/java/org/elasticsearch/rest/BaseRestHandler.java @@ -122,6 +122,7 @@ public final void handleRequest(RestRequest request, RestChannel channel, NodeCl ); } + usageCount.increment(); if (request.isStreamedContent()) { assert action instanceof RequestBodyChunkConsumer; var chunkConsumer = (RequestBodyChunkConsumer) action; @@ -137,11 +138,11 @@ public void close() { chunkConsumer.streamClose(); } }); + action.accept(channel); + } else { + action.accept(channel); + request.getHttpRequest().release(); } - - usageCount.increment(); - // execute the action - action.accept(channel); } } diff --git a/server/src/main/java/org/elasticsearch/rest/FilterRestHandler.java b/server/src/main/java/org/elasticsearch/rest/FilterRestHandler.java index cb5155cb0de0b..21a44ac9af5c8 100644 --- a/server/src/main/java/org/elasticsearch/rest/FilterRestHandler.java +++ b/server/src/main/java/org/elasticsearch/rest/FilterRestHandler.java @@ -43,11 +43,6 @@ public boolean canTripCircuitBreaker() { return delegate.canTripCircuitBreaker(); } - @Override - public boolean allowsUnsafeBuffers() { - return delegate.allowsUnsafeBuffers(); - } - @Override public boolean supportsBulkContent() { return delegate.supportsBulkContent(); diff --git a/server/src/main/java/org/elasticsearch/rest/RestController.java b/server/src/main/java/org/elasticsearch/rest/RestController.java index 7446ec5bb6717..49fe794bbe615 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestController.java +++ b/server/src/main/java/org/elasticsearch/rest/RestController.java @@ -432,10 +432,6 @@ private void dispatchRequest( } // iff we could reserve bytes for the request we need to send the response also over this channel responseChannel = new ResourceHandlingHttpChannel(channel, circuitBreakerService, contentLength, methodHandlers); - // TODO: Count requests double in the circuit breaker if they need copying? 
- if (handler.allowsUnsafeBuffers() == false) { - request.ensureSafeBuffers(); - } if (handler.allowSystemIndexAccessByDefault() == false) { // The ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER indicates that the request is coming from an Elastic product and diff --git a/server/src/main/java/org/elasticsearch/rest/RestHandler.java b/server/src/main/java/org/elasticsearch/rest/RestHandler.java index cf66e402d3691..572e92e369a63 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestHandler.java +++ b/server/src/main/java/org/elasticsearch/rest/RestHandler.java @@ -69,18 +69,6 @@ default Scope getServerlessScope() { return serverlessScope == null ? null : serverlessScope.value(); } - /** - * Indicates if the RestHandler supports working with pooled buffers. If the request handler will not escape the return - * {@link RestRequest#content()} or any buffers extracted from it then there is no need to make a copies of any pooled buffers in the - * {@link RestRequest} instance before passing a request to this handler. If this instance does not support pooled/unsafe buffers - * {@link RestRequest#ensureSafeBuffers()} should be called on any request before passing it to {@link #handleRequest}. - * - * @return true iff the handler supports requests that make use of pooled buffers - */ - default boolean allowsUnsafeBuffers() { - return false; - } - /** * The list of {@link Route}s that this RestHandler is responsible for handling. */ diff --git a/server/src/main/java/org/elasticsearch/rest/RestRequest.java b/server/src/main/java/org/elasticsearch/rest/RestRequest.java index 17eda305b5ccf..17d85a8eabb1c 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestRequest.java +++ b/server/src/main/java/org/elasticsearch/rest/RestRequest.java @@ -16,17 +16,21 @@ import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Booleans; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.http.HttpBody; import org.elasticsearch.http.HttpChannel; import org.elasticsearch.http.HttpRequest; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.telemetry.tracing.Traceable; import org.elasticsearch.xcontent.ParsedMediaType; import org.elasticsearch.xcontent.ToXContent; @@ -51,6 +55,8 @@ public class RestRequest implements ToXContent.Params, Traceable { + private static final Logger logger = LogManager.getLogger(RestRequest.class); + /** * Internal marker request parameter to indicate that a request was made in serverless mode. Use this parameter, together with * {@link #OPERATOR_REQUEST} if you need to toggle behavior for serverless, for example to enforce partial API restrictions @@ -187,15 +193,6 @@ protected RestRequest(RestRequest other) { } } - /** - * Invoke {@link HttpRequest#releaseAndCopy()} on the http request in this instance and replace a pooled http request - * with an unpooled copy. 
This is supposed to be used before passing requests to {@link RestHandler} instances that can not safely
-     * handle http requests that use pooled buffers as determined by {@link RestHandler#allowsUnsafeBuffers()}.
-     */
-    void ensureSafeBuffers() {
-        httpRequest = httpRequest.releaseAndCopy();
-    }
-
     /**
      * Creates a new REST request.
      *
@@ -306,9 +303,31 @@ public boolean isFullContent() {
         return httpRequest.body().isFull();
     }
 
+    /**
+     * Returns a copy of the HTTP content. The copy is GC-managed and does not require reference counting.
+     * Prefer {@link #releasableContent()} to avoid copying the content.
+     */
+    @SuppressForbidden(reason = "temporarily support content copy while migrating RestHandlers to ref counted pooled buffers")
     public BytesReference content() {
+        return BytesReference.copyBytes(releasableContent());
+    }
+
+    /**
+     * Returns a direct reference to the network buffer containing the request body. The HTTP layers will release their references to this
+     * buffer as soon as they have finished the synchronous steps of processing the request on the network thread, which will by default
+     * release the buffer back to the pool where it may be re-used for another request. If you need to keep the buffer alive past the end of
+     * these synchronous steps, acquire your own reference to this buffer and release it once it's no longer needed.
+     */
+    public ReleasableBytesReference releasableContent() {
         this.contentConsumed = true;
-        return httpRequest.body().asFull().bytes();
+        var bytes = httpRequest.body().asFull().bytes();
+        if (bytes.hasReferences() == false) {
+            var e = new IllegalStateException("http releasable content accessed after release");
+            logger.error(e.getMessage(), e);
+            assert false : e;
+            throw e;
+        }
+        return bytes;
     }
 
     public boolean isStreamedContent() {
@@ -319,18 +338,32 @@ public HttpBody.Stream contentStream() {
         return httpRequest.body().asStream();
     }
 
-    /**
-     * @return content of the request body or throw an exception if the body or content type is missing
-     */
-    public final BytesReference requiredContent() {
+    private void ensureContent() {
         if (hasContent() == false) {
             throw new ElasticsearchParseException("request body is required");
         } else if (xContentType.get() == null) {
             throwValidationException("unknown content type");
         }
+    }
+
+    /**
+     * @return a copy of the request body, or throws an exception if the body or content type is missing.
+     * See {@link #content()}. Prefer {@link #requiredReleasableContent()} to avoid copying the content.
+     */
+    public final BytesReference requiredContent() {
+        ensureContent();
         return content();
     }
 
+    /**
+     * Returns a reference to the network buffer of HTTP content, or throws an exception if the body or content type is missing.
+     * See {@link #releasableContent()}. This is the recommended way to handle HTTP content without copying it.
+ */ + public ReleasableBytesReference requiredReleasableContent() { + ensureContent(); + return releasableContent(); + } + private static void throwValidationException(String msg) { ValidationException unknownContentType = new ValidationException(); unknownContentType.addValidationError(msg); diff --git a/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java b/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java index e4105363e1bce..57b4d2990c8e0 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java +++ b/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java @@ -12,6 +12,7 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Tuple; @@ -44,10 +45,10 @@ public boolean hasContent() { } @Override - public BytesReference content() { + public ReleasableBytesReference releasableContent() { if (filteredBytes == null) { Tuple> result = XContentHelper.convertToMap( - restRequest.requiredContent(), + restRequest.requiredReleasableContent(), true, restRequest.getXContentType() ); @@ -63,7 +64,7 @@ public BytesReference content() { throw new ElasticsearchException("failed to parse request", e); } } - return filteredBytes; + return ReleasableBytesReference.wrap(filteredBytes); } }; } else { diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java index de3fd390ec86d..9428ef5390b2f 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java @@ -10,6 +10,7 @@ package org.elasticsearch.rest.action.document; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkRequestParser; @@ -102,9 +103,11 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC boolean defaultRequireDataStream = request.paramAsBoolean(DocWriteRequest.REQUIRE_DATA_STREAM, false); bulkRequest.timeout(request.paramAsTime("timeout", BulkShardRequest.DEFAULT_TIMEOUT)); bulkRequest.setRefreshPolicy(request.param("refresh")); + ReleasableBytesReference content = request.requiredReleasableContent(); + try { bulkRequest.add( - request.requiredContent(), + content, defaultIndex, defaultRouting, defaultFetchSourceContext, @@ -119,8 +122,10 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC } catch (Exception e) { return channel -> new RestToXContentListener<>(channel).onFailure(parseFailureException(e)); } - - return channel -> client.bulk(bulkRequest, new RestRefCountedChunkedToXContentListener<>(channel)); + return channel -> { + content.mustIncRef(); + client.bulk(bulkRequest, ActionListener.releaseAfter(new RestRefCountedChunkedToXContentListener<>(channel), content)); + }; } else { String waitForActiveShards = request.param("wait_for_active_shards"); TimeValue timeout = request.paramAsTime("timeout", BulkShardRequest.DEFAULT_TIMEOUT); @@ -270,11 +275,6 @@ public boolean supportsBulkContent() 
{ return true; } - @Override - public boolean allowsUnsafeBuffers() { - return true; - } - @Override public Set supportedCapabilities() { return capabilities; diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java index c2437dcb96fa6..d81ac03492d59 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java @@ -9,12 +9,14 @@ package org.elasticsearch.rest.action.document; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.index.VersionType; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; @@ -104,11 +106,12 @@ public RestChannelConsumer prepareRequest(RestRequest request, final NodeClient @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { + ReleasableBytesReference source = request.requiredReleasableContent(); IndexRequest indexRequest = new IndexRequest(request.param("index")); indexRequest.id(request.param("id")); indexRequest.routing(request.param("routing")); indexRequest.setPipeline(request.param("pipeline")); - indexRequest.source(request.requiredContent(), request.getXContentType()); + indexRequest.source(source, request.getXContentType()); indexRequest.timeout(request.paramAsTime("timeout", IndexRequest.DEFAULT_TIMEOUT)); indexRequest.setRefreshPolicy(request.param("refresh")); indexRequest.version(RestActions.parseVersion(request)); @@ -126,10 +129,16 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC indexRequest.opType(sOpType); } - return channel -> client.index( - indexRequest, - new RestToXContentListener<>(channel, DocWriteResponse::status, r -> r.getLocation(indexRequest.routing())) - ); + return channel -> { + source.mustIncRef(); + client.index( + indexRequest, + ActionListener.releaseAfter( + new RestToXContentListener<>(channel, DocWriteResponse::status, r -> r.getLocation(indexRequest.routing())), + source + ) + ); + }; } @Override diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java index 80a85d3b9b748..ff062084a3cbb 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java @@ -423,8 +423,4 @@ protected Set responseParams() { return RESPONSE_PARAMS; } - @Override - public boolean allowsUnsafeBuffers() { - return true; - } } diff --git a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java index ad298e7aa8307..3fd8535cd5c27 100644 --- a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java +++ b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java @@ -107,4 +107,10 @@ public void testGetDoubleLE() { Exception e = 
expectThrows(ArrayIndexOutOfBoundsException.class, () -> ref.getDoubleLE(9)); assertThat(e.getMessage(), equalTo("Index 9 out of bounds for length 9")); } + + public void testCopyBytes() { + var data = randomByteArrayOfLength(between(1024, 1024 * 1024 * 50)); + var copy = BytesReference.copyBytes(new BytesArray(data)); + assertArrayEquals(data, BytesReference.toBytes(copy)); + } } diff --git a/server/src/test/java/org/elasticsearch/http/TestHttpRequest.java b/server/src/test/java/org/elasticsearch/http/TestHttpRequest.java index 8cd61453a3391..27dc0be673abb 100644 --- a/server/src/test/java/org/elasticsearch/http/TestHttpRequest.java +++ b/server/src/test/java/org/elasticsearch/http/TestHttpRequest.java @@ -85,11 +85,6 @@ public HttpResponse createResponse(RestStatus status, ChunkedRestResponseBodyPar @Override public void release() {} - @Override - public HttpRequest releaseAndCopy() { - return this; - } - @Override public Exception getInboundException() { return null; diff --git a/server/src/test/java/org/elasticsearch/rest/RestControllerTests.java b/server/src/test/java/org/elasticsearch/rest/RestControllerTests.java index b7d38f6f299c7..2fdb3daa26da4 100644 --- a/server/src/test/java/org/elasticsearch/rest/RestControllerTests.java +++ b/server/src/test/java/org/elasticsearch/rest/RestControllerTests.java @@ -906,11 +906,6 @@ public HttpResponse createResponse(RestStatus status, ChunkedRestResponseBodyPar @Override public void release() {} - @Override - public HttpRequest releaseAndCopy() { - return this; - } - @Override public Exception getInboundException() { return null; diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/FakeRestRequest.java b/test/framework/src/main/java/org/elasticsearch/test/rest/FakeRestRequest.java index 9ddcf39d24d98..0c466b9162eb8 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/FakeRestRequest.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/FakeRestRequest.java @@ -138,11 +138,6 @@ public HttpResponse createResponse(RestStatus status, ChunkedRestResponseBodyPar @Override public void release() {} - @Override - public HttpRequest releaseAndCopy() { - return this; - } - @Override public Exception getInboundException() { return inboundException; diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java index aa200f7ae9acb..214f9150dfcc5 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java @@ -32,7 +32,7 @@ protected final BaseRestHandler.RestChannelConsumer prepareRequest(RestRequest r // We need to consume parameters and content from the REST request in order to bypass unrecognized param errors // and return a license error. 
request.params().keySet().forEach(key -> request.param(key, "")); - request.content(); + request.releasableContent(); return channel -> channel.sendResponse( new RestResponse(channel, LicenseUtils.newComplianceException(this.licenseState, this.product)) ); diff --git a/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java b/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java index a9992e168bc66..2ea56b147bf9c 100644 --- a/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java +++ b/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java @@ -49,7 +49,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } return restChannel -> { - final String content = request.content().utf8ToString(); + final String content = request.releasableContent().utf8ToString(); client.execute( PutPipelineAction.INSTANCE, new PutPipelineRequest(id, content, request.getXContentType()), diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java index 13e3e40887d89..429b632cdac18 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java @@ -27,10 +27,11 @@ public class AuditUtil { public static String restRequestContent(RestRequest request) { if (request.hasContent()) { + var content = request.releasableContent(); try { - return XContentHelper.convertToJson(request.content(), false, false, request.getXContentType()); + return XContentHelper.convertToJson(content, false, false, request.getXContentType()); } catch (IOException ioe) { - return "Invalid Format: " + request.content().utf8ToString(); + return "Invalid Format: " + content.utf8ToString(); } } return ""; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java index f0405e42f1f22..df21f5d4eeb0b 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java @@ -75,7 +75,7 @@ protected final RestChannelConsumer prepareRequest(RestRequest request, NodeClie return innerPrepareRequest(request, client); } else { request.params().keySet().forEach(key -> request.param(key, "")); - request.content(); + request.releasableContent(); // mark content consumed return channel -> channel.sendResponse(new RestResponse(channel, failedFeature)); } } diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java index 5adc1e351931d..3be40c280874d 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java @@ -2614,7 +2614,7 @@ public void testAuthenticationSuccessRest() throws 
Exception { checkedFields.put(LoggingAuditTrail.REQUEST_METHOD_FIELD_NAME, request.method().toString()); checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); checkedFields.put(LoggingAuditTrail.URL_PATH_FIELD_NAME, "_uri"); - if (includeRequestBody && Strings.hasLength(request.content())) { + if (includeRequestBody && request.hasContent()) { checkedFields.put(LoggingAuditTrail.REQUEST_BODY_FIELD_NAME, request.content().utf8ToString()); } if (params.isEmpty() == false) { @@ -2643,8 +2643,8 @@ public void testAuthenticationSuccessRest() throws Exception { checkedFields.put(LoggingAuditTrail.REQUEST_METHOD_FIELD_NAME, request.method().toString()); checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); checkedFields.put(LoggingAuditTrail.URL_PATH_FIELD_NAME, "_uri"); - if (includeRequestBody && Strings.hasLength(request.content())) { - checkedFields.put(LoggingAuditTrail.REQUEST_BODY_FIELD_NAME, request.getHttpRequest().body().asFull().bytes().utf8ToString()); + if (includeRequestBody && request.hasContent()) { + checkedFields.put(LoggingAuditTrail.REQUEST_BODY_FIELD_NAME, request.content().utf8ToString()); } if (params.isEmpty() == false) { checkedFields.put(LoggingAuditTrail.URL_QUERY_FIELD_NAME, "foo=bar&evac=true"); @@ -2672,7 +2672,7 @@ public void testAuthenticationSuccessRest() throws Exception { checkedFields.put(LoggingAuditTrail.REQUEST_METHOD_FIELD_NAME, request.method().toString()); checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); checkedFields.put(LoggingAuditTrail.URL_PATH_FIELD_NAME, "_uri"); - if (includeRequestBody && Strings.hasLength(request.content().utf8ToString())) { + if (includeRequestBody && request.hasContent()) { checkedFields.put(LoggingAuditTrail.REQUEST_BODY_FIELD_NAME, request.content().utf8ToString()); } if (params.isEmpty() == false) { From b378a1bb54650247c867329f6bf3265918a89fa0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 20 Nov 2024 22:09:17 +0000 Subject: [PATCH 02/10] Bump 8.x to 8.18.0 --- .backportrc.json | 4 +- .buildkite/pipelines/intake.yml | 2 +- .buildkite/pipelines/periodic-packaging.yml | 16 + .buildkite/pipelines/periodic.yml | 23 +- .ci/bwcVersions | 1 + .ci/snapshotBwcVersions | 1 + .../reference/migration/migrate_8_18.asciidoc | 20 + docs/reference/migration/migrate_9_0.asciidoc | 548 ++++++------ docs/reference/release-notes/8.18.0.asciidoc | 8 + docs/reference/release-notes/9.0.0.asciidoc | 812 ++++++------------ .../release-notes/highlights.asciidoc | 168 +--- 11 files changed, 619 insertions(+), 984 deletions(-) create mode 100644 docs/reference/migration/migrate_8_18.asciidoc create mode 100644 docs/reference/release-notes/8.18.0.asciidoc diff --git a/.backportrc.json b/.backportrc.json index 03f3f892f9227..20287f0bfc0e6 100644 --- a/.backportrc.json +++ b/.backportrc.json @@ -1,10 +1,10 @@ { "upstream" : "elastic/elasticsearch", - "targetBranchChoices" : [ "main", "8.x", "8.16", "8.15", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], + "targetBranchChoices" : [ "main", "8.x", "8.17", "8.16", "8.15", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], "targetPRLabels" : [ "backport" ], "branchLabelMapping" : { "^v9.0.0$" : "main", - "^v8.17.0$" : "8.x", + "^v8.18.0$" : "8.x", "^v(\\d+).(\\d+).\\d+(?:-(?:alpha|beta|rc)\\d+)?$" : "$1.$2" } } diff --git a/.buildkite/pipelines/intake.yml 
b/.buildkite/pipelines/intake.yml index 19e99852869e6..8935872fdec83 100644 --- a/.buildkite/pipelines/intake.yml +++ b/.buildkite/pipelines/intake.yml @@ -56,7 +56,7 @@ steps: timeout_in_minutes: 300 matrix: setup: - BWC_VERSION: ["8.16.1", "8.17.0", "9.0.0"] + BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index 7dd8269f4ffe6..2dbb7f5193af6 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -320,6 +320,22 @@ steps: env: BWC_VERSION: 8.17.0 + - label: "{{matrix.image}} / 8.18.0 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.18.0 + timeout_in_minutes: 300 + matrix: + setup: + image: + - rocky-8 + - ubuntu-2004 + agents: + provider: gcp + image: family/elasticsearch-{{matrix.image}} + machineType: custom-16-32768 + buildDirectory: /dev/shm/bk + env: + BWC_VERSION: 8.18.0 + - label: "{{matrix.image}} / 9.0.0 / packaging-tests-upgrade" command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v9.0.0 timeout_in_minutes: 300 diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml index 79371d6ddccf5..047e4a3f4f8f6 100644 --- a/.buildkite/pipelines/periodic.yml +++ b/.buildkite/pipelines/periodic.yml @@ -344,6 +344,25 @@ steps: - signal_reason: agent_stop limit: 3 + - label: 8.18.0 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.18.0#bwcTest + timeout_in_minutes: 300 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2004 + machineType: n1-standard-32 + buildDirectory: /dev/shm/bk + preemptible: true + env: + BWC_VERSION: 8.18.0 + retry: + automatic: + - exit_status: "-1" + limit: 3 + signal_reason: none + - signal_reason: agent_stop + limit: 3 + - label: 9.0.0 / bwc command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v9.0.0#bwcTest timeout_in_minutes: 300 @@ -429,7 +448,7 @@ steps: setup: ES_RUNTIME_JAVA: - openjdk21 - BWC_VERSION: ["8.16.1", "8.17.0", "9.0.0"] + BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 @@ -471,7 +490,7 @@ steps: ES_RUNTIME_JAVA: - openjdk21 - openjdk23 - BWC_VERSION: ["8.16.1", "8.17.0", "9.0.0"] + BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 diff --git a/.ci/bwcVersions b/.ci/bwcVersions index 85522e47a523f..ac07e14c2a176 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -17,4 +17,5 @@ BWC_VERSION: - "8.15.4" - "8.16.1" - "8.17.0" + - "8.18.0" - "9.0.0" diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions index 9ea3072021bb3..351c605e6e092 100644 --- a/.ci/snapshotBwcVersions +++ b/.ci/snapshotBwcVersions @@ -1,4 +1,5 @@ BWC_VERSION: - "8.16.1" - "8.17.0" + - "8.18.0" - "9.0.0" diff --git a/docs/reference/migration/migrate_8_18.asciidoc b/docs/reference/migration/migrate_8_18.asciidoc new file mode 100644 index 0000000000000..c989ff9f85b6d --- /dev/null +++ b/docs/reference/migration/migrate_8_18.asciidoc @@ -0,0 +1,20 @@ +[[migrating-8.18]] +== Migrating to 8.18 +++++ +8.18 +++++ + +This section discusses the changes that you need to be aware of when migrating +your application to {es} 8.18. + +See also <> and <>. 
+ +coming::[8.18.0] + + +[discrete] +[[breaking-changes-8.18]] +=== Breaking changes + +There are no breaking changes in {es} 8.18. + diff --git a/docs/reference/migration/migrate_9_0.asciidoc b/docs/reference/migration/migrate_9_0.asciidoc index 6569647fd993e..5048220966bba 100644 --- a/docs/reference/migration/migrate_9_0.asciidoc +++ b/docs/reference/migration/migrate_9_0.asciidoc @@ -1,6 +1,3 @@ -// THIS IS A GENERATED FILE. DO NOT EDIT DIRECTLY. -// The content generated here are is not correct and most has been manually commented out until it can be fixed. -// See ES-9931 for more details. [[migrating-9.0]] == Migrating to 9.0 ++++ @@ -23,204 +20,229 @@ The following changes in {es} 9.0 might affect your applications and prevent them from operating normally. Before upgrading to 9.0, review these changes and take the described steps to mitigate the impact. -// -// [discrete] -// [[breaking_90_analysis_changes]] -// ==== Analysis changes -// -// [[set_lenient_to_true_by_default_when_using_updateable_synonyms]] -// .Set lenient to true by default when using updateable synonyms -// [%collapsible] -// ==== -// *Details* + -// When a `synonym` or `synonym_graph` token filter is configured with `updateable: true`, the default `lenient` -// value will now be `true`. -// -// *Impact* + -// `synonym` or `synonym_graph` token filters configured with `updateable: true` will ignore invalid synonyms by -// default. This prevents shard initialization errors on invalid synonyms. -// ==== -// -// [discrete] -// [[breaking_90_mapping_changes]] -// ==== Mapping changes -// -// [[jdk_locale_database_change]] -// .JDK locale database change -// [%collapsible] -// ==== -// *Details* + -// {es} 8.16 changes the version of the JDK that is included from version 22 to version 23. This changes the locale database that is used by Elasticsearch from the COMPAT database to the CLDR database. This change can cause significant differences to the textual date formats accepted by Elasticsearch, and to calculated week-dates. -// -// If you run {es} 8.16 on JDK version 22 or below, it will use the COMPAT locale database to match the behavior of 8.15. However, starting with {es} 9.0, {es} will use the CLDR database regardless of JDK version it is run on. -// -// *Impact* + -// This affects you if you use custom date formats using textual or week-date field specifiers. If you use date fields or calculated week-dates that change between the COMPAT and CLDR databases, then this change will cause Elasticsearch to reject previously valid date fields as invalid data. You might need to modify your ingest or output integration code to account for the differences between these two JDK versions. -// -// Starting in version 8.15.2, Elasticsearch will log deprecation warnings if you are using date format specifiers that might change on upgrading to JDK 23. These warnings are visible in Kibana. -// -// For detailed guidance, refer to <> and the https://ela.st/jdk-23-locales[Elastic blog]. -// ==== -// -// [discrete] -// [[breaking_90_analysis_changes]] -// ==== Analysis changes -// -// [[snowball_stemmers_have_been_upgraded]] -// .Snowball stemmers have been upgraded -// [%collapsible] -// ==== -// *Details* + -// Lucene 10 ships with an upgrade of its Snowball stemmers. For details see https://github.com/apache/lucene/issues/13209. Users using Snowball stemmers that are experiencing changes in search behaviour on existing data are advised to reindex. 
-// -// *Impact* + -// The upgrade should generally provide improved stemming results. Small changes in token analysis can lead to mismatches with previously index data, so existing indices using Snowball stemmers as part of their analysis chain should be reindexed. -// ==== -// -// [[german2_snowball_stemmer_an_alias_for_german_stemmer]] -// .The "german2" snowball stemmer is now an alias for the "german" stemmer -// [%collapsible] -// ==== -// *Details* + -// Lucene 10 has merged the improved "german2" snowball language stemmer with the "german" stemmer. For Elasticsearch, "german2" is now a deprecated alias for "german". This may results in slightly different tokens being generated for terms with umlaut substitution (like "ue" for "ü" etc...) -// -// *Impact* + -// Replace usages of "german2" with "german" in analysis configuration. Old indices that use the "german" stemmer should be reindexed if possible. -// ==== -// -// [[persian_analyzer_has_stemmer_by_default]] -// .The 'persian' analyzer has stemmer by default -// [%collapsible] -// ==== -// *Details* + -// Lucene 10 has added a final stemming step to its PersianAnalyzer that Elasticsearch exposes as 'persian' analyzer. Existing indices will keep the old non-stemming behaviour while new indices will see the updated behaviour with added stemming. Users that wish to maintain the non-stemming behaviour need to define their own analyzer as outlined in https://www.elastic.co/guide/en/elasticsearch/reference/8.15/analysis-lang-analyzer.html#persian-analyzer. Users that wish to use the new stemming behaviour for existing indices will have to reindex their data. -// -// *Impact* + -// Indexing with the 'persian' analyzer will produce slightly different tokens. Users should check if this impacts their search results. If they wish to maintain the legacy non-stemming behaviour they can define their own analyzer equivalent as explained in https://www.elastic.co/guide/en/elasticsearch/reference/8.15/analysis-lang-analyzer.html#persian-analyzer. -// ==== -// -// [[korean_dictionary_for_nori_has_been_updated]] -// .The Korean dictionary for Nori has been updated -// [%collapsible] -// ==== -// *Details* + -// Lucene 10 ships with an updated Korean dictionary (mecab-ko-dic-2.1.1). For details see https://github.com/apache/lucene/issues/11452. Users experiencing changes in search behaviour on existing data are advised to reindex. -// -// *Impact* + -// The change is small and should generally provide better analysis results. Existing indices for full-text use cases should be reindexed though. -// ==== -// -// [discrete] -// [[breaking_90_cluster_and_node_setting_changes]] -// ==== Cluster and node setting changes -// -// [[remove_unsupported_legacy_value_for_discovery_type]] -// .Remove unsupported legacy value for `discovery.type` -// [%collapsible] -// ==== -// *Details* + -// Earlier versions of {es} had a `discovery.type` setting which permitted values that referred to legacy discovery types. From v9.0.0 onwards, the only supported values for this setting are `multi-node` (the default) and `single-node`. -// -// *Impact* + -// Remove any value for `discovery.type` from your `elasticsearch.yml` configuration file. -// ==== -// -// [discrete] -// [[breaking_90_es_ql_changes]] -// ==== ES|QL changes -// -// [[esql_entirely_remove_meta_functions]] -// .ESQL: Entirely remove META FUNCTIONS -// [%collapsible] -// ==== -// *Details* + -// Removes an undocumented syntax from ESQL: META FUNCTION. This was never -// reliable or really useful. 
Consult the documentation instead. -// -// *Impact* + -// Removes an undocumented syntax from ESQL: META FUNCTION -// ==== -// -// [discrete] -// [[breaking_90_rest_api_changes]] -// ==== REST API changes -// -// [[remove_cluster_state_from_cluster_reroute_response]] -// .Remove cluster state from `/_cluster/reroute` response -// [%collapsible] -// ==== -// *Details* + -// The `POST /_cluster/reroute` API no longer returns the cluster state in its response. The `?metric` query parameter to this API now has no effect and its use will be forbidden in a future version. -// -// *Impact* + -// Cease usage of the `?metric` query parameter when calling the `POST /_cluster/reroute` API. -// ==== -// -// [[remove_deprecated_local_attribute_from_alias_apis]] -// .Remove deprecated local attribute from alias APIs -// [%collapsible] -// ==== -// *Details* + -// The following APIs no longer accept the `?local` query parameter: `GET /_alias`, `GET /_aliases`, `GET /_alias/{name}`, `HEAD /_alias/{name}`, `GET /{index}/_alias`, `HEAD /{index}/_alias`, `GET /{index}/_alias/{name}`, `HEAD /{index}/_alias/{name}`, `GET /_cat/aliases`, and `GET /_cat/aliases/{alias}`. This parameter has been deprecated and ignored since version 8.12. -// -// *Impact* + -// Cease usage of the `?local` query parameter when calling the listed APIs. -// ==== -// -// [[reworking_rrf_retriever_to_be_evaluated_during_rewrite_phase]] -// .Reworking RRF retriever to be evaluated during rewrite phase -// [%collapsible] -// ==== -// *Details* + -// In this release (8.16), we have introduced major changes to the retrievers framework -// and how they can be evaluated, focusing mainly on compound retrievers -// like `rrf` and `text_similarity_reranker`, which allowed us to support full -// composability (i.e. any retriever can be nested under any compound retriever), -// as well as supporting additional search features like collapsing, explaining, -// aggregations, and highlighting. -// -// To ensure consistency, and given that this rework is not available until 8.16, -// `rrf` and `text_similarity_reranker` retriever queries would now -// throw an exception in a mixed cluster scenario, where there are nodes -// both in current or later (i.e. >= 8.16) and previous ( <= 8.15) versions. -// -// As part of the rework, we have also removed the `_rank` property from -// the responses of an `rrf` retriever. -// -// *Impact* + -// - Users will not be able to use the `rrf` and `text_similarity_reranker` retrievers in a mixed cluster scenario -// with previous releases (i.e. prior to 8.16), and the request will throw an `IllegalArgumentException`. -// - `_rank` has now been removed from the output of the `rrf` retrievers so trying to directly parse the field -// will throw an exception -// ==== -// -// [[update_data_stream_lifecycle_telemetry_to_track_global_retention]] -// .Update data stream lifecycle telemetry to track global retention -// [%collapsible] -// ==== -// *Details* + -// In this release we introduced global retention settings that fulfil the following criteria: -// -// - a data stream managed by the data stream lifecycle, -// - a data stream that is not an internal data stream. -// -// As a result, we defined different types of retention: -// -// - **data retention**: the retention configured on data stream level by the data stream user or owner -// - **default global retention:** the retention configured by an admin on a cluster level and applied to any -// data stream that doesn't have data retention and fulfils the criteria. 
-// - **max global retention:** the retention configured by an admin to guard against having long retention periods.
-// Any data stream that fulfills the criteria will adhere to the data retention unless it exceeds the max retention,
-// in which case the max global retention applies.
-// - **effective retention:** the retention that applies on the data stream that fulfill the criteria at a given moment
-// in time. It takes into consideration all the retention above and resolves it to the retention that will take effect.
-//
-// Considering the above changes, having a field named `retention` in the usage API was confusing. For this reason, we
-// renamed it to `data_retention` and added telemetry about the other configurations too.
-//
-// *Impact* +
-// Users that use the field `data_lifecycle.retention` should use the `data_lifecycle.data_retention`
-// ====
+
+
+There are no notable breaking changes in {es} 9.0.
+There are, however, some less critical breaking changes.
+
+[discrete]
+[[breaking_90_analysis_changes]]
+==== Analysis changes
+
+[[snowball_stemmers_have_been_upgraded]]
+.Snowball stemmers have been upgraded
+[%collapsible]
+====
+*Details* +
+Lucene 10 ships with an upgrade of its Snowball stemmers. For details see https://github.com/apache/lucene/issues/13209. Users of Snowball stemmers who are experiencing changes in search behaviour on existing data are advised to reindex.
+
+*Impact* +
+The upgrade should generally provide improved stemming results. Small changes in token analysis can lead to mismatches with previously indexed data, so existing indices that use Snowball stemmers as part of their analysis chain should be reindexed.
+====
+
+[[german2_snowball_stemmer_an_alias_for_german_stemmer]]
+.The "german2" snowball stemmer is now an alias for the "german" stemmer
+[%collapsible]
+====
+*Details* +
+Lucene 10 has merged the improved "german2" snowball language stemmer with the "german" stemmer. For Elasticsearch, "german2" is now a deprecated alias for "german". This may result in slightly different tokens being generated for terms with umlaut substitution (like "ue" for "ü", etc.).
+
+*Impact* +
+Replace usages of "german2" with "german" in analysis configuration. Old indices that use the "german" stemmer should be reindexed if possible.
+====
+
+[[persian_analyzer_has_stemmer_by_default]]
+.The 'persian' analyzer has stemmer by default
+[%collapsible]
+====
+*Details* +
+Lucene 10 has added a final stemming step to its PersianAnalyzer, which Elasticsearch exposes as the 'persian' analyzer. Existing indices will keep the old non-stemming behaviour while new indices will see the updated behaviour with added stemming. Users who wish to maintain the non-stemming behaviour need to define their own analyzer as outlined in https://www.elastic.co/guide/en/elasticsearch/reference/8.15/analysis-lang-analyzer.html#persian-analyzer. Users who wish to use the new stemming behaviour for existing indices will have to reindex their data.
+
+*Impact* +
+Indexing with the 'persian' analyzer will produce slightly different tokens. Users should check if this impacts their search results. If they wish to maintain the legacy non-stemming behaviour they can define their own analyzer equivalent as explained in https://www.elastic.co/guide/en/elasticsearch/reference/8.15/analysis-lang-analyzer.html#persian-analyzer.
+====
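For existing indices that must keep the pre-9.0 persian behaviour, the linked guidance amounts to recreating the analyzer chain without the final stemming step. A rough sketch of the settings shape only (the name `persian_no_stem` is illustrative and the filter chain is abridged; see the linked reference for the full equivalent), expressed as a Java text block:

    // Sketch of index analysis settings without the new persian stemming step.
    String analysisSettings = """
        {
          "analysis": {
            "analyzer": {
              "persian_no_stem": {
                "tokenizer": "standard",
                "filter": [ "lowercase", "decimal_digit", "arabic_normalization", "persian_normalization" ]
              }
            }
          }
        }
        """; // apply via index settings and reference "persian_no_stem" in mappings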
+
+[[korean_dictionary_for_nori_has_been_updated]]
+.The Korean dictionary for Nori has been updated
+[%collapsible]
+====
+*Details* +
+Lucene 10 ships with an updated Korean dictionary (mecab-ko-dic-2.1.1). For details see https://github.com/apache/lucene/issues/11452. Users experiencing changes in search behaviour on existing data are advised to reindex.
+
+*Impact* +
+The change is small and should generally provide better analysis results. Existing indices for full-text use cases should be reindexed though.
+====
+
+[discrete]
+[[breaking_90_cluster_and_node_setting_changes]]
+==== Cluster and node setting changes
+
+[[minimum_shard_balancer_threshold_1_0]]
+.Minimum shard balancer threshold is now 1.0
+[%collapsible]
+====
+*Details* +
+Earlier versions of {es} accepted any non-negative value for `cluster.routing.allocation.balance.threshold`, but values smaller than `1.0` do not make sense and have been ignored since version 8.6.1. From 9.0.0 onwards these nonsensical values are forbidden.
+
+*Impact* +
+Do not set `cluster.routing.allocation.balance.threshold` to a value less than `1.0`.
+====
+
+[[remove_cluster_routing_allocation_disk_watermark_enable_for_single_data_node_setting]]
+.Remove `cluster.routing.allocation.disk.watermark.enable_for_single_data_node` setting
+[%collapsible]
+====
+*Details* +
+Prior to 7.8, whenever a cluster had only a single data node, the watermarks would not be respected. In order to change this in 7.8+ in a backwards-compatible way, we introduced the `cluster.routing.allocation.disk.watermark.enable_for_single_data_node` node setting. The setting was deprecated in 7.14 and was made to accept only `true` in 8.0.
+
+*Impact* +
+No known end-user impact.
+====
+
+[[remove_deprecated_xpack_searchable_snapshot_allocate_on_rolling_restart_setting]]
+.Remove deprecated `xpack.searchable.snapshot.allocate_on_rolling_restart` setting
+[%collapsible]
+====
+*Details* +
+The `xpack.searchable.snapshot.allocate_on_rolling_restart` setting was created as an escape hatch just in case relying on the `cluster.routing.allocation.enable=primaries` setting for allocating searchable snapshots during rolling restarts had some unintended side effects. It has been deprecated since 8.2.0.
+
+*Impact* +
+Remove `xpack.searchable.snapshot.allocate_on_rolling_restart` from your settings if present.
+====
+
+[[remove_unsupported_legacy_value_for_discovery_type]]
+.Remove unsupported legacy value for `discovery.type`
+[%collapsible]
+====
+*Details* +
+Earlier versions of {es} had a `discovery.type` setting which permitted values that referred to legacy discovery types. From v9.0.0 onwards, the only supported values for this setting are `multi-node` (the default) and `single-node`.
+
+*Impact* +
+Remove any value for `discovery.type` from your `elasticsearch.yml` configuration file.
+====
+
+[discrete]
+[[breaking_90_ingest_changes]]
+==== Ingest changes
+
+[[remove_ecs_option_on_user_agent_processor]]
+.Remove `ecs` option on `user_agent` processor
+[%collapsible]
+====
+*Details* +
+The `user_agent` ingest processor no longer accepts the `ecs` option. (It was previously deprecated and ignored.)
+
+*Impact* +
+Users should stop using the `ecs` option when creating instances of the `user_agent` ingest processor. The option will be removed from existing processors stored in the cluster state on upgrade.
+====
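As an illustration of the `user_agent` change, a pipeline definition that remains valid in 9.0 simply omits the removed flag. A sketch only (the pipeline shape and the `agent` field name are hypothetical), as a Java text block:

    // A user_agent processor without the removed `ecs` option:
    String pipelineDefinition = """
        { "processors": [ { "user_agent": { "field": "agent" } } ] }
        """;
    // Before 9.0 an "ecs": true entry here was deprecated and ignored;
    // on upgrade the option is stripped from pipelines stored in the cluster state.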
+
+[[remove_ignored_fallback_option_on_geoip_processor]]
+.Remove ignored fallback option on GeoIP processor
+[%collapsible]
+====
+*Details* +
+The `fallback_to_default_databases` option on the `geoip` ingest processor has been removed. (It was deprecated and ignored since 8.0.0.)
+
+*Impact* +
+Users should remove the no-op `fallback_to_default_databases` option from any `geoip` ingest processors.
+====
+
+[discrete]
+[[breaking_90_mapping_changes]]
+==== Mapping changes
+
+[[remove_support_for_type_fields_copy_to_boost_in_metadata_field_definition]]
+.Remove support for type, fields, copy_to and boost in metadata field definition
+[%collapsible]
+====
+*Details* +
+The `type`, `fields`, `copy_to` and `boost` parameters are no longer supported in metadata field definitions.
+
+*Impact* +
+Users providing `type`, `fields`, `copy_to` or `boost` as part of a metadata field definition should remove them from their mappings.
+====
+
+[discrete]
+[[breaking_90_rest_api_changes]]
+==== REST API changes
+
+[[apply_more_strict_parsing_of_actions_in_bulk_api]]
+.Apply more strict parsing of actions in bulk API
+[%collapsible]
+====
+*Details* +
+Previously, the following classes of malformed input were deprecated but not rejected in the action lines of a bulk request: missing closing brace; additional keys after the action (which were ignored); additional data after the closing brace (which was ignored). They will now be considered errors and rejected.
+
+*Impact* +
+Users must provide well-formed input when using the bulk API. (They can request REST API compatibility with v8 to get the previous behaviour back as an interim measure.)
+====
+
+[[error_json_structure_has_changed_when_detailed_errors_are_disabled]]
+.Error JSON structure has changed when detailed errors are disabled
+[%collapsible]
+====
+*Details* +
+This change modifies the JSON format of error messages returned to REST clients when detailed messages are turned off.
+Previously, the JSON returned when an exception occurred and `http.detailed_errors.enabled: false` was set consisted of a single `"error"` text field with some basic information.
+Setting `http.detailed_errors.enabled: true` (the default) changed this field to an object with more detailed information.
+With this change, non-detailed errors now have the same structure as detailed errors. `"error"` will now always be an object with, at a minimum, a `"type"` and `"reason"` field. Additional fields are included when detailed errors are enabled.
+To use the previous structure for non-detailed errors, use the v8 REST API.
+
+*Impact* +
+If you have set `http.detailed_errors.enabled: false` (the default is `true`), the structure of the JSON returned when any exception occurs now matches the structure when detailed errors are enabled.
+To use the previous structure for non-detailed errors, use the v8 REST API.
+====
+
+[[remove_cluster_state_from_cluster_reroute_response]]
+.Remove cluster state from `/_cluster/reroute` response
+[%collapsible]
+====
+*Details* +
+The `POST /_cluster/reroute` API no longer returns the cluster state in its response. The `?metric` query parameter to this API now has no effect and its use will be forbidden in a future version.
+
+*Impact* +
+Cease usage of the `?metric` query parameter when calling the `POST /_cluster/reroute` API.
+====
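To make the stricter bulk action parsing described above concrete: every action line must be a single, complete JSON object with nothing trailing it. A small sketch (index names, ids, and field values are hypothetical), as a Java text block:

    String wellFormedBulkBody = """
        {"index":{"_index":"logs","_id":"1"}}
        {"message":"first doc"}
        {"delete":{"_index":"logs","_id":"2"}}
        """;
    // Rejected from 9.0 on: {"index":{"_index":"logs"}        <- missing closing brace
    // Rejected from 9.0 on: {"index":{}} {"note":"trailing data after the closing brace"}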
+
+[[remove_deprecated_local_attribute_from_alias_apis]]
+.Remove deprecated local attribute from alias APIs
+[%collapsible]
+====
+*Details* +
+The following APIs no longer accept the `?local` query parameter: `GET /_alias`, `GET /_aliases`, `GET /_alias/{name}`, `HEAD /_alias/{name}`, `GET /{index}/_alias`, `HEAD /{index}/_alias`, `GET /{index}/_alias/{name}`, `HEAD /{index}/_alias/{name}`, `GET /_cat/aliases`, and `GET /_cat/aliases/{alias}`. This parameter has been deprecated and ignored since version 8.12.
+
+*Impact* +
+Cease usage of the `?local` query parameter when calling the listed APIs.
+====
+
+[[remove_legacy_params_from_range_query]]
+.Remove legacy params from range query
+[%collapsible]
+====
+*Details* +
+The deprecated range query parameters `to`, `from`, `include_lower`, and `include_upper` are no longer supported.
+
+*Impact* +
+Users should use `lt`, `lte`, `gt`, and `gte` query parameters instead.
+====
+
+[[remove_support_for_deprecated_force_source_highlighting_parameter]]
+.Remove support for deprecated `force_source` highlighting parameter
+[%collapsible]
+====
+*Details* +
+The deprecated highlighting `force_source` parameter is no longer supported.
+
+*Impact* +
+Users should remove usages of the `force_source` parameter from their search requests.
+====
 
 [discrete]
@@ -235,85 +257,45 @@ after upgrading to 9.0.
 
 To find out if you are using any deprecated functionality, enable <<deprecation-logging, deprecation logging>>.
 
-//
-// [discrete]
-// [[deprecations_90_analysis]]
-// ==== Analysis deprecations
-//
-// [[deprecate_dutch_kp_lovins_stemmer_as_they_are_removed_in_lucene_10]]
-// .Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
-// [%collapsible]
-// ====
-// *Details* +
-// kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed.
-//
-// *Impact* +
-// These stemmers will be removed and will be no longer supported.
-// ====
-//
-// [[deprecate_edge_ngram_side_parameter]]
-// .deprecate `edge_ngram` side parameter
-// [%collapsible]
-// ====
-// *Details* +
-// edge_ngram will no longer accept the side parameter.
-//
-// *Impact* +
-// Users will need to update any usage of edge_ngram token filter that utilizes `side`. If the `back` value was used, they can achieve the same behavior by using the `reverse` token filter.
-// ====
-//
-// [discrete]
-// [[deprecations_90_crud]]
-// ==== CRUD deprecations
-//
-// [[deprecate_dot_prefixed_indices_composable_template_index_patterns]]
-// .Deprecate dot-prefixed indices and composable template index patterns
-// [%collapsible]
-// ====
-// *Details* +
-// Indices beginning with a dot '.' are reserved for system and internal indices, and should not be used by and end-user. Additionally, composable index templates that contain patterns for dot-prefixed indices should also be avoided, as these patterns are meant for internal use only. In a future Elasticsearch version, creation of these dot-prefixed indices will no longer be allowed.
-//
-// *Impact* +
-// Requests performing an action that would create an index beginning with a dot (indexing a document, manual creation, reindex), or creating an index template with index patterns beginning with a dot, will contain a deprecation header warning about dot-prefixed indices in the response.
-// ====
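For the removal of the legacy range query parameters described above, the supported bound parameters map directly onto the Java client helper. A minimal sketch using the real `QueryBuilders` API (the field name and values are hypothetical):

    import org.elasticsearch.index.query.QueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;

    // `gte`/`gt`/`lte`/`lt` replace the removed `from`/`to`/`include_lower`/`include_upper`:
    QueryBuilder rangeQuery = QueryBuilders.rangeQuery("age").gte(10).lt(20);
    // Equivalent JSON body: {"query":{"range":{"age":{"gte":10,"lt":20}}}}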
-//
-// [discrete]
-// [[deprecations_90_rest_api]]
-// ==== REST API deprecations
-//
-// [[adding_deprecation_warnings_for_rrf_using_rank_sub_searches]]
-// .Adding deprecation warnings for rrf using rank and `sub_searches`
-// [%collapsible]
-// ====
-// *Details* +
-// Search API parameter `sub_searches` will no longer be a supported and will be removed in future releases. Similarly, `rrf` can only be used through the specified `retriever` and no longer though the `rank` parameter
-//
-// *Impact* +
-// Requests specifying rrf through `rank` and/or `sub_searches` elements will be disallowed in a future version. Users should instead utilize the new `retriever` parameter.
-// ====
-//
-// [[deprecate_legacy_params_from_range_query]]
-// .Deprecate legacy params from range query
-// [%collapsible]
-// ====
-// *Details* +
-// Range query will not longer accept `to`, `from`, `include_lower`, and `include_upper` parameters.
-//
-// *Impact* +
-// Instead use `gt`, `gte`, `lt` and `lte` parameters.
-// ====
-//
-// [[inference_api_deprecate_elser_service]]
-// .[Inference API] Deprecate elser service
-// [%collapsible]
-// ====
-// *Details* +
-// The `elser` service of the inference API will be removed in an upcoming release. Please use the elasticsearch service instead.
-//
-// *Impact* +
-// In the current version there is no impact. In a future version, users of the `elser` service will no longer be able to use it, and will be required to use the `elasticsearch` service to access elser through the inference API.
-// ====
-
-// BELOW WAS MANUALLY ADDED TO FIX THE BUILD
-include::migrate_9_0/transient-settings-migration-guide.asciidoc[]
-//include::migrate_9_0/rest-api-changes.asciidoc[] //see ES-9932
+
+
+[discrete]
+[[deprecations_90_mapping]]
+==== Mapping deprecations
+
+[[deprecate_source_mode_in_mappings]]
+.Deprecate `_source.mode` in mappings
+[%collapsible]
+====
+*Details* +
+Configuring `_source.mode` in mappings is deprecated and will be removed in future versions. Use the `index.mapping.source.mode` index setting instead.
+
+*Impact* +
+Use the `index.mapping.source.mode` index setting instead.
+====
+
+[discrete]
+[[deprecations_90_rest_api]]
+==== REST API deprecations
+
+[[document_type_deprecated_on_simulate_pipeline_api]]
+.Document `_type` deprecated on simulate pipeline API
+[%collapsible]
+====
+*Details* +
+Passing a document with a `_type` property is deprecated in the `/_ingest/pipeline/{id}/_simulate` and `/_ingest/pipeline/_simulate` APIs.
+
+*Impact* +
+Users should already have stopped using mapping types, which were deprecated in {es} 7. This deprecation warning will fire if they specify mapping types on documents passed to the simulate pipeline API.
+====
+
+[[inference_api_deprecate_elser_service]]
+.[Inference API] Deprecate elser service
+[%collapsible]
+====
+*Details* +
+The `elser` service of the inference API will be removed in an upcoming release. Please use the elasticsearch service instead.
+
+*Impact* +
+In the current version there is no impact. In a future version, users of the `elser` service will no longer be able to use it, and will be required to use the `elasticsearch` service to access elser through the inference API.
+====
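For the `_source.mode` deprecation above, the replacement is an index-level setting rather than a mapping field. A hedged sketch using the real `CreateIndexRequest` and `Settings` APIs (the index name and the `synthetic` value are illustrative):

    import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
    import org.elasticsearch.common.settings.Settings;

    // Configure source mode via the index setting instead of `_source.mode` in mappings:
    CreateIndexRequest createIndex = new CreateIndexRequest("my-index").settings(
        Settings.builder().put("index.mapping.source.mode", "synthetic")
    );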
+ + diff --git a/docs/reference/release-notes/9.0.0.asciidoc b/docs/reference/release-notes/9.0.0.asciidoc index af26fd57385e3..93e5a30cb82f7 100644 --- a/docs/reference/release-notes/9.0.0.asciidoc +++ b/docs/reference/release-notes/9.0.0.asciidoc @@ -1,6 +1,3 @@ -// THIS IS A GENERATED FILE. DO NOT EDIT DIRECTLY. -// The content generated here are is not correct and most has been manually commented out until it can be fixed. -// See ES-9931 for more details. [[release-notes-9.0.0]] == {es} version 9.0.0 @@ -12,546 +9,289 @@ Also see <>. [float] === Breaking changes -// Allocation:: -// * Remove cluster state from `/_cluster/reroute` response {es-pull}114231[#114231] (issue: {es-issue}88978[#88978]) -// -// Analysis:: -// * Set lenient to true by default when using updateable synonyms {es-pull}110901[#110901] -// * Snowball stemmers have been upgraded {es-pull}114146[#114146] -// * The 'german2' stemmer is now an alias for the 'german' snowball stemmer {es-pull}113614[#113614] -// * The 'persian' analyzer has stemmer by default {es-pull}113482[#113482] (issue: {es-issue}113050[#113050]) -// * The Korean dictionary for Nori has been updated {es-pull}114124[#114124] -// -// Cluster Coordination:: -// * Remove unsupported legacy value for `discovery.type` {es-pull}112903[#112903] -// -// Data streams:: -// * Update data stream lifecycle telemetry to track global retention {es-pull}112451[#112451] -// -// ES|QL:: -// * ESQL: Entirely remove META FUNCTIONS {es-pull}113967[#113967] -// -// Indices APIs:: -// * Remove deprecated local attribute from alias APIs {es-pull}115393[#115393] -// -// Mapping:: -// * JDK locale database change {es-pull}113975[#113975] -// -// Search:: -// * Adding breaking change entry for retrievers {es-pull}115399[#115399] +Allocation:: +* Increase minimum threshold in shard balancer {es-pull}115831[#115831] +* Remove `cluster.routing.allocation.disk.watermark.enable_for_single_data_node` setting {es-pull}114207[#114207] +* Remove cluster state from `/_cluster/reroute` response {es-pull}114231[#114231] (issue: {es-issue}88978[#88978]) + +Analysis:: +* Snowball stemmers have been upgraded {es-pull}114146[#114146] +* The 'german2' stemmer is now an alias for the 'german' snowball stemmer {es-pull}113614[#113614] +* The 'persian' analyzer has stemmer by default {es-pull}113482[#113482] (issue: {es-issue}113050[#113050]) +* The Korean dictionary for Nori has been updated {es-pull}114124[#114124] + +Cluster Coordination:: +* Remove unsupported legacy value for `discovery.type` {es-pull}112903[#112903] + +Highlighting:: +* Remove support for deprecated `force_source` highlighting parameter {es-pull}116943[#116943] + +Indices APIs:: +* Apply more strict parsing of actions in bulk API {es-pull}115923[#115923] +* Remove deprecated local attribute from alias APIs {es-pull}115393[#115393] + +Infra/REST API:: +* Output a consistent format when generating error json {es-pull}90529[#90529] (issue: {es-issue}89387[#89387]) + +Ingest Node:: +* Remove `ecs` option on `user_agent` processor {es-pull}116077[#116077] +* Remove ignored fallback option on GeoIP processor {es-pull}116112[#116112] + +Mapping:: +* Remove support for type, fields, `copy_to` and boost in metadata field definition {es-pull}116944[#116944] + +Search:: +* Remove legacy params from range query {es-pull}116970[#116970] + +Snapshot/Restore:: +* Remove deprecated `xpack.searchable.snapshot.allocate_on_rolling_restart` setting {es-pull}114202[#114202] [[bug-9.0.0]] [float] === Bug fixes -// -// Aggregations:: -// * 
Always check the parent breaker with zero bytes in `PreallocatedCircuitBreakerService` {es-pull}115181[#115181] -// * Force using the last centroid during merging {es-pull}111644[#111644] (issue: {es-issue}111065[#111065]) -// -// Authentication:: -// * Check for disabling own user in Put User API {es-pull}112262[#112262] (issue: {es-issue}90205[#90205]) -// * Expose cluster-state role mappings in APIs {es-pull}114951[#114951] -// -// Authorization:: -// * Fix DLS & FLS sometimes being enforced when it is disabled {es-pull}111915[#111915] (issue: {es-issue}94709[#94709]) -// * Fix DLS using runtime fields and synthetic source {es-pull}112341[#112341] -// -// CRUD:: -// * Don't fail retention lease sync actions due to capacity constraints {es-pull}109414[#109414] (issue: {es-issue}105926[#105926]) -// * Preserve thread context when waiting for segment generation in RTG {es-pull}114623[#114623] -// * Standardize error code when bulk body is invalid {es-pull}114869[#114869] -// -// Cluster Coordination:: -// * Ensure clean thread context in `MasterService` {es-pull}114512[#114512] -// -// Data streams:: -// * Adding support for data streams with a match-all template {es-pull}111311[#111311] (issue: {es-issue}111204[#111204]) -// * Exclude internal data streams from global retention {es-pull}112100[#112100] -// * Fix verbose get data stream API not requiring extra privileges {es-pull}112973[#112973] -// * OTel mappings: avoid metrics to be rejected when attributes are malformed {es-pull}114856[#114856] -// * [otel-data] Add more kubernetes aliases {es-pull}115429[#115429] -// * logs-apm.error-*: define log.level field as keyword {es-pull}112440[#112440] -// -// Distributed:: -// * Handle `InternalSendException` inline for non-forking handlers {es-pull}114375[#114375] -// -// EQL:: -// * Don't use a `BytesStreamOutput` to copy keys in `BytesRefBlockHash` {es-pull}114819[#114819] (issue: {es-issue}114599[#114599]) -// * Fix validation of TEXT fields with case insensitive comparison {es-pull}111238[#111238] (issue: {es-issue}111235[#111235]) -// -// ES|QL:: -// * ESQL: Add Values aggregation tests, fix `ConstantBytesRefBlock` memory handling {es-pull}111367[#111367] -// * ESQL: Align year diffing to the rest of the units in DATE_DIFF: chronological {es-pull}113103[#113103] (issue: {es-issue}112482[#112482]) -// * ESQL: Disable pushdown of WHERE past STATS {es-pull}115308[#115308] (issue: {es-issue}115281[#115281]) -// * ESQL: Fix CASE when conditions are multivalued {es-pull}112401[#112401] (issue: {es-issue}112359[#112359]) -// * ESQL: Fix Double operations returning infinite {es-pull}111064[#111064] (issue: {es-issue}111026[#111026]) -// * ESQL: Fix `REVERSE` with backspace character {es-pull}115245[#115245] (issues: {es-issue}114372[#114372], {es-issue}115227[#115227], {es-issue}115228[#115228]) -// * ESQL: Fix a bug in `MV_PERCENTILE` {es-pull}112218[#112218] (issues: {es-issue}112193[#112193], {es-issue}112180[#112180], {es-issue}112187[#112187], {es-issue}112188[#112188]) -// * ESQL: Fix filtered grouping on ords {es-pull}115312[#115312] (issue: {es-issue}114897[#114897]) -// * ESQL: Fix grammar changes around per agg filtering {es-pull}114848[#114848] -// * ESQL: Fix serialization during `can_match` {es-pull}111779[#111779] (issues: {es-issue}111701[#111701], {es-issue}111726[#111726]) -// * ESQL: Fix synthetic attribute pruning {es-pull}111413[#111413] (issue: {es-issue}105821[#105821]) -// * ESQL: don't lose the original casting error message {es-pull}111968[#111968] (issue: 
{es-issue}111967[#111967]) -// * ESQL: fix for missing indices error message {es-pull}111797[#111797] (issue: {es-issue}111712[#111712]) -// * ES|QL: Fix stats by constant expression {es-pull}114899[#114899] -// * ES|QL: Restrict sorting for `_source` and counter field types {es-pull}114638[#114638] (issues: {es-issue}114423[#114423], {es-issue}111976[#111976]) -// * ES|QL: better validation for GROK patterns {es-pull}110574[#110574] (issue: {es-issue}110533[#110533]) -// * ES|QL: better validation for RLIKE patterns {es-pull}112489[#112489] (issue: {es-issue}112485[#112485]) -// * ES|QL: better validation of GROK patterns {es-pull}112200[#112200] (issue: {es-issue}112111[#112111]) -// * Fix ST_CENTROID_AGG when no records are aggregated {es-pull}114888[#114888] (issue: {es-issue}106025[#106025]) -// * Fix TDigestState.read CB leaks {es-pull}114303[#114303] (issue: {es-issue}114194[#114194]) -// * Spatial search functions support multi-valued fields in compute engine {es-pull}112063[#112063] (issues: {es-issue}112102[#112102], {es-issue}112505[#112505], {es-issue}110830[#110830]) -// * [ES|QL] Check expression resolved before checking its data type in `ImplicitCasting` {es-pull}113314[#113314] (issue: {es-issue}113242[#113242]) -// * [ES|QL] Simplify patterns for subfields {es-pull}111118[#111118] -// * [ES|QL] Simplify syntax of named parameter for identifier and pattern {es-pull}115061[#115061] -// * [ES|QL] Skip validating remote cluster index names in parser {es-pull}114271[#114271] -// * [ES|QL] Use `RangeQuery` and String in `BinaryComparison` on datetime fields {es-pull}110669[#110669] (issue: {es-issue}107900[#107900]) -// * [ES|QL] add tests for stats by constant {es-pull}110593[#110593] (issue: {es-issue}105383[#105383]) -// * [ES|QL] make named parameter for identifier and pattern snapshot {es-pull}114784[#114784] -// * [ES|QL] validate `mv_sort` order {es-pull}110021[#110021] (issue: {es-issue}109910[#109910]) -// -// Geo:: -// * Fix cases of collections with one point {es-pull}111193[#111193] (issue: {es-issue}110982[#110982]) -// -// Health:: -// * Set `replica_unassigned_buffer_time` in constructor {es-pull}112612[#112612] -// -// ILM+SLM:: -// * Make `SnapshotLifecycleStats` immutable so `SnapshotLifecycleMetadata.EMPTY` isn't changed as side-effect {es-pull}111215[#111215] -// -// Indices APIs:: -// * Revert "Add `ResolvedExpression` wrapper" {es-pull}115317[#115317] -// -// Infra/Core:: -// * Fix max file size check to use `getMaxFileSize` {es-pull}113723[#113723] (issue: {es-issue}113705[#113705]) -// * Guard blob store local directory creation with `doPrivileged` {es-pull}115459[#115459] -// * Handle `BigInteger` in xcontent copy {es-pull}111937[#111937] (issue: {es-issue}111812[#111812]) -// * Report JVM stats for all memory pools (97046) {es-pull}115117[#115117] (issue: {es-issue}97046[#97046]) -// * `ByteArrayStreamInput:` Return -1 when there are no more bytes to read {es-pull}112214[#112214] -// -// Infra/Logging:: -// * Only emit product origin in deprecation log if present {es-pull}111683[#111683] (issue: {es-issue}81757[#81757]) -// -// Infra/Metrics:: -// * Make `randomInstantBetween` always return value in range [minInstant, `maxInstant]` {es-pull}114177[#114177] -// -// Infra/REST API:: -// * Fixed a `NullPointerException` in `_capabilities` API when the `path` parameter is null. 
{es-pull}113413[#113413] (issue: {es-issue}113413[#113413]) -// -// Infra/Settings:: -// * GET _cluster/settings with include_defaults returns the expected fallback value if defined in elasticsearch.yml {es-pull}110816[#110816] (issue: {es-issue}110815[#110815]) -// -// Ingest Node:: -// * Add warning headers for ingest pipelines containing special characters {es-pull}114837[#114837] (issue: {es-issue}104411[#104411]) -// * Fix IPinfo geolocation schema {es-pull}115147[#115147] -// * Fix `getDatabaseType` for unusual MMDBs {es-pull}112888[#112888] -// * Reducing error-level stack trace logging for normal events in `GeoIpDownloader` {es-pull}114924[#114924] -// -// License:: -// * Fix Start Trial API output acknowledgement header for features {es-pull}111740[#111740] (issue: {es-issue}111739[#111739]) -// * Fix `TokenService` always appearing used in Feature Usage {es-pull}112263[#112263] (issue: {es-issue}61956[#61956]) -// -// Logs:: -// * Do not expand dots when storing objects in ignored source {es-pull}113910[#113910] -// * Fix `ignore_above` handling in synthetic source when index level setting is used {es-pull}113570[#113570] (issue: {es-issue}113538[#113538]) -// * Fix synthetic source for flattened field when used with `ignore_above` {es-pull}113499[#113499] (issue: {es-issue}112044[#112044]) -// -// Machine Learning:: -// * Avoid `ModelAssignment` deadlock {es-pull}109684[#109684] -// * Fix NPE in Get Deployment Stats {es-pull}115404[#115404] -// * Fix bug in ML serverless autoscaling which prevented trained model updates from triggering a scale up {es-pull}110734[#110734] -// * Ignore unrecognized openai sse fields {es-pull}114715[#114715] -// * Mitigate IOSession timeouts {es-pull}115414[#115414] (issues: {es-issue}114385[#114385], {es-issue}114327[#114327], {es-issue}114105[#114105], {es-issue}114232[#114232]) -// * Prevent NPE if model assignment is removed while waiting to start {es-pull}115430[#115430] -// * Send mid-stream errors to users {es-pull}114549[#114549] -// * Temporarily return both `modelId` and `inferenceId` for GET /_inference until we migrate clients to only `inferenceId` {es-pull}111490[#111490] -// * Warn for model load failures if they have a status code <500 {es-pull}113280[#113280] -// * [Inference API] Remove unused Cohere rerank service settings fields in a BWC way {es-pull}110427[#110427] -// * [ML] Create Inference API will no longer return model_id and now only return inference_id {es-pull}112508[#112508] -// -// Mapping:: -// * Fix `MapperBuilderContext#isDataStream` when used in dynamic mappers {es-pull}110554[#110554] -// * Fix synthetic source field names for multi-fields {es-pull}112850[#112850] -// * Retrieve the source for objects and arrays in a separate parsing phase {es-pull}113027[#113027] (issue: {es-issue}112374[#112374]) -// * Two empty mappings now are created equally {es-pull}107936[#107936] (issue: {es-issue}107031[#107031]) -// -// Ranking:: -// * Fix MLTQuery handling of custom term frequencies {es-pull}110846[#110846] -// * Fix RRF validation for `rank_constant` < 1 {es-pull}112058[#112058] -// * Fix score count validation in reranker response {es-pull}111212[#111212] (issue: {es-issue}111202[#111202]) -// -// Search:: -// * Allow for querries on `_tier` to skip shards in the `can_match` phase {es-pull}114990[#114990] (issue: {es-issue}114910[#114910]) -// * Allow out of range term queries for numeric types {es-pull}112916[#112916] -// * Do not exclude empty arrays or empty objects in source filtering {es-pull}112250[#112250] 
(issue: {es-issue}109668[#109668]) -// * Fix synthetic source handling for `bit` type in `dense_vector` field {es-pull}114407[#114407] (issue: {es-issue}114402[#114402]) -// * Improve DateTime error handling and add some bad date tests {es-pull}112723[#112723] (issue: {es-issue}112190[#112190]) -// * Improve date expression/remote handling in index names {es-pull}112405[#112405] (issue: {es-issue}112243[#112243]) -// * Make "too many clauses" throw IllegalArgumentException to avoid 500s {es-pull}112678[#112678] (issue: {es-issue}112177[#112177]) -// * Make empty string searches be consistent with case (in)sensitivity {es-pull}110833[#110833] -// * Prevent flattening of ordered and unordered interval sources {es-pull}114234[#114234] -// * Remove needless forking to GENERIC in `TransportMultiSearchAction` {es-pull}110796[#110796] -// * Search/Mapping: KnnVectorQueryBuilder support for allowUnmappedFields {es-pull}107047[#107047] (issue: {es-issue}106846[#106846]) -// * Span term query to convert to match no docs when unmapped field is targeted {es-pull}113251[#113251] -// * Speedup `CanMatchPreFilterSearchPhase` constructor {es-pull}110860[#110860] -// * Updated Date Range to Follow Documentation When Assuming Missing Values {es-pull}112258[#112258] (issue: {es-issue}111484[#111484]) -// -// Security:: -// * Updated the transport CA name in Security Auto-Configuration. {es-pull}106520[#106520] (issue: {es-issue}106455[#106455]) -// -// Snapshot/Restore:: -// * Retry throttled snapshot deletions {es-pull}113237[#113237] -// -// TSDB:: -// * Implement `parseBytesRef` for `TimeSeriesRoutingHashFieldType` {es-pull}113373[#113373] (issue: {es-issue}112399[#112399]) -// -// Task Management:: -// * Improve handling of failure to create persistent task {es-pull}114386[#114386] -// -// Transform:: -// * Allow task canceling of validate API calls {es-pull}110951[#110951] -// * Include reason when no nodes are found {es-pull}112409[#112409] (issue: {es-issue}112404[#112404]) -// -// Vector Search:: -// * Fix dim validation for bit `element_type` {es-pull}114533[#114533] -// * Support semantic_text in object fields {es-pull}114601[#114601] (issue: {es-issue}114401[#114401]) -// -// Watcher:: -// * Truncating watcher history if it is too large {es-pull}111245[#111245] (issue: {es-issue}94745[#94745]) -// * Watch Next Run Interval Resets On Shard Move or Node Restart {es-pull}115102[#115102] (issue: {es-issue}111433[#111433]) -// -// [[deprecation-9.0.0]] -// [float] -// === Deprecations -// -// Analysis:: -// * Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 {es-pull}113143[#113143] -// * deprecate `edge_ngram` side parameter {es-pull}110829[#110829] -// -// CRUD:: -// * Deprecate dot-prefixed indices and composable template index patterns {es-pull}112571[#112571] -// -// Machine Learning:: -// * [Inference API] Deprecate elser service {es-pull}113216[#113216] -// -// Search:: -// * Adding deprecation warnings for rrf using rank and `sub_searches` {es-pull}114854[#114854] -// * Deprecate legacy params from range query {es-pull}113286[#113286] -// -// [[enhancement-9.0.0]] -// [float] -// === Enhancements -// -// Aggregations:: -// * Account for `DelayedBucket` before reduction {es-pull}113013[#113013] -// * Add protection for OOM during aggregations partial reduction {es-pull}110520[#110520] -// * Deduplicate `BucketOrder` when deserializing {es-pull}112707[#112707] -// * Lower the memory footprint when creating `DelayedBucket` {es-pull}112519[#112519] -// * Reduce heap usage for 
`AggregatorsReducer` {es-pull}112874[#112874] -// * Remove reduce and `reduceContext` from `DelayedBucket` {es-pull}112547[#112547] -// -// Allocation:: -// * Add link to flood-stage watermark exception message {es-pull}111315[#111315] -// * Always allow rebalancing by default {es-pull}111015[#111015] -// * Only publish desired balance gauges on master {es-pull}115383[#115383] -// -// Application:: -// * [Profiling] add `container.id` field to event index template {es-pull}111969[#111969] -// -// Authorization:: -// * Add manage roles privilege {es-pull}110633[#110633] -// * Add privileges required for CDR misconfiguration features to work on AWS SecurityHub integration {es-pull}112574[#112574] -// * [Security Solution] Add `create_index` to `kibana_system` role for index/DS `.logs-endpoint.action.responses-*` {es-pull}115241[#115241] -// -// CRUD:: -// * Suppress merge-on-recovery for older indices {es-pull}113462[#113462] -// -// Codec:: -// * Remove zstd feature flag for index codec best compression {es-pull}112665[#112665] -// -// Data streams:: -// * Add 'verbose' flag retrieving `maximum_timestamp` for get data stream API {es-pull}112303[#112303] -// * Display effective retention in the relevant data stream APIs {es-pull}112019[#112019] -// * Expose global retention settings via data stream lifecycle API {es-pull}112210[#112210] -// * Make ecs@mappings work with OTel attributes {es-pull}111600[#111600] -// -// Distributed:: -// * Add link to Max Shards Per Node exception message {es-pull}110993[#110993] -// * Use Azure blob batch API to delete blobs in batches {es-pull}114566[#114566] -// -// EQL:: -// * ESQL: Delay construction of warnings {es-pull}114368[#114368] -// -// ES|QL:: -// * Add EXP ES|QL function {es-pull}110879[#110879] -// * Add `CircuitBreaker` to TDigest, Step 3: Connect with ESQL CB {es-pull}113387[#113387] -// * Add `CircuitBreaker` to TDigest, Step 4: Take into account shallow classes size {es-pull}113613[#113613] (issue: {es-issue}113916[#113916]) -// * Collect and display execution metadata for ES|QL cross cluster searches {es-pull}112595[#112595] (issue: {es-issue}112402[#112402]) -// * ESQL: Add support for multivalue fields in Arrow output {es-pull}114774[#114774] -// * ESQL: BUCKET: allow numerical spans as whole numbers {es-pull}111874[#111874] (issues: {es-issue}104646[#104646], {es-issue}109340[#109340], {es-issue}105375[#105375]) -// * ESQL: Have BUCKET generate friendlier intervals {es-pull}111879[#111879] (issue: {es-issue}110916[#110916]) -// * ESQL: Profile more timing information {es-pull}111855[#111855] -// * ESQL: Push down filters even in case of renames in Evals {es-pull}114411[#114411] -// * ESQL: Remove parent from `FieldAttribute` {es-pull}112881[#112881] -// * ESQL: Speed up CASE for some parameters {es-pull}112295[#112295] -// * ESQL: Speed up grouping by bytes {es-pull}114021[#114021] -// * ESQL: Support INLINESTATS grouped on expressions {es-pull}111690[#111690] -// * ESQL: Use less memory in listener {es-pull}114358[#114358] -// * ES|QL: Add support for cached strings in plan serialization {es-pull}112929[#112929] -// * ES|QL: add Telemetry API and track top functions {es-pull}111226[#111226] -// * ES|QL: add metrics for functions {es-pull}114620[#114620] -// * Enhance SORT push-down to Lucene to cover references to fields and ST_DISTANCE function {es-pull}112938[#112938] (issue: {es-issue}109973[#109973]) -// * Siem ea 9521 improve test {es-pull}111552[#111552] -// * Support multi-valued fields in compute engine for ST_DISTANCE 
{es-pull}114836[#114836] (issue: {es-issue}112910[#112910]) -// * [ESQL] Add `SPACE` function {es-pull}112350[#112350] -// * [ESQL] Add finish() elapsed time to aggregation profiling times {es-pull}113172[#113172] (issue: {es-issue}112950[#112950]) -// * [ESQL] Make query wrapped by `SingleValueQuery` cacheable {es-pull}110116[#110116] -// * [ES|QL] Add hypot function {es-pull}114382[#114382] -// * [ES|QL] Cast mixed numeric types to a common numeric type for Coalesce and In at Analyzer {es-pull}111917[#111917] (issue: {es-issue}111486[#111486]) -// * [ES|QL] Combine Disjunctive CIDRMatch {es-pull}111501[#111501] (issue: {es-issue}105143[#105143]) -// * [ES|QL] Create `Range` in `PushFiltersToSource` for qualified pushable filters on the same field {es-pull}111437[#111437] -// * [ES|QL] Name parameter with leading underscore {es-pull}111950[#111950] (issue: {es-issue}111821[#111821]) -// * [ES|QL] Named parameter for field names and field name patterns {es-pull}112905[#112905] -// * [ES|QL] Validate index name in parser {es-pull}112081[#112081] -// * [ES|QL] add reverse function {es-pull}113297[#113297] -// * [ES|QL] explicit cast a string literal to `date_period` and `time_duration` in arithmetic operations {es-pull}109193[#109193] -// -// Experiences:: -// * Integrate IBM watsonx to Inference API for text embeddings {es-pull}111770[#111770] -// -// Geo:: -// * Add support for spatial relationships in point field mapper {es-pull}112126[#112126] -// * Small performance improvement in h3 library {es-pull}113385[#113385] -// * Support docvalues only query in shape field {es-pull}112199[#112199] -// -// Health:: -// * (API) Cluster Health report `unassigned_primary_shards` {es-pull}112024[#112024] -// * Do not treat replica as unassigned if primary recently created and unassigned time is below a threshold {es-pull}112066[#112066] -// * Increase `replica_unassigned_buffer_time` default from 3s to 5s {es-pull}112834[#112834] -// -// ILM+SLM:: -// * ILM: Add `total_shards_per_node` setting to searchable snapshot {es-pull}112972[#112972] (issue: {es-issue}112261[#112261]) -// * PUT slm policy should only increase version if actually changed {es-pull}111079[#111079] -// * Preserve Step Info Across ILM Auto Retries {es-pull}113187[#113187] -// * Register SLM run before snapshotting to save stats {es-pull}110216[#110216] -// * SLM interval schedule followup - add back `getFieldName` style getters {es-pull}112123[#112123] -// -// Infra/Circuit Breakers:: -// * Add link to Circuit Breaker "Data too large" exception message {es-pull}113561[#113561] -// -// Infra/Core:: -// * Add nanos support to `ZonedDateTime` serialization {es-pull}111689[#111689] (issue: {es-issue}68292[#68292]) -// * Extend logging for dropped warning headers {es-pull}111624[#111624] (issue: {es-issue}90527[#90527]) -// * Give the kibana system user permission to read security entities {es-pull}114363[#114363] -// -// Infra/Metrics:: -// * Add `TaskManager` to `pluginServices` {es-pull}112687[#112687] -// * Add `ensureGreen` test method for use with `adminClient` {es-pull}113425[#113425] -// -// Infra/REST API:: -// * Optimize the loop processing of URL decoding {es-pull}110237[#110237] (issue: {es-issue}110235[#110235]) -// -// Infra/Scripting:: -// * Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts {es-pull}114002[#114002] -// * Expose `HexFormat` in Painless {es-pull}112412[#112412] -// -// Infra/Settings:: -// * Improve exception message for bad environment variable 
placeholders in settings {es-pull}114552[#114552] (issue: {es-issue}110858[#110858]) -// * Reprocess operator file settings when settings service starts, due to node restart or master node change {es-pull}114295[#114295] -// -// Ingest Node:: -// * Add `size_in_bytes` to enrich cache stats {es-pull}110578[#110578] -// * Add support for templates when validating mappings in the simulate ingest API {es-pull}111161[#111161] -// * Adding `index_template_substitutions` to the simulate ingest API {es-pull}114128[#114128] -// * Adding component template substitutions to the simulate ingest API {es-pull}113276[#113276] -// * Adding mapping validation to the simulate ingest API {es-pull}110606[#110606] -// * Adding support for additional mapping to simulate ingest API {es-pull}114742[#114742] -// * Adding support for simulate ingest mapping adddition for indices with mappings that do not come from templates {es-pull}115359[#115359] -// * Adds example plugin for custom ingest processor {es-pull}112282[#112282] (issue: {es-issue}111539[#111539]) -// * Fix unnecessary mustache template evaluation {es-pull}110986[#110986] (issue: {es-issue}110191[#110191]) -// * Listing all available databases in the _ingest/geoip/database API {es-pull}113498[#113498] -// * Make enrich cache based on memory usage {es-pull}111412[#111412] (issue: {es-issue}106081[#106081]) -// * Tag redacted document in ingest metadata {es-pull}113552[#113552] -// * Verify Maxmind database types in the geoip processor {es-pull}114527[#114527] -// -// Logs:: -// * Add validation for synthetic source mode in logs mode indices {es-pull}110677[#110677] -// * Store original source for keywords using a normalizer {es-pull}112151[#112151] -// -// Machine Learning:: -// * Add Completion Inference API for Alibaba Cloud AI Search Model {es-pull}112512[#112512] -// * Add DeBERTa-V2/V3 tokenizer {es-pull}111852[#111852] -// * Add Streaming Inference spec {es-pull}113812[#113812] -// * Add chunking settings configuration to `CohereService,` `AmazonBedrockService,` and `AzureOpenAiService` {es-pull}113897[#113897] -// * Add chunking settings configuration to `ElasticsearchService/ELSER` {es-pull}114429[#114429] -// * Add custom rule parameters to force time shift {es-pull}110974[#110974] -// * Adding chunking settings to `GoogleVertexAiService,` `AzureAiStudioService,` and `AlibabaCloudSearchService` {es-pull}113981[#113981] -// * Adding chunking settings to `MistralService,` `GoogleAiStudioService,` and `HuggingFaceService` {es-pull}113623[#113623] -// * Adds a new Inference API for streaming responses back to the user. 
{es-pull}113158[#113158] -// * Create `StreamingHttpResultPublisher` {es-pull}112026[#112026] -// * Create an ml node inference endpoint referencing an existing model {es-pull}114750[#114750] -// * Default inference endpoint for ELSER {es-pull}113873[#113873] -// * Default inference endpoint for the multilingual-e5-small model {es-pull}114683[#114683] -// * Enable OpenAI Streaming {es-pull}113911[#113911] -// * Filter empty task settings objects from the API response {es-pull}114389[#114389] -// * Increase default `queue_capacity` to 10_000 and decrease max `queue_capacity` to 100_000 {es-pull}115041[#115041] -// * Migrate Inference to `ChunkedToXContent` {es-pull}111655[#111655] -// * Register Task while Streaming {es-pull}112369[#112369] -// * Server-Sent Events for Inference response {es-pull}112565[#112565] -// * Stream Anthropic Completion {es-pull}114321[#114321] -// * Stream Azure Completion {es-pull}114464[#114464] -// * Stream Bedrock Completion {es-pull}114732[#114732] -// * Stream Cohere Completion {es-pull}114080[#114080] -// * Stream Google Completion {es-pull}114596[#114596] -// * Stream OpenAI Completion {es-pull}112677[#112677] -// * Support sparse embedding models in the elasticsearch inference service {es-pull}112270[#112270] -// * Switch default chunking strategy to sentence {es-pull}114453[#114453] -// * Upgrade to AWS SDK v2 {es-pull}114309[#114309] (issue: {es-issue}110590[#110590]) -// * Use the same chunking configurations for models in the Elasticsearch service {es-pull}111336[#111336] -// * Validate streaming HTTP Response {es-pull}112481[#112481] -// * Wait for allocation on scale up {es-pull}114719[#114719] -// * [Inference API] Add Alibaba Cloud AI Search Model support to Inference API {es-pull}111181[#111181] -// * [Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API {es-pull}111181[#111181] -// * [Inference API] Introduce Update API to change some aspects of existing inference endpoints {es-pull}114457[#114457] -// * [Inference API] Prevent inference endpoints from being deleted if they are referenced by semantic text {es-pull}110399[#110399] -// * [Inference API] alibabacloud ai search service support chunk infer to support semantic_text field {es-pull}110399[#110399] -// -// Mapping:: -// * Add Field caps support for Semantic Text {es-pull}111809[#111809] -// * Add Lucene segment-level fields stats {es-pull}111123[#111123] -// * Add Search Inference ID To Semantic Text Mapping {es-pull}113051[#113051] -// * Add object param for keeping synthetic source {es-pull}113690[#113690] -// * Add support for multi-value dimensions {es-pull}112645[#112645] (issue: {es-issue}110387[#110387]) -// * Allow dimension fields to have multiple values in standard and logsdb index mode {es-pull}112345[#112345] (issues: {es-issue}112232[#112232], {es-issue}112239[#112239]) -// * Allow fields with dots in sparse vector field mapper {es-pull}111981[#111981] (issue: {es-issue}109118[#109118]) -// * Allow querying `index_mode` {es-pull}110676[#110676] -// * Configure keeping source in `FieldMapper` {es-pull}112706[#112706] -// * Control storing array source with index setting {es-pull}112397[#112397] -// * Introduce mode `subobjects=auto` for objects {es-pull}110524[#110524] -// * Update `semantic_text` field to support indexing numeric and boolean data types {es-pull}111284[#111284] -// * Use ELSER By Default For Semantic Text {es-pull}113563[#113563] -// * Use fallback synthetic source for `copy_to` and doc_values: false cases {es-pull}112294[#112294] 
(issues: {es-issue}110753[#110753], {es-issue}110038[#110038], {es-issue}109546[#109546]) -// -// Network:: -// * Add links to network disconnect troubleshooting {es-pull}112330[#112330] -// -// Ranking:: -// * Add timeout and cancellation check to rescore phase {es-pull}115048[#115048] -// -// Recovery:: -// * Trigger merges after recovery {es-pull}113102[#113102] -// -// Relevance:: -// * Add a query rules tester API call {es-pull}114168[#114168] -// -// Search:: -// * Add initial support for `semantic_text` field type {es-pull}113920[#113920] -// * Add more `dense_vector` details for cluster stats field stats {es-pull}113607[#113607] -// * Add range and regexp Intervals {es-pull}111465[#111465] -// * Adding support for `allow_partial_search_results` in PIT {es-pull}111516[#111516] -// * Allow incubating Panama Vector in simdvec, and add vectorized `ipByteBin` {es-pull}112933[#112933] -// * Avoid using concurrent collector manager in `LuceneChangesSnapshot` {es-pull}113816[#113816] -// * Bool query early termination should also consider `must_not` clauses {es-pull}115031[#115031] -// * Deduplicate Kuromoji User Dictionary {es-pull}112768[#112768] -// * Multi term intervals: increase max_expansions {es-pull}112826[#112826] (issue: {es-issue}110491[#110491]) -// * Search coordinator uses `event.ingested` in cluster state to do rewrites {es-pull}111523[#111523] -// * Update cluster stats for retrievers {es-pull}114109[#114109] -// -// Security:: -// * (logger) change from error to warn for short circuiting user {es-pull}112895[#112895] -// * Add asset criticality indices for `kibana_system_user` {es-pull}113588[#113588] -// * Add tier preference to security index settings allowlist {es-pull}111818[#111818] -// * [Service Account] Add `AutoOps` account {es-pull}111316[#111316] -// -// Snapshot/Restore:: -// * Add `max_multipart_parts` setting to S3 repository {es-pull}113989[#113989] -// * Add support for Azure Managed Identity {es-pull}111344[#111344] -// * Add telemetry for repository usage {es-pull}112133[#112133] -// * Add workaround for missing shard gen blob {es-pull}112337[#112337] -// * Clean up dangling S3 multipart uploads {es-pull}111955[#111955] (issues: {es-issue}101169[#101169], {es-issue}44971[#44971]) -// * Execute shard snapshot tasks in shard-id order {es-pull}111576[#111576] (issue: {es-issue}108739[#108739]) -// * Include account name in Azure settings exceptions {es-pull}111274[#111274] -// * Introduce repository integrity verification API {es-pull}112348[#112348] (issue: {es-issue}52622[#52622]) -// * Retry `S3BlobContainer#getRegister` on all exceptions {es-pull}114813[#114813] -// * Track shard snapshot progress during node shutdown {es-pull}112567[#112567] -// -// Stats:: -// * Track search and fetch failure stats {es-pull}113988[#113988] -// -// TSDB:: -// * Add support for boolean dimensions {es-pull}111457[#111457] (issue: {es-issue}111338[#111338]) -// * Stop iterating over all fields to extract @timestamp value {es-pull}110603[#110603] (issue: {es-issue}92297[#92297]) -// * Support booleans in routing path {es-pull}111445[#111445] -// -// Vector Search:: -// * Dense vector field types updatable for int4 {es-pull}110928[#110928] -// * Use native scalar scorer for int8_flat index {es-pull}111071[#111071] -// -// [[feature-9.0.0]] -// [float] -// === New features -// -// Data streams:: -// * Introduce global retention in data stream lifecycle. 
{es-pull}111972[#111972] -// * X-pack/plugin/otel: introduce x-pack-otel plugin {es-pull}111091[#111091] -// -// ES|QL:: -// * Add ESQL match function {es-pull}113374[#113374] -// * ESQL: Add `MV_PSERIES_WEIGHTED_SUM` for score calculations used by security solution {es-pull}109017[#109017] -// * ESQL: Add async ID and `is_running` headers to ESQL async query {es-pull}111840[#111840] -// * ESQL: Add boolean support to Max and Min aggs {es-pull}110527[#110527] -// * ESQL: Add boolean support to TOP aggregation {es-pull}110718[#110718] -// * ESQL: Added `mv_percentile` function {es-pull}111749[#111749] (issue: {es-issue}111591[#111591]) -// * ESQL: INLINESTATS {es-pull}109583[#109583] (issue: {es-issue}107589[#107589]) -// * ESQL: Introduce per agg filter {es-pull}113735[#113735] -// * ESQL: Strings support for MAX and MIN aggregations {es-pull}111544[#111544] -// * ESQL: Support IP fields in MAX and MIN aggregations {es-pull}110921[#110921] -// * ESQL: TOP aggregation IP support {es-pull}111105[#111105] -// * ESQL: TOP support for strings {es-pull}113183[#113183] (issue: {es-issue}109849[#109849]) -// * ESQL: `mv_median_absolute_deviation` function {es-pull}112055[#112055] (issue: {es-issue}111590[#111590]) -// * Remove snapshot build restriction for match and qstr functions {es-pull}114482[#114482] -// * Search in ES|QL: Add MATCH operator {es-pull}110971[#110971] -// -// ILM+SLM:: -// * SLM Interval based scheduling {es-pull}110847[#110847] -// -// Inference:: -// * EIS integration {es-pull}111154[#111154] -// -// Ingest Node:: -// * Add a `terminate` ingest processor {es-pull}114157[#114157] (issue: {es-issue}110218[#110218]) -// -// Machine Learning:: -// * Inference autoscaling {es-pull}109667[#109667] -// * Telemetry for inference adaptive allocations {es-pull}110630[#110630] -// -// Relevance:: -// * [Query rules] Add `exclude` query rule type {es-pull}111420[#111420] -// -// Search:: -// * Async search: Add ID and "is running" http headers {es-pull}112431[#112431] (issue: {es-issue}109576[#109576]) -// * Cross-cluster search telemetry {es-pull}113825[#113825] -// -// Vector Search:: -// * Adding new bbq index types behind a feature flag {es-pull}114439[#114439] + +Aggregations:: +* Handle with `illegalArgumentExceptions` negative values in HDR percentile aggregations {es-pull}116174[#116174] (issue: {es-issue}115777[#115777]) + +Analysis:: +* Adjust analyze limit exception to be a `bad_request` {es-pull}116325[#116325] + +CCS:: +* Fix long metric deserialize & add - auto-resize needs to be set manually {es-pull}117105[#117105] (issue: {es-issue}116914[#116914]) + +CRUD:: +* Preserve thread context when waiting for segment generation in RTG {es-pull}114623[#114623] +* Standardize error code when bulk body is invalid {es-pull}114869[#114869] + +Data streams:: +* Add missing header in `put_data_lifecycle` rest-api-spec {es-pull}116292[#116292] + +EQL:: +* Don't use a `BytesStreamOutput` to copy keys in `BytesRefBlockHash` {es-pull}114819[#114819] (issue: {es-issue}114599[#114599]) + +ES|QL:: +* Added stricter range type checks and runtime warnings for ENRICH {es-pull}115091[#115091] (issues: {es-issue}107357[#107357], {es-issue}116799[#116799]) +* Don't return TEXT type for functions that take TEXT {es-pull}114334[#114334] (issues: {es-issue}111537[#111537], {es-issue}114333[#114333]) +* ESQL: Fix sorts containing `_source` {es-pull}116980[#116980] (issue: {es-issue}116659[#116659]) +* ESQL: fix the column position in errors {es-pull}117153[#117153] +* ES|QL: Fix stats by constant 
expression {es-pull}114899[#114899] +* Fix NPE in `EnrichLookupService` on mixed clusters with <8.14 versions {es-pull}116583[#116583] (issues: {es-issue}116529[#116529], {es-issue}116544[#116544]) +* Fix TDigestState.read CB leaks {es-pull}114303[#114303] (issue: {es-issue}114194[#114194]) +* Fixing remote ENRICH by pushing the Enrich inside `FragmentExec` {es-pull}114665[#114665] (issue: {es-issue}105095[#105095]) +* Use `SearchStats` instead of field.isAggregatable in data node planning {es-pull}115744[#115744] (issue: {es-issue}115737[#115737]) +* [ESQL] Fix Binary Comparisons on Date Nanos {es-pull}116346[#116346] +* [ES|QL] To_DatePeriod and To_TimeDuration return better error messages on `union_type` fields {es-pull}114934[#114934] + +Infra/CLI:: +* Fix NPE on plugin sync {es-pull}115640[#115640] (issue: {es-issue}114818[#114818]) + +Infra/Metrics:: +* Make `randomInstantBetween` always return value in range [minInstant, `maxInstant]` {es-pull}114177[#114177] + +Infra/REST API:: +* Fixed a `NullPointerException` in `_capabilities` API when the `path` parameter is null. {es-pull}113413[#113413] (issue: {es-issue}113413[#113413]) + +Infra/Settings:: +* Don't allow secure settings in YML config (109115) {es-pull}115779[#115779] (issue: {es-issue}109115[#109115]) + +Ingest Node:: +* Add warning headers for ingest pipelines containing special characters {es-pull}114837[#114837] (issue: {es-issue}104411[#104411]) +* Reducing error-level stack trace logging for normal events in `GeoIpDownloader` {es-pull}114924[#114924] + +Logs:: +* Always check if index mode is logsdb {es-pull}116922[#116922] +* Prohibit changes to index mode, source, and sort settings during resize {es-pull}115812[#115812] + +Machine Learning:: +* Fix bug in ML autoscaling when some node info is unavailable {es-pull}116650[#116650] +* Fix deberta tokenizer bug caused by bug in normalizer {es-pull}117189[#117189] +* Hides `hugging_face_elser` service from the `GET _inference/_services API` {es-pull}116664[#116664] (issue: {es-issue}116644[#116644]) +* Mitigate IOSession timeouts {es-pull}115414[#115414] (issues: {es-issue}114385[#114385], {es-issue}114327[#114327], {es-issue}114105[#114105], {es-issue}114232[#114232]) +* Propagate scoring function through random sampler {es-pull}116957[#116957] (issue: {es-issue}110134[#110134]) +* Update Deberta tokenizer {es-pull}116358[#116358] +* Wait for up to 2 seconds for yellow status before starting search {es-pull}115938[#115938] (issues: {es-issue}107777[#107777], {es-issue}105955[#105955], {es-issue}107815[#107815], {es-issue}112191[#112191]) + +Mapping:: +* Change synthetic source logic for `constant_keyword` {es-pull}117182[#117182] (issue: {es-issue}117083[#117083]) +* Ignore conflicting fields during dynamic mapping update {es-pull}114227[#114227] (issue: {es-issue}114228[#114228]) + +Network:: +* Use underlying `ByteBuf` `refCount` for `ReleasableBytesReference` {es-pull}116211[#116211] + +Ranking:: +* Propagating nested `inner_hits` to the parent compound retriever {es-pull}116408[#116408] (issue: {es-issue}116397[#116397]) + +Relevance:: +* Fix handling of bulk requests with semantic text fields and delete ops {es-pull}116942[#116942] + +Search:: +* Catch and handle disconnect exceptions in search {es-pull}115836[#115836] +* Fields caps does not honour ignore_unavailable {es-pull}116021[#116021] (issue: {es-issue}107767[#107767]) +* Fix handling of time exceeded exception in fetch phase {es-pull}116676[#116676] +* Fix leak in `DfsQueryPhase` and introduce search 
disconnect stress test {es-pull}116060[#116060] (issue: {es-issue}115056[#115056]) +* Inconsistency in the _analyzer api when the index is not included {es-pull}115930[#115930] +* Semantic text simple partial update {es-pull}116478[#116478] +* Updated Date Range to Follow Documentation When Assuming Missing Values {es-pull}112258[#112258] (issue: {es-issue}111484[#111484]) +* Validate missing shards after the coordinator rewrite {es-pull}116382[#116382] +* _validate does not honour ignore_unavailable {es-pull}116656[#116656] (issue: {es-issue}116594[#116594]) + +Snapshot/Restore:: +* Retry throttled snapshot deletions {es-pull}113237[#113237] + +Vector Search:: +* Update Semantic Query To Handle Zero Size Responses {es-pull}116277[#116277] (issue: {es-issue}116083[#116083]) + +Watcher:: +* Watch Next Run Interval Resets On Shard Move or Node Restart {es-pull}115102[#115102] (issue: {es-issue}111433[#111433]) + +[[deprecation-9.0.0]] +[float] +=== Deprecations + +Ingest Node:: +* Fix `_type` deprecation on simulate pipeline API {es-pull}116259[#116259] + +Machine Learning:: +* [Inference API] Deprecate elser service {es-pull}113216[#113216] + +Mapping:: +* Deprecate `_source.mode` in mappings {es-pull}116689[#116689] + +[[enhancement-9.0.0]] +[float] +=== Enhancements + +Allocation:: +* Only publish desired balance gauges on master {es-pull}115383[#115383] + +Authorization:: +* Add a `monitor_stats` privilege and allow that privilege for remote cluster privileges {es-pull}114964[#114964] +* [Security Solution] Add `create_index` to `kibana_system` role for index/DS `.logs-endpoint.action.responses-*` {es-pull}115241[#115241] + +CRUD:: +* Suppress merge-on-recovery for older indices {es-pull}113462[#113462] + +Data streams:: +* Adding a deprecation info API warning for data streams with old indices {es-pull}116447[#116447] +* Apm-data: disable date_detection for all apm data streams {es-pull}116995[#116995] + +Distributed:: +* Metrics for incremental bulk splits {es-pull}116765[#116765] +* Use Azure blob batch API to delete blobs in batches {es-pull}114566[#114566] + +ES|QL:: +* Add ES|QL `bit_length` function {es-pull}115792[#115792] +* ESQL: Honor skip_unavailable setting for nonmatching indices errors at planning time {es-pull}116348[#116348] (issue: {es-issue}114531[#114531]) +* ESQL: Remove parent from `FieldAttribute` {es-pull}112881[#112881] +* ESQL: extract common filter from aggs {es-pull}115678[#115678] +* ESQL: optimise aggregations filtered by false/null into evals {es-pull}115858[#115858] +* ES|QL CCS uses `skip_unavailable` setting for handling disconnected remote clusters {es-pull}115266[#115266] (issue: {es-issue}114531[#114531]) +* ES|QL: add metrics for functions {es-pull}114620[#114620] +* Esql Enable Date Nanos (tech preview) {es-pull}117080[#117080] +* Support partial sort fields in TopN pushdown {es-pull}116043[#116043] (issue: {es-issue}114515[#114515]) +* [ES|QL] Implicit casting string literal to intervals {es-pull}115814[#115814] (issue: {es-issue}115352[#115352]) + +Health:: +* Increase `replica_unassigned_buffer_time` default from 3s to 5s {es-pull}112834[#112834] + +Indices APIs:: +* Ensure class resource stream is closed in `ResourceUtils` {es-pull}116437[#116437] + +Inference:: +* Add version prefix to Inference Service API path {es-pull}117095[#117095] + +Infra/Circuit Breakers:: +* Add link to Circuit Breaker "Data too large" exception message {es-pull}113561[#113561] + +Infra/Core:: +* Support for unsigned 64 bit numbers in Cpu stats {es-pull}114681[#114681] 
(issue: {es-issue}112274[#112274])
+
+Infra/Metrics::
+* Add `ensureGreen` test method for use with `adminClient` {es-pull}113425[#113425]
+
+Infra/Scripting::
+* Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts {es-pull}114002[#114002]
+
+Ingest Node::
+* Add postal_code support to the City and Enterprise databases {es-pull}114193[#114193]
+* Add support for registered country fields for maxmind geoip databases {es-pull}114521[#114521]
+* Adding support for additional mapping to simulate ingest API {es-pull}114742[#114742]
+* Adding support for simulate ingest mapping addition for indices with mappings that do not come from templates {es-pull}115359[#115359]
+* Support IPinfo database configurations {es-pull}114548[#114548]
+* Support more maxmind fields in the geoip processor {es-pull}114268[#114268]
+
+Logs::
+* Add logsdb telemetry {es-pull}115994[#115994]
+* Add num docs and size to logsdb telemetry {es-pull}116128[#116128]
+* Feature: re-structure document ID generation favoring _id inverted index compression {es-pull}104683[#104683]
+
+Machine Learning::
+* Add DeBERTa-V2/V3 tokenizer {es-pull}111852[#111852]
+* Add special case for elastic reranker in inference API {es-pull}116962[#116962]
+* Adding inference endpoint validation for `AzureAiStudioService` {es-pull}113713[#113713]
+* Adds support for `input_type` field to Vertex inference service {es-pull}116431[#116431]
+* Enable built-in Inference Endpoints and default for Semantic Text {es-pull}116931[#116931]
+* Increase default `queue_capacity` to 10_000 and decrease max `queue_capacity` to 100_000 {es-pull}115041[#115041]
+* Inference duration and error metrics {es-pull}115876[#115876]
+* Remove all mentions of eis and gateway and deprecate flags that do {es-pull}116692[#116692]
+* [Inference API] Add API to get configuration of inference services {es-pull}114862[#114862]
+* [Inference API] Improve chunked results error message {es-pull}115807[#115807]
+
+Network::
+* Allow http unsafe buffers by default {es-pull}116115[#116115]
+
+Recovery::
+* Attempt to clean up index before remote transfer {es-pull}115142[#115142] (issue: {es-issue}104473[#104473])
+* Trigger merges after recovery {es-pull}113102[#113102]
+
+Reindex::
+* Change Reindexing metrics unit from millis to seconds {es-pull}115721[#115721]
+
+Relevance::
+* Add query rules retriever {es-pull}114855[#114855]
+* Add tracking for query rule types {es-pull}116357[#116357]
+
+Search::
+* Add Search Phase APM metrics {es-pull}113194[#113194]
+* Add `docvalue_fields` Support for `dense_vector` Fields {es-pull}114484[#114484] (issue: {es-issue}108470[#108470])
+* Add initial support for `semantic_text` field type {es-pull}113920[#113920]
+* Adds access to flags no_sub_matches and no_overlapping_matches to hyphenation-decompounder-tokenfilter {es-pull}115459[#115459] (issue: {es-issue}97849[#97849])
+* Better sizing `BytesRef` for Strings in Queries {es-pull}115655[#115655]
+* Enable `_tier` based coordinator rewrites for all indices (not just mounted indices) {es-pull}115797[#115797]
+* Only aggregations require at least one shard request {es-pull}115314[#115314]
+
+Security::
+* Add refresh `.security` index call between security migrations {es-pull}114879[#114879]
+
+Snapshot/Restore::
+* Improve message about insecure S3 settings {es-pull}116915[#116915]
+* Retry `S3BlobContainer#getRegister` on all exceptions {es-pull}114813[#114813]
+* Split searchable snapshot into multiple repo operations {es-pull}116918[#116918]
+* Track shard snapshot progress during node shutdown {es-pull}112567[#112567] + +Vector Search:: +* Add support for bitwise inner-product in painless {es-pull}116082[#116082] + +[[feature-9.0.0]] +[float] +=== New features + +Data streams:: +* Add default ILM policies and switch to ILM for apm-data plugin {es-pull}115687[#115687] + +ES|QL:: +* Add support for `BYTE_LENGTH` scalar function {es-pull}116591[#116591] +* Esql/lookup join grammar {es-pull}116515[#116515] +* Remove snapshot build restriction for match and qstr functions {es-pull}114482[#114482] + +Search:: +* ESQL - Add match operator (:) {es-pull}116819[#116819] [[upgrade-9.0.0]] [float] === Upgrades -// -// Infra/Core:: -// * Upgrade xcontent to Jackson 2.17.0 {es-pull}111948[#111948] -// * Upgrade xcontent to Jackson 2.17.2 {es-pull}112320[#112320] -// -// Infra/Metrics:: -// * Update APM Java Agent to support JDK 23 {es-pull}115194[#115194] (issues: {es-issue}115101[#115101], {es-issue}115100[#115100]) -// -// Search:: -// * Upgrade to Lucene 10 {es-pull}114741[#114741] -// * Upgrade to Lucene 9.12 {es-pull}113333[#113333] -// -// Snapshot/Restore:: -// * Upgrade Azure SDK {es-pull}111225[#111225] -// * Upgrade `repository-azure` dependencies {es-pull}112277[#112277] + +Search:: +* Upgrade to Lucene 10 {es-pull}114741[#114741] diff --git a/docs/reference/release-notes/highlights.asciidoc b/docs/reference/release-notes/highlights.asciidoc index edecd4f727583..b87081639c684 100644 --- a/docs/reference/release-notes/highlights.asciidoc +++ b/docs/reference/release-notes/highlights.asciidoc @@ -1,6 +1,3 @@ -// THIS IS A GENERATED FILE. DO NOT EDIT DIRECTLY. -// The content generated here are is not correct and most has been manually commented out until it can be fixed. -// See ES-9931 for more details. [[release-highlights]] == What's new in {minor-version} @@ -12,163 +9,14 @@ For detailed information about this release, see the <> and <>. endif::[] -// -// // tag::notable-highlights[] -// -// [discrete] -// [[esql_inlinestats]] -// === ESQL: INLINESTATS -// This adds the `INLINESTATS` command to ESQL which performs a STATS and -// then enriches the results into the output stream. So, this query: -// -// [source,esql] -// ---- -// FROM test -// | INLINESTATS m=MAX(a * b) BY b -// | WHERE m == a * b -// | SORT a DESC, b DESC -// | LIMIT 3 -// ---- -// -// Produces output like: -// -// | a | b | m | -// | --- | --- | ----- | -// | 99 | 999 | 98901 | -// | 99 | 998 | 98802 | -// | 99 | 997 | 98703 | -// -// {es-pull}109583[#109583] -// -// [discrete] -// [[always_allow_rebalancing_by_default]] -// === Always allow rebalancing by default -// In earlier versions of {es} the `cluster.routing.allocation.allow_rebalance` setting defaults to -// `indices_all_active` which blocks all rebalancing moves while the cluster is in `yellow` or `red` health. This was -// appropriate for the legacy allocator which might do too many rebalancing moves otherwise. Today's allocator has -// better support for rebalancing a cluster that is not in `green` health, and expects to be able to rebalance some -// shards away from over-full nodes to avoid allocating shards to undesirable locations in the first place. From -// version 8.16 `allow_rebalance` setting defaults to `always` unless the legacy allocator is explicitly enabled. 
-// -// {es-pull}111015[#111015] -// -// [discrete] -// [[add_global_retention_in_data_stream_lifecycle]] -// === Add global retention in data stream lifecycle -// Data stream lifecycle now supports configuring retention on a cluster level, -// namely global retention. Global retention \nallows us to configure two different -// retentions: -// -// - `data_streams.lifecycle.retention.default` is applied to all data streams managed -// by the data stream lifecycle that do not have retention defined on the data stream level. -// - `data_streams.lifecycle.retention.max` is applied to all data streams managed by the -// data stream lifecycle and it allows any data stream \ndata to be deleted after the `max_retention` has passed. -// -// {es-pull}111972[#111972] -// -// [discrete] -// [[enable_zstandard_compression_for_indices_with_index_codec_set_to_best_compression]] -// === Enable ZStandard compression for indices with index.codec set to best_compression -// Before DEFLATE compression was used to compress stored fields in indices with index.codec index setting set to -// best_compression, with this change ZStandard is used as compression algorithm to stored fields for indices with -// index.codec index setting set to best_compression. The usage ZStandard results in less storage usage with a -// similar indexing throughput depending on what options are used. Experiments with indexing logs have shown that -// ZStandard offers ~12% lower storage usage and a ~14% higher indexing throughput compared to DEFLATE. -// -// {es-pull}112665[#112665] -// -// [discrete] -// [[esql_introduce_per_agg_filter]] -// === ESQL: Introduce per agg filter -// Add support for aggregation scoped filters that work dynamically on the -// data in each group. -// -// [source,esql] -// ---- -// | STATS success = COUNT(*) WHERE 200 <= code AND code < 300, -// redirect = COUNT(*) WHERE 300 <= code AND code < 400, -// client_err = COUNT(*) WHERE 400 <= code AND code < 500, -// server_err = COUNT(*) WHERE 500 <= code AND code < 600, -// total_count = COUNT(*) -// ---- -// -// Implementation wise, the base AggregateFunction has been extended to -// allow a filter to be passed on. This is required to incorporate the -// filter as part of the aggregate equality/identity which would fail with -// the filter as an external component. -// As part of the process, the serialization for the existing aggregations -// had to be fixed so AggregateFunction implementations so that it -// delegates to their parent first. -// -// {es-pull}113735[#113735] -// -// // end::notable-highlights[] -// -// -// [discrete] -// [[esql_multi_value_fields_supported_in_geospatial_predicates]] -// === ESQL: Multi-value fields supported in Geospatial predicates -// Supporting multi-value fields in `WHERE` predicates is a challenge due to not knowing whether `ALL` or `ANY` -// of the values in the field should pass the predicate. -// For example, should the field `age:[10,30]` pass the predicate `WHERE age>20` or not? -// This ambiguity does not exist with the spatial predicates -// `ST_INTERSECTS` and `ST_DISJOINT`, because the choice between `ANY` or `ALL` -// is implied by the predicate itself. -// Consider a predicate checking a field named `location` against a test geometry named `shape`: -// -// * `ST_INTERSECTS(field, shape)` - true if `ANY` value can intersect the shape -// * `ST_DISJOINT(field, shape)` - true only if `ALL` values are disjoint from the shape -// -// This works even if the shape argument is itself a complex or compound geometry. 
-// -// Similar logic exists for `ST_CONTAINS` and `ST_WITHIN` predicates, but these are not as easily solved -// with `ANY` or `ALL`, because a collection of geometries contains another collection if each of the contained -// geometries is within at least one of the containing geometries. Evaluating this requires that the multi-value -// field is first combined into a single geometry before performing the predicate check. -// -// * `ST_CONTAINS(field, shape)` - true if the combined geometry contains the shape -// * `ST_WITHIN(field, shape)` - true if the combined geometry is within the shape -// -// {es-pull}112063[#112063] -// -// [discrete] -// [[enhance_sort_push_down_to_lucene_to_cover_references_to_fields_st_distance_function]] -// === Enhance SORT push-down to Lucene to cover references to fields and ST_DISTANCE function -// The most used and likely most valuable geospatial search query in Elasticsearch is the sorted proximity search, -// finding items within a certain distance of a point of interest and sorting the results by distance. -// This has been possible in ES|QL since 8.15.0, but the sorting was done in-memory, not pushed down to Lucene. -// Now the sorting is pushed down to Lucene, which results in a significant performance improvement. -// -// Queries that perform both filtering and sorting on distance are supported. For example: -// -// [source,esql] -// ---- -// FROM test -// | EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(37.7749, -122.4194)")) -// | WHERE distance < 1000000 -// | SORT distance ASC, name DESC -// | LIMIT 10 -// ---- -// -// In addition, the support for sorting on EVAL expressions has been extended to cover references to fields: -// -// [source,esql] -// ---- -// FROM test -// | EVAL ref = field -// | SORT ref ASC -// | LIMIT 10 -// ---- -// -// {es-pull}112938[#112938] -// + +// The notable-highlights tag marks entries that +// should be featured in the Stack Installation and Upgrade Guide: +// tag::notable-highlights[] // [discrete] -// [[cross_cluster_search_telemetry]] -// === Cross-cluster search telemetry -// The cross-cluster search telemetry is collected when cross-cluster searches -// are performed, and is returned as "ccs" field in `_cluster/stats` output. -// It also add a new parameter `include_remotes=true` to the `_cluster/stats` API -// which will collect data from connected remote clusters. +// === Heading // -// {es-pull}113825[#113825] +// Description. 
+// end::notable-highlights[] + From 573b8a9252e55e24c1c34e9e734b37aafd077e83 Mon Sep 17 00:00:00 2001 From: Dan Rubinstein Date: Thu, 21 Nov 2024 14:47:24 -0500 Subject: [PATCH 03/10] Adding chunking settings to IbmWatsonxService (#114914) * Adding chunking settings to IbmWatsonxService * Removing feature flag * Update docs/changelog/114914.yaml --------- Co-authored-by: Elastic Machine --- docs/changelog/114914.yaml | 5 + .../ibmwatsonx/IbmWatsonxService.java | 29 ++- .../embeddings/IbmWatsonxEmbeddingsModel.java | 6 +- .../ibmwatsonx/IbmWatsonxServiceTests.java | 173 +++++++++++++++++- .../IbmWatsonxEmbeddingsModelTests.java | 1 + 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 docs/changelog/114914.yaml diff --git a/docs/changelog/114914.yaml b/docs/changelog/114914.yaml new file mode 100644 index 0000000000000..bad13e26682dc --- /dev/null +++ b/docs/changelog/114914.yaml @@ -0,0 +1,5 @@ +pr: 114914 +summary: Adding chunking settings to `IbmWatsonxService` +area: Machine Learning +type: enhancement +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index e960b0b777f2b..f4f4605c667c3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -16,6 +16,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -30,6 +31,7 @@ import org.elasticsearch.inference.configuration.SettingsConfigurationDisplayType; import org.elasticsearch.inference.configuration.SettingsConfigurationFieldType; import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder; import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; import org.elasticsearch.xpack.inference.external.action.ibmwatsonx.IbmWatsonxActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; @@ -86,11 +88,19 @@ public void parseRequestConfig( Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + ChunkingSettings chunkingSettings = null; + if (TaskType.TEXT_EMBEDDING.equals(taskType)) { + chunkingSettings = ChunkingSettingsBuilder.fromMap( + removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS) + ); + } + IbmWatsonxModel model = createModel( inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, + chunkingSettings, serviceSettingsMap, TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), ConfigurationParseContext.REQUEST @@ -112,6 +122,7 @@ private static IbmWatsonxModel createModel( TaskType taskType, Map serviceSettings, Map taskSettings, + ChunkingSettings chunkingSettings, @Nullable Map secretSettings, String failureMessage, ConfigurationParseContext context @@ -123,6 +134,7 @@ private static 
IbmWatsonxModel createModel( NAME, serviceSettings, taskSettings, + chunkingSettings, secretSettings, context ); @@ -141,11 +153,17 @@ public IbmWatsonxModel parsePersistedConfigWithSecrets( Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); Map secretSettingsMap = removeFromMapOrDefaultEmpty(secrets, ModelSecrets.SECRET_SETTINGS); + ChunkingSettings chunkingSettings = null; + if (TaskType.TEXT_EMBEDDING.equals(taskType)) { + chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS)); + } + return createModelFromPersistent( inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, + chunkingSettings, secretSettingsMap, parsePersistedConfigErrorMsg(inferenceEntityId, NAME) ); @@ -166,6 +184,7 @@ private static IbmWatsonxModel createModelFromPersistent( TaskType taskType, Map serviceSettings, Map taskSettings, + ChunkingSettings chunkingSettings, Map secretSettings, String failureMessage ) { @@ -174,6 +193,7 @@ private static IbmWatsonxModel createModelFromPersistent( taskType, serviceSettings, taskSettings, + chunkingSettings, secretSettings, failureMessage, ConfigurationParseContext.PERSISTENT @@ -185,11 +205,17 @@ public Model parsePersistedConfig(String inferenceEntityId, TaskType taskType, M Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + ChunkingSettings chunkingSettings = null; + if (TaskType.TEXT_EMBEDDING.equals(taskType)) { + chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS)); + } + return createModelFromPersistent( inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, + chunkingSettings, null, parsePersistedConfigErrorMsg(inferenceEntityId, NAME) ); @@ -266,7 +292,8 @@ protected void doChunkedInfer( var batchedRequests = new EmbeddingRequestChunker( input.getInputs(), EMBEDDING_MAX_BATCH_SIZE, - EmbeddingRequestChunker.EmbeddingType.FLOAT + EmbeddingRequestChunker.EmbeddingType.FLOAT, + model.getConfigurations().getChunkingSettings() ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = ibmWatsonxModel.accept(getActionCreator(getSender(), getServiceComponents()), taskSettings, inputType); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModel.java index d60e31b5d41c0..6b20e07ecc0a2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModel.java @@ -9,6 +9,7 @@ import org.apache.http.client.utils.URIBuilder; import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InputType; import org.elasticsearch.inference.ModelConfigurations; @@ -40,6 +41,7 @@ public IbmWatsonxEmbeddingsModel( String service, Map serviceSettings, Map taskSettings, + ChunkingSettings chunkingSettings, Map secrets, ConfigurationParseContext context ) { @@ -49,6 +51,7 
@@ public IbmWatsonxEmbeddingsModel( service, IbmWatsonxEmbeddingsServiceSettings.fromMap(serviceSettings, context), EmptyTaskSettings.INSTANCE, + chunkingSettings, DefaultSecretSettings.fromMap(secrets) ); } @@ -64,10 +67,11 @@ public IbmWatsonxEmbeddingsModel(IbmWatsonxEmbeddingsModel model, IbmWatsonxEmbe String service, IbmWatsonxEmbeddingsServiceSettings serviceSettings, TaskSettings taskSettings, + ChunkingSettings chunkingsettings, @Nullable DefaultSecretSettings secrets ) { super( - new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, taskSettings), + new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, taskSettings, chunkingsettings), new ModelSecrets(secrets), serviceSettings ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index d6c491f2b7cec..f7f37c5bcd15f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -69,6 +70,8 @@ import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap; import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityPool; import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.chunking.ChunkingSettingsTests.createRandomChunkingSettings; +import static org.elasticsearch.xpack.inference.chunking.ChunkingSettingsTests.createRandomChunkingSettingsMap; import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap; import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl; import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; @@ -124,6 +127,7 @@ public void testParseRequestConfig_CreatesAIbmWatsonxEmbeddingsModel() throws IO assertThat(embeddingsModel.getServiceSettings().url(), is(URI.create(url))); assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is(apiKey)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); }, e -> fail("Model parsing should have succeeded, but failed: " + e.getMessage())); service.parseRequestConfig( @@ -150,6 +154,45 @@ public void testParseRequestConfig_CreatesAIbmWatsonxEmbeddingsModel() throws IO } } + public void testParseRequestConfig_CreatesAIbmWatsonxEmbeddingsModelWhenChunkingSettingsProvided() throws IOException { + try (var service = createIbmWatsonxService()) { + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(IbmWatsonxEmbeddingsModel.class)); + + var embeddingsModel = (IbmWatsonxEmbeddingsModel) model; + assertThat(embeddingsModel.getServiceSettings().modelId(), is(modelId)); + 
assertThat(embeddingsModel.getServiceSettings().projectId(), is(projectId)); + assertThat(embeddingsModel.getServiceSettings().url(), is(URI.create(url))); + assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); + assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is(apiKey)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); + }, e -> fail("Model parsing should have succeeded, but failed: " + e.getMessage())); + + service.parseRequestConfig( + "id", + TaskType.TEXT_EMBEDDING, + getRequestConfigMap( + new HashMap<>( + Map.of( + ServiceFields.MODEL_ID, + modelId, + IbmWatsonxServiceFields.PROJECT_ID, + projectId, + ServiceFields.URL, + url, + IbmWatsonxServiceFields.API_VERSION, + apiVersion + ) + ), + new HashMap<>(Map.of()), + createRandomChunkingSettingsMap(), + getSecretSettingsMap(apiKey) + ), + modelListener + ); + } + } + public void testParseRequestConfig_ThrowsUnsupportedModelType() throws IOException { try (var service = createIbmWatsonxService()) { var failureListener = getModelListenerForException( @@ -235,6 +278,47 @@ public void testParsePersistedConfigWithSecrets_CreatesAIbmWatsonxEmbeddingsMode assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); assertThat(embeddingsModel.getTaskSettings(), is(EmptyTaskSettings.INSTANCE)); assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is(apiKey)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); + } + } + + public void testParsePersistedConfigWithSecrets_CreatesAIbmWatsonxEmbeddingsModelWhenChunkingSettingsProvided() throws IOException { + try (var service = createIbmWatsonxService()) { + var persistedConfig = getPersistedConfigMap( + new HashMap<>( + Map.of( + ServiceFields.MODEL_ID, + modelId, + IbmWatsonxServiceFields.PROJECT_ID, + projectId, + ServiceFields.URL, + url, + IbmWatsonxServiceFields.API_VERSION, + apiVersion + ) + ), + getTaskSettingsMapEmpty(), + createRandomChunkingSettingsMap(), + getSecretSettingsMap(apiKey) + ); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(IbmWatsonxEmbeddingsModel.class)); + + var embeddingsModel = (IbmWatsonxEmbeddingsModel) model; + assertThat(embeddingsModel.getServiceSettings().modelId(), is(modelId)); + assertThat(embeddingsModel.getServiceSettings().projectId(), is(projectId)); + assertThat(embeddingsModel.getServiceSettings().url(), is(URI.create(url))); + assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); + assertThat(embeddingsModel.getTaskSettings(), is(EmptyTaskSettings.INSTANCE)); + assertThat(embeddingsModel.getSecretSettings().apiKey().toString(), is(apiKey)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); } } @@ -399,6 +483,73 @@ public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExists } } + public void testParsePersistedConfig_CreatesAIbmWatsonxEmbeddingsModelWhenChunkingSettingsNotProvided() throws IOException { + try (var service = createIbmWatsonxService()) { + var persistedConfig = getPersistedConfigMap( + new HashMap<>( + Map.of( + ServiceFields.MODEL_ID, + modelId, + IbmWatsonxServiceFields.PROJECT_ID, + projectId, + ServiceFields.URL, + url, + IbmWatsonxServiceFields.API_VERSION, + apiVersion + ) + ), + 
getTaskSettingsMapEmpty(), + null + ); + + var model = service.parsePersistedConfig("id", TaskType.TEXT_EMBEDDING, persistedConfig.config()); + + assertThat(model, instanceOf(IbmWatsonxEmbeddingsModel.class)); + + var embeddingsModel = (IbmWatsonxEmbeddingsModel) model; + assertThat(embeddingsModel.getServiceSettings().modelId(), is(modelId)); + assertThat(embeddingsModel.getServiceSettings().projectId(), is(projectId)); + assertThat(embeddingsModel.getServiceSettings().url(), is(URI.create(url))); + assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); + assertThat(embeddingsModel.getTaskSettings(), is(EmptyTaskSettings.INSTANCE)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); + } + } + + public void testParsePersistedConfig_CreatesAIbmWatsonxEmbeddingsModelWhenChunkingSettingsProvided() throws IOException { + try (var service = createIbmWatsonxService()) { + var persistedConfig = getPersistedConfigMap( + new HashMap<>( + Map.of( + ServiceFields.MODEL_ID, + modelId, + IbmWatsonxServiceFields.PROJECT_ID, + projectId, + ServiceFields.URL, + url, + IbmWatsonxServiceFields.API_VERSION, + apiVersion + ) + ), + getTaskSettingsMapEmpty(), + createRandomChunkingSettingsMap(), + null + ); + + var model = service.parsePersistedConfig("id", TaskType.TEXT_EMBEDDING, persistedConfig.config()); + + assertThat(model, instanceOf(IbmWatsonxEmbeddingsModel.class)); + + var embeddingsModel = (IbmWatsonxEmbeddingsModel) model; + assertThat(embeddingsModel.getServiceSettings().modelId(), is(modelId)); + assertThat(embeddingsModel.getServiceSettings().projectId(), is(projectId)); + assertThat(embeddingsModel.getServiceSettings().url(), is(URI.create(url))); + assertThat(embeddingsModel.getServiceSettings().apiVersion(), is(apiVersion)); + assertThat(embeddingsModel.getTaskSettings(), is(EmptyTaskSettings.INSTANCE)); + assertThat(embeddingsModel.getConfigurations().getChunkingSettings(), instanceOf(ChunkingSettings.class)); + } + } + public void testInfer_ThrowsErrorWhenModelIsNotIbmWatsonxModel() throws IOException { var sender = mock(Sender.class); @@ -488,7 +639,15 @@ public void testInfer_SendsEmbeddingsRequest() throws IOException { } } - public void testChunkedInfer_Batches() throws IOException { + public void testChunkedInfer_ChunkingSettingsNotSet() throws IOException { + testChunkedInfer_Batches(null); + } + + public void testChunkedInfer_ChunkingSettingsSet() throws IOException { + testChunkedInfer_Batches(createRandomChunkingSettings()); + } + + private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws IOException { var input = List.of("foo", "bar"); var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); @@ -878,6 +1037,18 @@ private static ActionListener getModelListenerForException(Class excep }); } + private Map getRequestConfigMap( + Map serviceSettings, + Map taskSettings, + Map chunkingSettings, + Map secretSettings + ) { + var requestConfigMap = getRequestConfigMap(serviceSettings, taskSettings, secretSettings); + requestConfigMap.put(ModelConfigurations.CHUNKING_SETTINGS, chunkingSettings); + + return requestConfigMap; + } + private Map getRequestConfigMap( Map serviceSettings, Map taskSettings, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModelTests.java 
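
The recurring pattern in the IbmWatsonxService change above: when the task type is text embedding, an optional `chunking_settings` map is pulled out of the request or persisted config and turned into a `ChunkingSettings` object, with a missing entry falling back to builder defaults. A condensed, hedged sketch of that shape follows; the helper names and constants come from the diff itself, the wrapper method and import locations are best-effort assumptions, not the actual service code:

[source,java]
----
// Condensed sketch of the parsing pattern this patch repeats in
// parseRequestConfig, parsePersistedConfigWithSecrets, and parsePersistedConfig.
// NOTE: the wrapper method is hypothetical and import paths are assumptions;
// the helper and constants are the ones visible in the diff above.
import java.util.Map;

import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder;

import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty;

class ChunkingSettingsParseSketch {
    static ChunkingSettings parseChunkingSettings(TaskType taskType, Map<String, Object> config) {
        if (TaskType.TEXT_EMBEDDING.equals(taskType) == false) {
            return null; // only text-embedding models carry chunking settings
        }
        // A missing or empty "chunking_settings" entry falls back to the builder's defaults.
        return ChunkingSettingsBuilder.fromMap(removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS));
    }
}
----
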
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModelTests.java
index 93fd7e402a0de..33fcd752fbf30 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModelTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsModelTests.java
@@ -82,6 +82,7 @@ public static IbmWatsonxEmbeddingsModel createModel(
                 null
             ),
             EmptyTaskSettings.INSTANCE,
+            null,
             new DefaultSecretSettings(new SecureString(apiKey.toCharArray()))
         );
     }

From 6d963d324aa71a514cd4baa01bd0805eaba7426e Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Thu, 21 Nov 2024 12:50:32 -0800
Subject: [PATCH 04/10] Limit thread queue during init in ExchangeSource (#117273)

ES|QL doesn't work well with 500 clusters or with clusters of 500 nodes:
during the initialization of the exchange we enqueue three tasks per target
(cluster or node) onto the thread pool queue, which has a limit of 1000.
This PR reduces that to one task per target. I'm considering using
AsyncProcessor for these requests, but that will be a follow-up.

---
 .../exchange/ExchangeSourceHandler.java       | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java
index e3fc0e26e34e0..4baaf9ad89bd6 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java
@@ -220,20 +220,21 @@ void onSinkComplete() {
      * @see ExchangeSinkHandler#fetchPageAsync(boolean, ActionListener)
      */
     public void addRemoteSink(RemoteSink remoteSink, int instances) {
-        for (int i = 0; i < instances; i++) {
-            var fetcher = new RemoteSinkFetcher(remoteSink);
-            fetchExecutor.execute(new AbstractRunnable() {
-                @Override
-                public void onFailure(Exception e) {
-                    fetcher.onSinkFailed(e);
-                }
+        fetchExecutor.execute(new AbstractRunnable() {
+            @Override
+            public void onFailure(Exception e) {
+                failure.unwrapAndCollect(e);
+                buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading
+            }
 
-                @Override
-                protected void doRun() {
+            @Override
+            protected void doRun() {
+                for (int i = 0; i < instances; i++) {
+                    var fetcher = new RemoteSinkFetcher(remoteSink);
                     fetcher.fetchPage();
                 }
-            });
-        }
+            }
+        });
     }
 
     /**
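To make the queueing math concrete: with 500 targets, three tasks per target would need 1500 queue slots against a limit of 1000, while one task per target needs only 500. Below is a minimal, self-contained sketch of the before/after submission pattern; the Fetcher interface and the plain ExecutorService are hypothetical stand-ins for RemoteSinkFetcher and the fetch executor, not the actual ES|QL classes.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Supplier;

class SingleTaskFanOut {
    interface Fetcher {
        void fetchPage();
    }

    // Before: one queued task per instance, so N queue slots per target.
    static void perInstance(ExecutorService executor, Supplier<Fetcher> fetchers, int instances) {
        for (int i = 0; i < instances; i++) {
            Fetcher fetcher = fetchers.get();
            executor.execute(fetcher::fetchPage);
        }
    }

    // After: one queued task per target; the fan-out happens inside the task.
    static void singleTask(ExecutorService executor, Supplier<Fetcher> fetchers, int instances) {
        executor.execute(() -> {
            for (int i = 0; i < instances; i++) {
                // fetchPage() is assumed asynchronous, so this loop only kicks things off.
                fetchers.get().fetchPage();
            }
        });
    }

    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(2);
        singleTask(executor, () -> () -> {}, 3); // no-op fetchers for illustration
        executor.shutdown();
    }
}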
From 8e6e087ecc3ec883430b152622deab728f3f64bd Mon Sep 17 00:00:00 2001
From: Ioana Tagirta
Date: Thu, 21 Nov 2024 22:09:03 +0100
Subject: [PATCH 05/10] Remove StringQueryPredicate (#117134)

* Remove StringQueryPredicate

* Fix tests

---
 .../fulltext/StringQueryPredicate.java        | 62 -------------------
 .../core/planner/ExpressionTranslators.java   | 14 -----
 .../core/querydsl/query/QueryStringQuery.java | 16 ++---
 .../querydsl/query/QueryStringQueryTests.java | 20 +++---
 .../function/fulltext/FullTextWritables.java  |  9 +--
 .../physical/local/PushFiltersToSource.java   |  3 -
 .../planner/EsqlExpressionTranslators.java    |  3 +-
 .../StringQuerySerializationTests.java        | 34 ----------
 8 files changed, 15 insertions(+), 146 deletions(-)
 delete mode 100644 x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java
 delete mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java

diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java
deleted file mode 100644
index 95000a5364e12..0000000000000
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-package org.elasticsearch.xpack.esql.core.expression.predicate.fulltext;
-
-import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
-import org.elasticsearch.xpack.esql.core.tree.Source;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import static java.util.Collections.emptyList;
-
-public final class StringQueryPredicate extends FullTextPredicate {
-
-    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
-        Expression.class,
-        "StringQueryPredicate",
-        StringQueryPredicate::new
-    );
-
-    private final Map<String, Float> fields;
-
-    public StringQueryPredicate(Source source, String query, String options) {
-        super(source, query, options, emptyList());
-
-        // inferred
-        this.fields = FullTextUtils.parseFields(optionMap(), source);
-    }
-
-    StringQueryPredicate(StreamInput in) throws IOException {
-        super(in);
-        assert super.children().isEmpty();
-        this.fields = FullTextUtils.parseFields(optionMap(), source());
-    }
-
-    @Override
-    protected NodeInfo<StringQueryPredicate> info() {
-        return NodeInfo.create(this, StringQueryPredicate::new, query(), options());
-    }
-
-    @Override
-    public Expression replaceChildren(List<Expression> newChildren) {
-        throw new UnsupportedOperationException("this type of node doesn't have any children to replace");
-    }
-
-    public Map<String, Float> fields() {
-        return fields;
-    }
-
-    @Override
-    public String getWriteableName() {
-        return ENTRY.name;
-    }
-}
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/planner/ExpressionTranslators.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/planner/ExpressionTranslators.java
index b6383fac33299..7836522c77130 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/planner/ExpressionTranslators.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/planner/ExpressionTranslators.java
@@ -12,7 +12,6 @@
 import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
 import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
 import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.MultiMatchQueryPredicate;
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
 import org.elasticsearch.xpack.esql.core.expression.predicate.logical.And;
 import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Not;
 import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Or;
@@ -26,7 +25,6 @@
 import org.elasticsearch.xpack.esql.core.querydsl.query.MultiMatchQuery;
 import org.elasticsearch.xpack.esql.core.querydsl.query.NotQuery;
 import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
-import org.elasticsearch.xpack.esql.core.querydsl.query.QueryStringQuery;
 import org.elasticsearch.xpack.esql.core.querydsl.query.RegexQuery;
 import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
 import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -73,18 +71,6 @@ private static Query translateField(RegexMatch e, String targetFieldName) {
         }
     }
 
-    public static class StringQueries extends ExpressionTranslator<StringQueryPredicate> {
-
-        @Override
-        protected Query asQuery(StringQueryPredicate q, TranslatorHandler handler) {
-            return doTranslate(q, handler);
-        }
-
-        public static Query doTranslate(StringQueryPredicate q, TranslatorHandler handler) {
-            return new QueryStringQuery(q.source(), q.query(), q.fields(), q);
-        }
-    }
-
     public static class MultiMatches extends ExpressionTranslator<MultiMatchQueryPredicate> {
 
         @Override
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQuery.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQuery.java
index 8ac90e6314174..8dcb87749ae48 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQuery.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQuery.java
@@ -14,7 +14,6 @@
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.query.QueryStringQueryBuilder;
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 
 import java.util.Collections;
@@ -55,20 +54,13 @@ public class QueryStringQuery extends Query {
 
     private final String query;
     private final Map<String, Float> fields;
-    private StringQueryPredicate predicate;
     private final Map<String, String> options;
 
-    // dedicated constructor for QueryTranslator
-    public QueryStringQuery(Source source, String query, String fieldName) {
-        this(source, query, Collections.singletonMap(fieldName, Float.valueOf(1.0f)), null);
-    }
-
-    public QueryStringQuery(Source source, String query, Map<String, Float> fields, StringQueryPredicate predicate) {
+    public QueryStringQuery(Source source, String query, Map<String, Float> fields, Map<String, String> options) {
         super(source);
         this.query = query;
         this.fields = fields;
-        this.predicate = predicate;
-        this.options = predicate == null ? Collections.emptyMap() : predicate.optionMap();
+        this.options = options == null ? Collections.emptyMap() : options;
     }
 
     @Override
@@ -95,7 +87,7 @@ public String query() {
 
     @Override
     public int hashCode() {
-        return Objects.hash(query, fields, predicate);
+        return Objects.hash(query, fields);
     }
 
     @Override
@@ -109,7 +101,7 @@ public boolean equals(Object obj) {
         }
 
         QueryStringQuery other = (QueryStringQuery) obj;
-        return Objects.equals(query, other.query) && Objects.equals(fields, other.fields) && Objects.equals(predicate, other.predicate);
+        return Objects.equals(query, other.query) && Objects.equals(fields, other.fields) && Objects.equals(options, other.options);
     }
 
     @Override
diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQueryTests.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQueryTests.java
index 0f80011961092..22e7b93e84ce1 100644
--- a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQueryTests.java
+++ b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/querydsl/query/QueryStringQueryTests.java
@@ -10,42 +10,40 @@
 import org.elasticsearch.index.query.Operator;
 import org.elasticsearch.index.query.QueryStringQueryBuilder;
 import org.elasticsearch.test.ESTestCase;
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.util.StringUtils;
 
 import java.util.Collections;
+import java.util.Map;
 
 import static org.hamcrest.Matchers.equalTo;
 
 public class QueryStringQueryTests extends ESTestCase {
 
     public void testQueryBuilding() {
-        QueryStringQueryBuilder qb = getBuilder("lenient=true");
+        QueryStringQueryBuilder qb = getBuilder(Map.of("lenient", "true"));
         assertThat(qb.lenient(), equalTo(true));
 
-        qb = getBuilder("lenient=true;default_operator=AND");
+        qb = getBuilder(Map.of("lenient", "true", "default_operator", "AND"));
         assertThat(qb.lenient(), equalTo(true));
         assertThat(qb.defaultOperator(), equalTo(Operator.AND));
 
-        Exception e = expectThrows(IllegalArgumentException.class, () -> getBuilder("pizza=yummy"));
+        Exception e = expectThrows(IllegalArgumentException.class, () -> getBuilder(Map.of("pizza", "yummy")));
         assertThat(e.getMessage(), equalTo("illegal query_string option [pizza]"));
 
-        e = expectThrows(ElasticsearchParseException.class, () -> getBuilder("type=aoeu"));
+        e = expectThrows(ElasticsearchParseException.class, () -> getBuilder(Map.of("type", "aoeu")));
         assertThat(e.getMessage(), equalTo("failed to parse [multi_match] query type [aoeu]. unknown type."));
     }
 
-    private static QueryStringQueryBuilder getBuilder(String options) {
+    private static QueryStringQueryBuilder getBuilder(Map<String, String> options) {
         final Source source = new Source(1, 1, StringUtils.EMPTY);
-        final StringQueryPredicate mmqp = new StringQueryPredicate(source, "eggplant", options);
-        final QueryStringQuery mmq = new QueryStringQuery(source, "eggplant", Collections.singletonMap("foo", 1.0f), mmqp);
-        return (QueryStringQueryBuilder) mmq.asBuilder();
+        final QueryStringQuery query = new QueryStringQuery(source, "eggplant", Collections.singletonMap("foo", 1.0f), options);
+        return (QueryStringQueryBuilder) query.asBuilder();
     }
 
     public void testToString() {
         final Source source = new Source(1, 1, StringUtils.EMPTY);
-        final StringQueryPredicate mmqp = new StringQueryPredicate(source, "eggplant", "");
-        final QueryStringQuery mmq = new QueryStringQuery(source, "eggplant", Collections.singletonMap("foo", 1.0f), mmqp);
+        final QueryStringQuery mmq = new QueryStringQuery(source, "eggplant", Collections.singletonMap("foo", 1.0f), Map.of());
         assertEquals("QueryStringQuery@1:2[{foo=1.0}:eggplant]", mmq.toString());
     }
 }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java
index 7fdfb4b328869..d59c736783172 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java
@@ -10,19 +10,12 @@
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.MatchQueryPredicate;
 import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.MultiMatchQueryPredicate;
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
 
 import java.util.List;
 
 public class FullTextWritables {
 
     public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
-        return List.of(
-            MatchQueryPredicate.ENTRY,
-            MultiMatchQueryPredicate.ENTRY,
-            StringQueryPredicate.ENTRY,
-            QueryString.ENTRY,
-            Match.ENTRY
-        );
+        return List.of(MatchQueryPredicate.ENTRY, MultiMatchQueryPredicate.ENTRY, QueryString.ENTRY, Match.ENTRY);
     }
 }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java
index f01e7c4b1f3a6..9f574ee8005b2 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushFiltersToSource.java
@@ -19,7 +19,6 @@
 import org.elasticsearch.xpack.esql.core.expression.function.scalar.UnaryScalarFunction;
 import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
 import org.elasticsearch.xpack.esql.core.expression.predicate.Range;
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
 import org.elasticsearch.xpack.esql.core.expression.predicate.logical.BinaryLogic;
 import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Not;
 import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull;
@@ -253,8 +252,6 @@ static boolean canPushToSource(Expression exp, LucenePushdownPredicates lucenePu
                 && Expressions.foldable(cidrMatch.matches());
         } else if (exp instanceof SpatialRelatesFunction spatial) {
             return canPushSpatialFunctionToSource(spatial, lucenePushdownPredicates);
-        } else if (exp instanceof StringQueryPredicate) {
-            return true;
         } else if (exp instanceof QueryString) {
             return true;
         } else if (exp instanceof Match mf) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java
index 12dc77e6e7c59..6fac7bab2bd80 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsqlExpressionTranslators.java
@@ -86,7 +86,6 @@ public final class EsqlExpressionTranslators {
         new ExpressionTranslators.IsNotNulls(),
         new ExpressionTranslators.Nots(),
         new ExpressionTranslators.Likes(),
-        new ExpressionTranslators.StringQueries(),
         new ExpressionTranslators.MultiMatches(),
         new MatchFunctionTranslator(),
         new QueryStringFunctionTranslator(),
@@ -536,7 +535,7 @@ protected Query asQuery(Match match, TranslatorHandler handler) {
     public static class QueryStringFunctionTranslator extends ExpressionTranslator<QueryString> {
         @Override
         protected Query asQuery(QueryString queryString, TranslatorHandler handler) {
-            return new QueryStringQuery(queryString.source(), queryString.queryAsText(), Map.of(), null);
+            return new QueryStringQuery(queryString.source(), queryString.queryAsText(), Map.of(), Map.of());
         }
     }
 }
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java
deleted file mode 100644
index ff00a161e1bb1..0000000000000
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-package org.elasticsearch.xpack.esql.expression.predicate.operator.fulltext;
-
-import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate;
-
-import java.io.IOException;
-
-public class StringQuerySerializationTests extends AbstractFulltextSerializationTests<StringQueryPredicate> {
-
-    private static final String COMMA = ",";
-
-    @Override
-    protected final StringQueryPredicate createTestInstance() {
-        return new StringQueryPredicate(randomSource(), randomAlphaOfLength(randomIntBetween(1, 16)), randomOptionOrNull());
-    }
-
-    @Override
-    protected StringQueryPredicate mutateInstance(StringQueryPredicate instance) throws IOException {
-        var query = instance.query();
-        var options = instance.options();
-        if (randomBoolean()) {
-            query = randomValueOtherThan(query, () -> randomAlphaOfLength(randomIntBetween(1, 16)));
-        } else {
-            options = randomValueOtherThan(options, this::randomOptionOrNull);
-        }
-        return new StringQueryPredicate(instance.source(), query, options);
-    }
-}
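Taken together, patch 05 replaces the options carried inside a StringQueryPredicate with an explicit options map on QueryStringQuery. A minimal sketch of the new call shape, mirroring QueryStringQueryTests above; the "foo" boost and the "lenient" option are sample values, and the types come from the Elasticsearch packages shown in the diff, so this fragment compiles only against those modules.

import java.util.Map;

import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.xpack.esql.core.querydsl.query.QueryStringQuery;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.util.StringUtils;

class QueryStringQueryUsage {
    static QueryStringQueryBuilder build() {
        Source source = new Source(1, 1, StringUtils.EMPTY);
        // Options are now a plain Map<String, String> instead of a predicate object.
        QueryStringQuery query = new QueryStringQuery(
            source,
            "eggplant",                   // query text
            Map.of("foo", 1.0f),          // field name -> boost
            Map.of("lenient", "true")     // query_string options; Map.of() when none
        );
        return (QueryStringQueryBuilder) query.asBuilder();
    }
}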
From a9451df21833b2ea38954ae99fe18eadbd2366d8 Mon Sep 17 00:00:00 2001
From: Ryan Ernst
Date: Thu, 21 Nov 2024 13:11:04 -0800
Subject: [PATCH 06/10] Fix SecureSM to allow innocuous threads and threadgroups for parallel streams (#117277)

When a parallel stream is opened, the JDK uses an internal fork join pool
to do the stream's processing work. This pool is internal to the JDK, so it
should always be allowed to create threads. This commit modifies SecureSM
to account for this innocuous thread group and its threads.

---
 .../org/elasticsearch/secure_sm/SecureSM.java   | 18 +++++++++++++++---
 .../elasticsearch/secure_sm/SecureSMTests.java  | 11 +++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/libs/secure-sm/src/main/java/org/elasticsearch/secure_sm/SecureSM.java b/libs/secure-sm/src/main/java/org/elasticsearch/secure_sm/SecureSM.java
index 4fd471c529e75..02d0491118dc7 100644
--- a/libs/secure-sm/src/main/java/org/elasticsearch/secure_sm/SecureSM.java
+++ b/libs/secure-sm/src/main/java/org/elasticsearch/secure_sm/SecureSM.java
@@ -157,7 +157,9 @@ private static void debugThreadGroups(final ThreadGroup caller, final ThreadGrou
     // Returns true if the given thread is an instance of the JDK's InnocuousThread.
     private static boolean isInnocuousThread(Thread t) {
         final Class<?> c = t.getClass();
-        return c.getModule() == Object.class.getModule() && c.getName().equals("jdk.internal.misc.InnocuousThread");
+        return c.getModule() == Object.class.getModule()
+            && (c.getName().equals("jdk.internal.misc.InnocuousThread")
+                || c.getName().equals("java.util.concurrent.ForkJoinWorkerThread$InnocuousForkJoinWorkerThread"));
     }
 
     protected void checkThreadAccess(Thread t) {
@@ -184,11 +186,21 @@ protected void checkThreadAccess(Thread t) {
     private static final Permission MODIFY_THREADGROUP_PERMISSION = new RuntimePermission("modifyThreadGroup");
     private static final Permission MODIFY_ARBITRARY_THREADGROUP_PERMISSION = new ThreadPermission("modifyArbitraryThreadGroup");
 
+    // Returns true if the given thread group is the JDK's innocuous fork join worker thread group.
+    private static boolean isInnocuousThreadGroup(ThreadGroup t) {
+        final Class<?> c = t.getClass();
+        return c.getModule() == Object.class.getModule() && t.getName().equals("InnocuousForkJoinWorkerThreadGroup");
+    }
+
     protected void checkThreadGroupAccess(ThreadGroup g) {
         Objects.requireNonNull(g);
 
+        boolean targetThreadGroupIsInnocuous = isInnocuousThreadGroup(g);
+
         // first, check if we can modify thread groups at all.
-        checkPermission(MODIFY_THREADGROUP_PERMISSION);
+        if (targetThreadGroupIsInnocuous == false) {
+            checkPermission(MODIFY_THREADGROUP_PERMISSION);
+        }
 
         // check the threadgroup, if its our thread group or an ancestor, its fine.
         final ThreadGroup source = Thread.currentThread().getThreadGroup();
@@ -196,7 +208,7 @@ protected void checkThreadGroupAccess(ThreadGroup g) {
 
         if (source == null) {
             return; // we are a dead thread, do nothing
-        } else if (source.parentOf(target) == false) {
+        } else if (source.parentOf(target) == false && targetThreadGroupIsInnocuous == false) {
             checkPermission(MODIFY_ARBITRARY_THREADGROUP_PERMISSION);
         }
     }
diff --git a/libs/secure-sm/src/test/java/org/elasticsearch/secure_sm/SecureSMTests.java b/libs/secure-sm/src/test/java/org/elasticsearch/secure_sm/SecureSMTests.java
index b94639414ffe5..69c6973f57cdf 100644
--- a/libs/secure-sm/src/test/java/org/elasticsearch/secure_sm/SecureSMTests.java
+++ b/libs/secure-sm/src/test/java/org/elasticsearch/secure_sm/SecureSMTests.java
@@ -14,7 +14,10 @@
 import java.security.Permission;
 import java.security.Policy;
 import java.security.ProtectionDomain;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
 
 /** Simple tests for SecureSM */
 public class SecureSMTests extends TestCase {
@@ -128,4 +131,12 @@ public void run() {
         t1.join();
         assertTrue(interrupted1.get());
     }
+
+    public void testParallelStreamThreadGroup() throws Exception {
+        List<Integer> list = new ArrayList<>();
+        for (int i = 0; i < 100; ++i) {
+            list.add(i);
+        }
+        list.parallelStream().collect(Collectors.toSet());
+    }
 }
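For background, the new test exercises exactly the problematic path: any parallel stream runs on the JDK's common ForkJoinPool, whose worker threads are created by the pool itself rather than by the calling code. A standalone sketch of the trigger, using only the plain JDK and no Elasticsearch types:

import java.util.stream.IntStream;

public class ParallelStreamDemo {
    public static void main(String[] args) {
        // Work on a parallel stream is executed by the JDK's common ForkJoinPool,
        // whose worker threads the calling code never creates explicitly.
        int sum = IntStream.rangeClosed(1, 1_000).parallel().sum();
        System.out.println("sum = " + sum); // prints 500500
        // Under a SecurityManager, those workers are "innocuous" threads in the
        // InnocuousForkJoinWorkerThreadGroup, which SecureSM must not reject.
    }
}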
From ec644b10ec28b6eaaa5d658eedeac02dcb7bab5c Mon Sep 17 00:00:00 2001
From: Brian Seeders
Date: Thu, 21 Nov 2024 16:17:22 -0500
Subject: [PATCH 07/10] Bump versions after 8.16.1 release

---
 .buildkite/pipelines/intake.yml                        |  2 +-
 .buildkite/pipelines/periodic-packaging.yml            |  6 +++---
 .buildkite/pipelines/periodic.yml                      | 10 +++++-----
 .ci/bwcVersions                                        |  2 +-
 .ci/snapshotBwcVersions                                |  2 +-
 server/src/main/java/org/elasticsearch/Version.java    |  1 +
 .../resources/org/elasticsearch/TransportVersions.csv  |  1 +
 .../org/elasticsearch/index/IndexVersions.csv          |  1 +
 8 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml
index 8935872fdec83..5be5990cfb203 100644
--- a/.buildkite/pipelines/intake.yml
+++ b/.buildkite/pipelines/intake.yml
@@ -56,7 +56,7 @@ steps:
     timeout_in_minutes: 300
     matrix:
       setup:
-        BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"]
+        BWC_VERSION: ["8.16.2", "8.17.0", "8.18.0", "9.0.0"]
     agents:
       provider: gcp
      image: family/elasticsearch-ubuntu-2004
diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml
index 2dbb7f5193af6..162a7e4995467 100644
--- a/.buildkite/pipelines/periodic-packaging.yml
+++ b/.buildkite/pipelines/periodic-packaging.yml
@@ -288,8 +288,8 @@ steps:
     env:
       BWC_VERSION: 8.15.4
 
-  - label: "{{matrix.image}} / 8.16.1 / packaging-tests-upgrade"
-    command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.16.1
+  - label: "{{matrix.image}} / 8.16.2 / packaging-tests-upgrade"
+    command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.16.2
     timeout_in_minutes: 300
     matrix:
       setup:
@@ -302,7 +302,7 @@
       machineType: custom-16-32768
     buildDirectory: /dev/shm/bk
     env:
-      BWC_VERSION: 8.16.1
+      BWC_VERSION: 8.16.2
 
   - label: "{{matrix.image}} / 8.17.0 / packaging-tests-upgrade"
     command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.17.0
diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml
index 047e4a3f4f8f6..aa1db893df8cc 100644
--- a/.buildkite/pipelines/periodic.yml
+++ b/.buildkite/pipelines/periodic.yml
@@ -306,8 +306,8 @@ steps:
       - signal_reason: agent_stop
         limit: 3
 
-  - label: 8.16.1 / bwc
-    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.16.1#bwcTest
+  - label: 8.16.2 / bwc
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.16.2#bwcTest
     timeout_in_minutes: 300
     agents:
       provider: gcp
@@ -316,7 +316,7 @@
     buildDirectory: /dev/shm/bk
     preemptible: true
     env:
-      BWC_VERSION: 8.16.1
+      BWC_VERSION: 8.16.2
     retry:
       automatic:
         - exit_status: "-1"
@@ -448,7 +448,7 @@ steps:
       setup:
         ES_RUNTIME_JAVA:
           - openjdk21
-        BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"]
+        BWC_VERSION: ["8.16.2", "8.17.0", "8.18.0", "9.0.0"]
     agents:
       provider: gcp
       image: family/elasticsearch-ubuntu-2004
@@ -490,7 +490,7 @@
       ES_RUNTIME_JAVA:
         - openjdk21
         - openjdk23
-      BWC_VERSION: ["8.16.1", "8.17.0", "8.18.0", "9.0.0"]
+      BWC_VERSION: ["8.16.2", "8.17.0", "8.18.0", "9.0.0"]
     agents:
       provider: gcp
       image: family/elasticsearch-ubuntu-2004
diff --git a/.ci/bwcVersions b/.ci/bwcVersions
index ac07e14c2a176..a8d6dda4fb0c2 100644
--- a/.ci/bwcVersions
+++ b/.ci/bwcVersions
@@ -15,7 +15,7 @@ BWC_VERSION:
   - "8.13.4"
   - "8.14.3"
   - "8.15.4"
-  - "8.16.1"
+  - "8.16.2"
   - "8.17.0"
   - "8.18.0"
   - "9.0.0"
diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions
index 351c605e6e092..5514fc376a285 100644
--- a/.ci/snapshotBwcVersions
+++ b/.ci/snapshotBwcVersions
@@ -1,5 +1,5 @@
 BWC_VERSION:
-  - "8.16.1"
+  - "8.16.2"
   - "8.17.0"
   - "8.18.0"
   - "9.0.0"
diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java
index 40071b19af5d3..7b65547a7d591 100644
--- a/server/src/main/java/org/elasticsearch/Version.java
+++ b/server/src/main/java/org/elasticsearch/Version.java
@@ -189,6 +189,7 @@ public class Version implements VersionId<Version>, ToXContentFragment {
     public static final Version V_8_15_4 = new Version(8_15_04_99);
     public static final Version V_8_16_0 = new Version(8_16_00_99);
     public static final Version V_8_16_1 = new Version(8_16_01_99);
+    public static final Version V_8_16_2 = new Version(8_16_02_99);
     public static final Version V_8_17_0 = new Version(8_17_00_99);
     public static final Version V_8_18_0 = new Version(8_18_00_99);
     public static final Version V_9_0_0 = new Version(9_00_00_99);
diff --git a/server/src/main/resources/org/elasticsearch/TransportVersions.csv b/server/src/main/resources/org/elasticsearch/TransportVersions.csv
index ba575cc642a81..6191922f13094 100644
--- a/server/src/main/resources/org/elasticsearch/TransportVersions.csv
+++ b/server/src/main/resources/org/elasticsearch/TransportVersions.csv
@@ -133,3 +133,4 @@
 8.15.3,8702003
 8.15.4,8702003
 8.16.0,8772001
+8.16.1,8772004
diff --git a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
index c54aea88613f5..f84d69af727ac 100644
--- a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
+++ b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
@@ -133,3 +133,4 @@
 8.15.3,8512000
 8.15.4,8512000
 8.16.0,8518000
+8.16.1,8518000
From 8fe8d22f7c63d574d7570abcf21757926468b415 Mon Sep 17 00:00:00 2001
From: Lisa Cawley
Date: Thu, 21 Nov 2024 14:02:18 -0800
Subject: [PATCH 08/10] [DOCS] Remove broken migration guide link (#117293)

---
 docs/reference/cluster/update-settings.asciidoc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/reference/cluster/update-settings.asciidoc b/docs/reference/cluster/update-settings.asciidoc
index ca3d100e31e06..3d8bdcca07e2b 100644
--- a/docs/reference/cluster/update-settings.asciidoc
+++ b/docs/reference/cluster/update-settings.asciidoc
@@ -59,8 +59,8 @@ An example of a transient update:
 ====
 We no longer recommend using transient cluster settings. Use persistent cluster
 settings instead. If a cluster becomes unstable, transient settings can clear
-unexpectedly, resulting in a potentially undesired cluster configuration. See
-the <<transient-settings-migration-guide>>.
+unexpectedly, resulting in a potentially undesired cluster configuration.
+// See the <<transient-settings-migration-guide>>.
 ====
 // end::transient-settings-warning[]

From 7768133f44421e13c40922dbf23ab47b8afdf46c Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Fri, 22 Nov 2024 09:31:02 +1100
Subject: [PATCH 09/10] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=snapshot/10_basic/Create a source only snapshot and then restore it} #117295

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index d1e1976262f55..01cc6d0355a59 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -250,6 +250,9 @@ tests:
 - class: org.elasticsearch.discovery.ClusterDisruptionIT
   method: testAckedIndexing
   issue: https://github.com/elastic/elasticsearch/issues/117024
+- class: org.elasticsearch.xpack.test.rest.XPackRestIT
+  method: test {p0=snapshot/10_basic/Create a source only snapshot and then restore it}
+  issue: https://github.com/elastic/elasticsearch/issues/117295
 
 # Examples:
 #

From bead24880b9d6e8099c7c9a4043f5cee448ed7db Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Fri, 22 Nov 2024 09:39:10 +1100
Subject: [PATCH 10/10] Mute org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests testRetryPointInTime #117116

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index 01cc6d0355a59..fae848c600aea 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -253,6 +253,9 @@ tests:
 - class: org.elasticsearch.xpack.test.rest.XPackRestIT
   method: test {p0=snapshot/10_basic/Create a source only snapshot and then restore it}
   issue: https://github.com/elastic/elasticsearch/issues/117295
+- class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests
+  method: testRetryPointInTime
+  issue: https://github.com/elastic/elasticsearch/issues/117116
 
 # Examples:
 #