Skip to content

Commit

Permalink
Merge branch 'main' into esql_float_aggs
Browse files Browse the repository at this point in the history
  • Loading branch information
elasticmachine authored Jun 17, 2024
2 parents d9c2f55 + 7ad9534 commit 994ca02
Show file tree
Hide file tree
Showing 267 changed files with 6,301 additions and 3,575 deletions.
2 changes: 1 addition & 1 deletion benchmarks/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies {
api(project(':x-pack:plugin:esql-core'))
api(project(':x-pack:plugin:esql'))
api(project(':x-pack:plugin:esql:compute'))
implementation project(path: ':libs:elasticsearch-vec')
implementation project(path: ':libs:elasticsearch-simdvec')
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
api "org.openjdk.jmh:jmh-core:$versions.jmh"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class DistanceFunctionBenchmark {
@Param({ "96" })
private int dims;

@Param({ "dot", "cosine", "l1", "l2" })
@Param({ "dot", "cosine", "l1", "l2", "hamming" })
private String function;

@Param({ "knn", "binary" })
Expand Down Expand Up @@ -330,6 +330,18 @@ public void execute(Consumer<Object> consumer) {
}
}

/**
 * Benchmark function for the {@code "hamming"} function with the {@code "knn"} type:
 * calls {@code hamming} on a {@link ByteKnnDenseVector}.
 */
private static class HammingKnnByteBenchmarkFunction extends KnnByteBenchmarkFunction {

    private HammingKnnByteBenchmarkFunction(int dims) {
        super(dims);
    }

    /**
     * Wraps {@code docVector} in a new {@link ByteKnnDenseVector} and invokes
     * {@code hamming(queryVector)} on it.
     *
     * NOTE(review): the value returned by {@code hamming} is discarded and {@code consumer}
     * is never invoked here; confirm this matches the sibling benchmark functions and that
     * the JIT cannot eliminate the call as dead code.
     *
     * @param consumer unused by this implementation
     */
    @Override
    public void execute(Consumer<Object> consumer) {
        final ByteKnnDenseVector wrappedDoc = new ByteKnnDenseVector(docVector);
        wrappedDoc.hamming(queryVector);
    }
}

private static class L1BinaryFloatBenchmarkFunction extends BinaryFloatBenchmarkFunction {

private L1BinaryFloatBenchmarkFunction(int dims) {
Expand All @@ -354,6 +366,18 @@ public void execute(Consumer<Object> consumer) {
}
}

/**
 * Benchmark function for the {@code "hamming"} function with the {@code "binary"} type:
 * calls {@code hamming} on a {@link ByteBinaryDenseVector}.
 */
private static class HammingBinaryByteBenchmarkFunction extends BinaryByteBenchmarkFunction {

    private HammingBinaryByteBenchmarkFunction(int dims) {
        super(dims);
    }

    /**
     * Builds a new {@link ByteBinaryDenseVector} from {@code vectorValue}, {@code docVector}
     * and {@code dims}, then invokes {@code hamming(queryVector)} on it.
     *
     * NOTE(review): the value returned by {@code hamming} is discarded and {@code consumer}
     * is never invoked here; confirm this matches the sibling benchmark functions and that
     * the JIT cannot eliminate the call as dead code.
     *
     * @param consumer unused by this implementation
     */
    @Override
    public void execute(Consumer<Object> consumer) {
        final ByteBinaryDenseVector wrappedDoc = new ByteBinaryDenseVector(vectorValue, docVector, dims);
        wrappedDoc.hamming(queryVector);
    }
}

private static class L2KnnFloatBenchmarkFunction extends KnnFloatBenchmarkFunction {

private L2KnnFloatBenchmarkFunction(int dims) {
Expand Down Expand Up @@ -454,6 +478,11 @@ public void setBenchmarkFunction() {
case "binary" -> new L2BinaryByteBenchmarkFunction(dims);
default -> throw new UnsupportedOperationException("unexpected type [" + type + "]");
};
case "hamming" -> benchmarkFunction = switch (type) {
case "knn" -> new HammingKnnByteBenchmarkFunction(dims);
case "binary" -> new HammingBinaryByteBenchmarkFunction(dims);
default -> throw new UnsupportedOperationException("unexpected type [" + type + "]");
};
default -> throw new UnsupportedOperationException("unexpected function [" + function + "]");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.apache.lucene.util.quantization.ScalarQuantizer;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.vec.VectorScorerFactory;
import org.elasticsearch.simdvec.VectorScorerFactory;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand All @@ -41,8 +41,8 @@
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.vec.VectorSimilarityType.DOT_PRODUCT;
import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN;
import static org.elasticsearch.simdvec.VectorSimilarityType.DOT_PRODUCT;
import static org.elasticsearch.simdvec.VectorSimilarityType.EUCLIDEAN;

@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
@Warmup(iterations = 3, time = 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ public class InternalDistributionModuleCheckTaskProvider {
"org.elasticsearch.preallocate",
"org.elasticsearch.securesm",
"org.elasticsearch.server",
"org.elasticsearch.simdvec",
"org.elasticsearch.tdigest",
"org.elasticsearch.vec",
"org.elasticsearch.xcontent"
);

Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/108793.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 108793
summary: Add `SparseVectorStats`
area: Search
type: enhancement
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/109025.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 109025
summary: Introduce a setting controlling the activation of the `logs` index mode in logs@settings
area: Logs
type: feature
issues:
- 108762
13 changes: 13 additions & 0 deletions docs/changelog/109317.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
pr: 109317
summary: Add new int4 quantization to dense_vector
area: Search
type: feature
issues: []
highlight:
title: Add new int4 quantization to dense_vector
body: |-
New int4 (half-byte) scalar quantization support via two new index types: `int4_hnsw` and `int4_flat`.
This gives an 8x reduction from `float32` with some accuracy loss. In addition to less memory required, this
improves query and merge speed significantly when compared to raw vectors.
notable: true

5 changes: 5 additions & 0 deletions docs/changelog/109359.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109359
summary: Adding hamming distance function to painless for `dense_vector` fields
area: Vector Search
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109480.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109480
summary: "[Connector API] Add claim sync job endpoint"
area: Application
type: feature
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109634.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109634
summary: "[Query Rules] Require Enterprise License for Query Rules"
area: Relevance
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109717.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109717
summary: Bump jackson version in modules:repository-azure
area: Snapshot/Restore
type: upgrade
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ The following methods are directly callable without a class/instance qualifier.
* double dotProduct(Object *, String *)
* double l1norm(Object *, String *)
* double l2norm(Object *, String *)
* double hamming(Object *, String *)
* double randomScore(int *)
* double randomScore(int *, String *)
* double saturation(double, double)
Expand Down
38 changes: 19 additions & 19 deletions docs/reference/cat/nodes.asciidoc
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
[[cat-nodes]]
=== cat nodes API

++++
<titleabbrev>cat nodes</titleabbrev>
++++

[IMPORTANT]
====
cat APIs are only intended for human consumption using the command line or {kib}
console. They are _not_ intended for use by applications. For application
consumption, use the <<cluster-nodes-info,nodes info API>>.
console.
They are _not_ intended for use by applications.
For application consumption, use the <<cluster-nodes-info,nodes info API>>.
====

Returns information about a cluster's nodes.
Expand All @@ -32,13 +34,15 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=bytes]
include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=http-format]

`full_id`::
(Optional, Boolean) If `true`, return the full node ID. If `false`, return the
shortened node ID. Defaults to `false`.
(Optional, Boolean) If `true`, return the full node ID.
If `false`, return the shortened node ID.
Defaults to `false`.

include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cat-h]
+
--
If you do not specify which columns to include, the API returns the default columns in the order listed below. If you explicitly specify one or more columns, it only returns the specified columns.
If you do not specify which columns to include, the API returns the default columns in the order listed below.
If you explicitly specify one or more columns, it only returns the specified columns.

Valid columns are:

Expand All @@ -58,7 +62,8 @@ Valid columns are:
(Default) Used file descriptors percentage, such as `1`.

`node.role`, `r`, `role`, `nodeRole`::
(Default) Roles of the node. Returned values include
(Default) Roles of the node.
Returned values include
`c` (cold node),
`d` (data node),
`f` (frozen node),
Expand All @@ -73,12 +78,13 @@ Valid columns are:
`w` (warm node), and
`-` (coordinating node only).
+
For example, `dim` indicates a master-eligible data and ingest node. See
For example, `dim` indicates a master-eligible data and ingest node.
See
<<modules-node>>.

`master`, `m`::
(Default) Indicates whether the node is the elected master node. Returned values
include `*` (elected master) and `-` (not elected master).
(Default) Indicates whether the node is the elected master node.
Returned values include `*` (elected master) and `-` (not elected master).

`name`, `n`::
(Default) Node name, such as `I8hydUG`.
Expand Down Expand Up @@ -149,9 +155,6 @@ Node uptime, such as `17.3m`.
`completion.size`, `cs`, `completionSize`::
Size of completion, such as `0b`.

`dense_vector.value_count`, `dvc`, `denseVectorCount`::
Number of indexed dense vector.

`fielddata.memory_size`, `fm`, `fielddataMemory`::
Used fielddata cache memory, such as `0b`.

Expand Down Expand Up @@ -306,8 +309,7 @@ Memory used by index writer, such as `18mb`.
Memory used by version map, such as `1.0kb`.

`segments.fixed_bitset_memory`, `sfbm`, `fixedBitsetMemory`::
Memory used by fixed bit sets for nested object field types and type filters for
types referred in <<parent-join,`join`>> fields, such as `1.0kb`.
Memory used by fixed bit sets for nested object field types and type filters for types referred in <<parent-join,`join`>> fields, such as `1.0kb`.

`suggest.current`, `suc`, `suggestCurrent`::
Number of current suggest operations, such as `0`.
Expand Down Expand Up @@ -362,15 +364,13 @@ ip heap.percent ram.percent cpu load_1m load_5m load_15m node.role master
// TESTRESPONSE[s/65 99 42/\\d+ \\d+ \\d+/]
// TESTRESPONSE[s/dim/.+/ s/[*]/[*]/ s/mJw06l1/.+/ non_json]

The `ip`, `heap.percent`, `ram.percent`, `cpu`, and `load_*` columns provide the
IP addresses and performance information of each node.

The `node.role`, `master`, and `name` columns provide information useful for
monitoring an entire cluster, particularly large ones.
The `ip`, `heap.percent`, `ram.percent`, `cpu`, and `load_*` columns provide the IP addresses and performance information of each node.

The `node.role`, `master`, and `name` columns provide information useful for monitoring an entire cluster, particularly large ones.

[[cat-nodes-api-ex-headings]]
===== Example with explicit columns

The following API request returns the `id`, `ip`, `port`, `v` (version), and `m`
(master) columns.

Expand Down
Loading

0 comments on commit 994ca02

Please sign in to comment.