Merge branch '8.x' into lucene_9_12_1

ChrisHegarty · Dec 10, 2024 · 033d3fd · 033d3fd
2 parents 81d8102 + 8b0c4d8
commit 033d3fd
Show file tree

Hide file tree

Showing 34 changed files with 3,650 additions and 139 deletions.
diff --git a/docs/changelog/117994.yaml b/docs/changelog/117994.yaml
@@ -0,0 +1,5 @@
+pr: 117994
+summary: Even better(er) binary quantization
+area: Vector Search
+type: enhancement
+issues: []
diff --git a/docs/reference/setup/install/docker.asciidoc b/docs/reference/setup/install/docker.asciidoc
@@ -39,7 +39,7 @@ adjust memory usage in Docker Desktop by going to **Settings > Resources**.
 ----
 docker network create elastic
 ----
-
+// REVIEWED[DEC.10.24]
 . Pull the {es} Docker image.
 +
 --
@@ -52,10 +52,11 @@ endif::[]
 ----
 docker pull {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 --
 
 . Optional: Install
-https://docs.sigstore.dev/system_config/installation/[Cosign] for your
+https://docs.sigstore.dev/cosign/system_config/installation/[Cosign] for your
 environment. Then use Cosign to verify the {es} image's signature.
 +
 [[docker-verify-signature]]
@@ -64,6 +65,7 @@ environment. Then use Cosign to verify the {es} image's signature.
 wget https://artifacts.elastic.co/cosign.pub
 cosign verify --key cosign.pub {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 +
 The `cosign` command prints the check results and the signature payload in JSON format:
 +
@@ -75,13 +77,15 @@ The following checks were performed on each of these signatures:
   - Existence of the claims in the transparency log was verified offline
   - The signatures were verified against the specified public key
 ----
+// REVIEWED[DEC.10.24]
 
 . Start an {es} container.
 +
 [source,sh,subs="attributes"]
 ----
 docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 +
 TIP: Use the `-m` flag to set a memory limit for the container. This removes the
 need to <<docker-set-heap-size,manually set the JVM size>>.
@@ -95,6 +99,7 @@ If you intend to use the {ml} capabilities, then start the container with this c
 ----
 docker run --name es01 --net elastic -p 9200:9200 -it -m 6GB -e "xpack.ml.use_auto_machine_memory_percent=true" {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 The command prints the `elastic` user password and an enrollment token for {kib}.
 
 . Copy the generated `elastic` password and enrollment token. These credentials
@@ -106,20 +111,23 @@ credentials using the following commands.
 docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
 docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
 ----
+// REVIEWED[DEC.10.24]
 +
 We recommend storing the `elastic` password as an environment variable in your shell. Example:
 +
 [source,sh]
 ----
 export ELASTIC_PASSWORD="your_password"
 ----
+// REVIEWED[DEC.10.24]
 
 . Copy the `http_ca.crt` SSL certificate from the container to your local machine.
 +
 [source,sh]
 ----
 docker cp es01:/usr/share/elasticsearch/config/certs/http_ca.crt .
 ----
+// REVIEWED[DEC.10.24]
 
 . Make a REST API call to {es} to ensure the {es} container is running.
 +
@@ -128,6 +136,7 @@ docker cp es01:/usr/share/elasticsearch/config/certs/http_ca.crt .
 curl --cacert http_ca.crt -u elastic:$ELASTIC_PASSWORD https://localhost:9200
 ----
 // NOTCONSOLE
+// REVIEWED[DEC.10.24]
 
 ===== Add more nodes
 
@@ -137,6 +146,7 @@ curl --cacert http_ca.crt -u elastic:$ELASTIC_PASSWORD https://localhost:9200
 ----
 docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s node
 ----
+// REVIEWED[DEC.10.24]
 +
 The enrollment token is valid for 30 minutes.
 
@@ -146,6 +156,7 @@ The enrollment token is valid for 30 minutes.
 ----
 docker run -e ENROLLMENT_TOKEN="<token>" --name es02 --net elastic -it -m 1GB {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 
 . Call the <<cat-nodes,cat nodes API>> to verify the node was added to the cluster.
 +
@@ -154,6 +165,7 @@ docker run -e ENROLLMENT_TOKEN="<token>" --name es02 --net elastic -it -m 1GB {d
 curl --cacert http_ca.crt -u elastic:$ELASTIC_PASSWORD https://localhost:9200/_cat/nodes
 ----
 // NOTCONSOLE
+// REVIEWED[DEC.10.24]
 
 [[run-kibana-docker]]
 ===== Run {kib}
@@ -170,6 +182,7 @@ endif::[]
 ----
 docker pull {kib-docker-image}
 ----
+// REVIEWED[DEC.10.24]
 --
 
 . Optional: Verify the {kib} image's signature.
@@ -179,13 +192,15 @@ docker pull {kib-docker-image}
 wget https://artifacts.elastic.co/cosign.pub
 cosign verify --key cosign.pub {kib-docker-image}
 ----
+// REVIEWED[DEC.10.24]
 
 . Start a {kib} container.
 +
 [source,sh,subs="attributes"]
 ----
 docker run --name kib01 --net elastic -p 5601:5601 {kib-docker-image}
 ----
+// REVIEWED[DEC.10.24]
 
 . When {kib} starts, it outputs a unique generated link to the terminal. To
 access {kib}, open this link in a web browser.
@@ -198,6 +213,7 @@ To regenerate the token, run:
 ----
 docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
 ----
+// REVIEWED[DEC.10.24]
 
 . Log in to {kib} as the `elastic` user with the password that was generated
 when you started {es}.
@@ -208,6 +224,7 @@ To regenerate the password, run:
 ----
 docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
 ----
+// REVIEWED[DEC.10.24]
 
 [[remove-containers-docker]]
 ===== Remove containers
@@ -226,6 +243,7 @@ docker rm es02
 # Remove the {kib} container
 docker rm kib01
 ----
+// REVIEWED[DEC.10.24]
 
 ===== Next steps
 
@@ -306,6 +324,7 @@ ES_PORT=127.0.0.1:9200
 ----
 docker-compose up -d
 ----
+// REVIEWED[DEC.10.24]
 
 . After the cluster has started, open http://localhost:5601 in a web browser to
 access {kib}.
@@ -321,6 +340,7 @@ is preserved and loaded when you restart the cluster with `docker-compose up`.
 ----
 docker-compose down
 ----
+// REVIEWED[DEC.10.24]
 
 To delete the network, containers, and volumes when you stop the cluster,
 specify the `-v` option:
@@ -329,6 +349,7 @@ specify the `-v` option:
 ----
 docker-compose down -v
 ----
+// REVIEWED[DEC.10.24]
 
 ===== Next steps
 
@@ -377,6 +398,7 @@ The `vm.max_map_count` setting must be set within the xhyve virtual machine:
 --------------------------------------------
 screen ~/Library/Containers/com.docker.docker/Data/vms/0/tty
 --------------------------------------------
+// REVIEWED[DEC.10.24]
 
 . Press enter and use `sysctl` to configure `vm.max_map_count`:
 +
@@ -494,6 +516,7 @@ To check the Docker daemon defaults for ulimits, run:
 --------------------------------------------
 docker run --rm {docker-image} /bin/bash -c 'ulimit -Hn && ulimit -Sn && ulimit -Hu && ulimit -Su'
 --------------------------------------------
+// REVIEWED[DEC.10.24]
 
 If needed, adjust them in the Daemon or override them per container.
 For example, when using `docker run`, set:
@@ -502,6 +525,7 @@ For example, when using `docker run`, set:
 --------------------------------------------
 --ulimit nofile=65535:65535
 --------------------------------------------
+// REVIEWED[DEC.10.24]
 
 ===== Disable swapping
 
@@ -518,6 +542,7 @@ When using `docker run`, you can specify:
 ----
 -e "bootstrap.memory_lock=true" --ulimit memlock=-1:-1
 ----
+// REVIEWED[DEC.10.24]
 
 ===== Randomize published ports
 
@@ -545,6 +570,7 @@ environment variable. For example, to use 1GB, use the following command.
 ----
 docker run -e ES_JAVA_OPTS="-Xms1g -Xmx1g" -e ENROLLMENT_TOKEN="<token>" --name es01 -p 9200:9200 --net elastic -it {docker-image}
 ----
+// REVIEWED[DEC.10.24]
 
 The `ES_JAVA_OPTS` variable overrides all other JVM options.
 We do not recommend using `ES_JAVA_OPTS` in production.
@@ -616,6 +642,7 @@ If you mount the password file to `/run/secrets/bootstrapPassword.txt`, specify:
 --------------------------------------------
 -e ELASTIC_PASSWORD_FILE=/run/secrets/bootstrapPassword.txt
 --------------------------------------------
+// REVIEWED[DEC.10.24]
 
 You can override the default command for the image to pass {es} configuration
 parameters as command line options. For example:

diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle
@@ -255,4 +255,6 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
   task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
   task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
   task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
+  task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Test knn search", "Scoring has changed in latest versions")
+  task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")
 })
diff --git a/...c/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml b/...c/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml
@@ -11,20 +11,11 @@ setup:
               number_of_shards: 1
           mappings:
             properties:
-              name:
-                type: keyword
               vector:
                 type: dense_vector
                 dims: 64
                 index: true
-                similarity: l2_norm
-                index_options:
-                  type: bbq_hnsw
-              another_vector:
-                type: dense_vector
-                dims: 64
-                index: true
-                similarity: l2_norm
+                similarity: max_inner_product
                 index_options:
                   type: bbq_hnsw
 
@@ -33,9 +24,14 @@ setup:
         index: bbq_hnsw
         id: "1"
         body:
-          name: cow.jpg
-          vector: [300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0, 230.0, 300.33, -34.8988, 15.555, -200.0]
-          another_vector: [115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0, 130.0, 115.0, -1.02, 15.555, -100.0]
+          vector: [0.077,  0.32 , -0.205,  0.63 ,  0.032,  0.201,  0.167, -0.313,
+                   0.176,  0.531, -0.375,  0.334, -0.046,  0.078, -0.349,  0.272,
+                   0.307, -0.083,  0.504,  0.255, -0.404,  0.289, -0.226, -0.132,
+                   -0.216,  0.49 ,  0.039,  0.507, -0.307,  0.107,  0.09 , -0.265,
+                   -0.285,  0.336, -0.272,  0.369, -0.282,  0.086, -0.132,  0.475,
+                   -0.224,  0.203,  0.439,  0.064,  0.246, -0.396,  0.297,  0.242,
+                   -0.028,  0.321, -0.022, -0.009, -0.001  ,  0.031, -0.533,  0.45,
+                   -0.683,  1.331,  0.194, -0.157, -0.1  , -0.279, -0.098, -0.176]
   # Flush in order to provoke a merge later
   - do:
       indices.flush:
@@ -46,9 +42,14 @@ setup:
         index: bbq_hnsw
         id: "2"
         body:
-          name: moose.jpg
-          vector: [100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0, -0.5, 100.0, -13, 14.8, -156.0]
-          another_vector: [50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120, -0.5, 50.0, -1, 1, 120]
+          vector: [0.196,  0.514,  0.039,  0.555, -0.042,  0.242,  0.463, -0.348,
+                   -0.08 ,  0.442, -0.067, -0.05 , -0.001,  0.298, -0.377,  0.048,
+                   0.307,  0.159,  0.278,  0.119, -0.057,  0.333, -0.289, -0.438,
+                   -0.014,  0.361, -0.169,  0.292, -0.229,  0.123,  0.031, -0.138,
+                   -0.139,  0.315, -0.216,  0.322, -0.445, -0.059,  0.071,  0.429,
+                   -0.602, -0.142,  0.11 ,  0.192,  0.259, -0.241,  0.181, -0.166,
+                   0.082,  0.107, -0.05 ,  0.155,  0.011,  0.161, -0.486,  0.569,
+                   -0.489,  0.901,  0.208,  0.011, -0.209, -0.153, -0.27 , -0.013]
   # Flush in order to provoke a merge later
   - do:
       indices.flush:
@@ -60,8 +61,14 @@ setup:
         id: "3"
         body:
           name: rabbit.jpg
-          vector: [111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0, 0.5, 111.3, -13.0, 14.8, -156.0]
-          another_vector: [11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0, -0.5, 11.0, 0, 12, 111.0]
+          vector: [0.139,  0.178, -0.117,  0.399,  0.014, -0.139,  0.347, -0.33 ,
+                   0.139,  0.34 , -0.052, -0.052, -0.249,  0.327, -0.288,  0.049,
+                   0.464,  0.338,  0.516,  0.247, -0.104,  0.259, -0.209, -0.246,
+                   -0.11 ,  0.323,  0.091,  0.442, -0.254,  0.195, -0.109, -0.058,
+                   -0.279,  0.402, -0.107,  0.308, -0.273,  0.019,  0.082,  0.399,
+                   -0.658, -0.03 ,  0.276,  0.041,  0.187, -0.331,  0.165,  0.017,
+                   0.171, -0.203, -0.198,  0.115, -0.007,  0.337, -0.444,  0.615,
+                   -0.657,  1.285,  0.2  , -0.062,  0.038,  0.089, -0.068, -0.058]
   # Flush in order to provoke a merge later
   - do:
       indices.flush:
@@ -73,20 +80,33 @@ setup:
         max_num_segments: 1
 ---
 "Test knn search":
+  - requires:
+      capabilities:
+        - method: POST
+          path: /_search
+          capabilities: [ optimized_scalar_quantization_bbq ]
+      test_runner_features: capabilities
+      reason: "BBQ scoring improved and changed with optimized_scalar_quantization_bbq"
   - do:
       search:
         index: bbq_hnsw
         body:
           knn:
             field: vector
-            query_vector: [ 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0, -0.5, 90.0, -10, 14.8, -156.0]
+            query_vector: [0.128,  0.067, -0.08 ,  0.395, -0.11 , -0.259,  0.473, -0.393,
+                           0.292,  0.571, -0.491,  0.444, -0.288,  0.198, -0.343,  0.015,
+                           0.232,  0.088,  0.228,  0.151, -0.136,  0.236, -0.273, -0.259,
+                           -0.217,  0.359, -0.207,  0.352, -0.142,  0.192, -0.061, -0.17 ,
+                           -0.343,  0.189, -0.221,  0.32 , -0.301, -0.1  ,  0.005,  0.232,
+                           -0.344,  0.136,  0.252,  0.157, -0.13 , -0.244,  0.193, -0.034,
+                           -0.12 , -0.193, -0.102,  0.252, -0.185, -0.167, -0.575,  0.582,
+                           -0.426,  0.983,  0.212,  0.204,  0.03 , -0.276, -0.425, -0.158]
             k: 3
             num_candidates: 3
 
-  # Depending on how things are distributed, docs 2 and 3 might be swapped
-  # here we verify that are last hit is always the worst one
-  - match: { hits.hits.2._id: "1" }
-
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.2._id: "2" }
 ---
 "Test bad quantization parameters":
   - do: