Skip to content

Commit

Permalink
[8.15] Fix Synthetic Source Handling for bit Type in dense_vector Fie…
Browse files Browse the repository at this point in the history
…ld (#114407) (#114759)

* Fix Synthetic Source Handling for `bit` Type in `dense_vector` Field (#114407)

**Description:**

This PR addresses the issue described in [#114402](#114402), where the `synthetic_source` feature does not correctly handle the `bit` type in `dense_vector` fields when `index` is set to `false`. The root cause was that the `bit` type was not properly accounted for: for `bit` vectors, `dims` counts bits, so it is 8 times the number of byte values actually stored in doc values. This mismatch causes an array out-of-bounds exception when reconstructing the document.

**Changes:**

- Adjusted the `synthetic_source` logic to correctly handle the `bit` type by ensuring the array size accounts for the 8x difference in dimensions.
- Added yaml test to cover the `bit` type scenario in `dense_vector` fields with `index` set to `false`.

**Related Issues:**

- Closes [#114402](#114402)
- Introduced in [#110059](#110059)

(cherry picked from commit 465c65c)

* fixing backport of search capabilities

* fixing license header

* adding capabilities to RestSearchAction

* fixing backport

* spotless

* muting test for ccs

* adding capabilities to the ccs test runner

---------

Co-authored-by: Rassyan <[email protected]>
  • Loading branch information
benwtrent and Rassyan authored Oct 15, 2024
1 parent 54a220a commit f8386c3
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 8 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/114407.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 114407
summary: Fix synthetic source handling for `bit` type in `dense_vector` field
area: Search
type: bug
issues:
- 114402
2 changes: 1 addition & 1 deletion qa/ccs-common-rest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test'

restResources {
restApi {
include '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
include 'capabilities', '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
'search', 'async_search', 'graph', '*_point_in_time', 'info', 'scroll', 'clear_scroll', 'search_mvt', 'eql', 'sql'
}
restTests {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,54 @@ setup:
dims: 40
index: true
similarity: max_inner_product


---
"Search with synthetic source":
# Checks that `bit` dense_vector fields are returned unchanged from synthetic _source,
# both when the field is not indexed (vector1) and when it is indexed (vector2).
- requires:
reason: "Support for bit dense vector synthetic source capability required"
test_runner_features: [capabilities]
capabilities:
- method: POST
path: /_search
capabilities: [ bit_dense_vector_synthetic_source ]
- do:
indices.create:
index: test_synthetic_source
body:
mappings:
properties:
name:
type: keyword
vector1:
type: dense_vector
element_type: bit
# For `bit` vectors dims counts bits: 40 dims matches the 5 byte values indexed below.
dims: 40
index: false
vector2:
type: dense_vector
element_type: bit
dims: 40
index: true
similarity: l2_norm

- do:
index:
index: test_synthetic_source
id: "1"
body:
name: cow.jpg
vector1: [2, -1, 1, 4, -3]
vector2: [2, -1, 1, 4, -3]

- do:
indices.refresh: {}

# Search with force_synthetic_source enabled and expect both vectors back exactly as indexed.
- do:
search:
force_synthetic_source: true
index: test_synthetic_source

- match: {hits.hits.0._id: "1"}
- match: {hits.hits.0._source.vector1: [2, -1, 1, 4, -3]}
- match: {hits.hits.0._source.vector2: [2, -1, 1, 4, -3]}
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,7 @@ public void write(XContentBuilder b) throws IOException {
if (indexCreatedVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
}
int dims = fieldType().dims;
int dims = fieldType().elementType == ElementType.BIT ? fieldType().dims / Byte.SIZE : fieldType().dims;
for (int dim = 0; dim < dims; dim++) {
fieldType().elementType.readAndWriteValue(byteBuffer, b);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ public List<Route> routes() {
);
}

/**
 * Returns the capability names declared in {@link SearchCapabilities#CAPABILITIES}.
 *
 * @return the shared capability set from {@link SearchCapabilities}
 */
@Override
public Set<String> supportedCapabilities() {
return SearchCapabilities.CAPABILITIES;
}

@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.rest.action.search;

import java.util.Set;

/**
 * Holder for the {@link Set} of "capabilities" supported by the {@link RestSearchAction}.
 */
public final class SearchCapabilities {

    /**
     * Capability name for synthetic source support on {@code dense_vector} fields with the
     * {@code bit} element type when {@code index} is set to {@code false}.
     */
    private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE = "bit_dense_vector_synthetic_source";

    /** All capability names declared by this class, as an unmodifiable set. */
    public static final Set<String> CAPABILITIES = Set.of(BIT_DENSE_VECTOR_SYNTHETIC_SOURCE);

    /** Constants holder only; never instantiated. */
    private SearchCapabilities() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1435,24 +1435,27 @@ protected boolean supportsEmptyInputArray() {

private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport {
private final int dims = between(5, 1000);
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT);
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
private final boolean indexed = randomBoolean();
private final boolean indexOptionsSet = indexed && randomBoolean();

@Override
public SyntheticSourceExample example(int maxValues) throws IOException {
Object value = elementType == ElementType.BYTE
? randomList(dims, dims, ESTestCase::randomByte)
: randomList(dims, dims, ESTestCase::randomFloat);
Object value = switch (elementType) {
case BYTE, BIT:
yield randomList(dims, dims, ESTestCase::randomByte);
case FLOAT:
yield randomList(dims, dims, ESTestCase::randomFloat);
};
return new SyntheticSourceExample(value, value, this::mapping);
}

private void mapping(XContentBuilder b) throws IOException {
b.field("type", "dense_vector");
b.field("dims", dims);
if (elementType == ElementType.BYTE || randomBoolean()) {
if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) {
b.field("element_type", elementType.toString());
}
b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims);
if (indexed) {
b.field("index", true);
b.field("similarity", "l2_norm");
Expand Down

0 comments on commit f8386c3

Please sign in to comment.