Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.15] Fix Synthetic Source Handling for bit Type in dense_vector Field (#114407) #114759

Merged
merged 9 commits into from
Oct 15, 2024
6 changes: 6 additions & 0 deletions docs/changelog/114407.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 114407
summary: Fix synthetic source handling for `bit` type in `dense_vector` field
area: Search
type: bug
issues:
- 114402
2 changes: 1 addition & 1 deletion qa/ccs-common-rest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test'

restResources {
restApi {
include '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
include 'capabilities', '_common', 'bulk', 'count', 'cluster', 'field_caps', 'get', 'knn_search', 'index', 'indices', 'msearch',
'search', 'async_search', 'graph', '*_point_in_time', 'info', 'scroll', 'clear_scroll', 'search_mvt', 'eql', 'sql'
}
restTests {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,54 @@ setup:
dims: 40
index: true
similarity: max_inner_product


---
# Verifies that `bit` dense_vector values are returned unchanged when _source is synthesized.
"Search with synthetic source":
# Only run when POST /_search advertises the bit_dense_vector_synthetic_source capability.
- requires:
reason: "Support for bit dense vector synthetic source capability required"
test_runner_features: [capabilities]
capabilities:
- method: POST
path: /_search
capabilities: [ bit_dense_vector_synthetic_source ]
# Two bit vectors with dims: 40 — vector1 is not indexed, vector2 is indexed with l2_norm similarity.
- do:
indices.create:
index: test_synthetic_source
body:
mappings:
properties:
name:
type: keyword
vector1:
type: dense_vector
element_type: bit
dims: 40
index: false
vector2:
type: dense_vector
element_type: bit
dims: 40
index: true
similarity: l2_norm

# The 40 bit dimensions are supplied as 5 values (one value per 8 bits).
- do:
index:
index: test_synthetic_source
id: "1"
body:
name: cow.jpg
vector1: [2, -1, 1, 4, -3]
vector2: [2, -1, 1, 4, -3]

- do:
indices.refresh: {}

# Search with synthetic _source forced on.
- do:
search:
force_synthetic_source: true
index: test_synthetic_source

# Both vectors must come back exactly as they were indexed.
- match: {hits.hits.0._id: "1"}
- match: {hits.hits.0._source.vector1: [2, -1, 1, 4, -3]}
- match: {hits.hits.0._source.vector2: [2, -1, 1, 4, -3]}
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,7 @@ public void write(XContentBuilder b) throws IOException {
if (indexCreatedVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
}
int dims = fieldType().dims;
int dims = fieldType().elementType == ElementType.BIT ? fieldType().dims / Byte.SIZE : fieldType().dims;
for (int dim = 0; dim < dims; dim++) {
fieldType().elementType.readAndWriteValue(byteBuffer, b);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ public List<Route> routes() {
);
}

/**
 * Reports the capability names this handler supports.
 *
 * @return the shared {@code SearchCapabilities.CAPABILITIES} set
 */
@Override
public Set<String> supportedCapabilities() {
return SearchCapabilities.CAPABILITIES;
}

@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.rest.action.search;

import java.util.Set;

/**
 * Holder for the "capability" names advertised by the {@link RestSearchAction}.
 * This class only carries constants and cannot be instantiated.
 */
public final class SearchCapabilities {

    /** Capability name: synthetic source is supported for the `bit` type in a `dense_vector` field when `index` is `false`. */
    private static final String BIT_DENSE_VECTOR_SYNTHETIC_SOURCE = "bit_dense_vector_synthetic_source";

    /** All capability names in this holder, as an unmodifiable {@link Set}. */
    public static final Set<String> CAPABILITIES = Set.of(BIT_DENSE_VECTOR_SYNTHETIC_SOURCE);

    private SearchCapabilities() {
        // constants only; no instances
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1435,24 +1435,27 @@ protected boolean supportsEmptyInputArray() {

private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport {
private final int dims = between(5, 1000);
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT);
private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT);
private final boolean indexed = randomBoolean();
private final boolean indexOptionsSet = indexed && randomBoolean();

/**
 * Builds a synthetic source example from a random list of exactly {@code dims} values.
 * The same list is passed twice to {@code SyntheticSourceExample}, together with the
 * {@code mapping} callback. {@code maxValues} is not read in the visible body.
 */
@Override
public SyntheticSourceExample example(int maxValues) throws IOException {
// NOTE(review): this hunk appears to show both sides of the change. The ternary below looks like
// the replaced form and the switch after it the replacement; confirm only one `value` declaration remains.
Object value = elementType == ElementType.BYTE
? randomList(dims, dims, ESTestCase::randomByte)
: randomList(dims, dims, ESTestCase::randomFloat);
// BYTE and BIT both draw random bytes; FLOAT draws random floats.
Object value = switch (elementType) {
case BYTE, BIT:
yield randomList(dims, dims, ESTestCase::randomByte);
case FLOAT:
yield randomList(dims, dims, ESTestCase::randomFloat);
};
return new SyntheticSourceExample(value, value, this::mapping);
}

private void mapping(XContentBuilder b) throws IOException {
b.field("type", "dense_vector");
b.field("dims", dims);
if (elementType == ElementType.BYTE || randomBoolean()) {
if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) {
b.field("element_type", elementType.toString());
}
b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims);
if (indexed) {
b.field("index", true);
b.field("similarity", "l2_norm");
Expand Down