From 97c62a769f8059d0056f0c98eec6945220c16fc2 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Wed, 20 Oct 2021 10:41:03 -0700 Subject: [PATCH] Disallow kNN searches on nested vector fields (#79403) The new kNN endpoint currently doesn't support searches on nested fields. This PR updates the validation logic to detect this case and throw a clear error. It also adds tests for kNN search when there are nested documents. Relates to #78473. --- .../index/mapper/FieldAliasMapper.java | 4 +- .../index/mapper/FieldMapper.java | 4 +- .../index/mapper/MappingLookup.java | 39 +++---------- .../test/vectors/40_knn_search.yml | 55 +++++++++++++++---- .../vectors/query/KnnVectorQueryBuilder.java | 6 +- 5 files changed, 62 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java index f51f6c9259a30..3f6e9ce34fd97 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java @@ -91,8 +91,8 @@ public void validate(MappingLookup mappers) { throw new MapperParsingException("Invalid [path] value [" + path + "] for field alias [" + name() + "]: an alias cannot refer to another alias."); } - String aliasScope = mappers.getNestedScope(name); - String pathScope = mappers.getNestedScope(path); + String aliasScope = mappers.getNestedParent(name); + String pathScope = mappers.getNestedParent(path); if (Objects.equals(aliasScope, pathScope) == false) { StringBuilder message = new StringBuilder("Invalid [path] value [" + path + "] for field alias [" + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 0f7134841ab24..8df8d1c72e3cf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -290,7 +290,7 @@ public final void validate(MappingLookup mappers) { throw new IllegalArgumentException("[copy_to] may not be used to copy from a multi-field: [" + this.name() + "]"); } - final String sourceScope = mappers.getNestedScope(this.name()); + final String sourceScope = mappers.getNestedParent(this.name()); for (String copyTo : this.copyTo().copyToFields()) { if (mappers.isMultiField(copyTo)) { throw new IllegalArgumentException("[copy_to] may not be used to copy to a multi-field: [" + copyTo + "]"); @@ -299,7 +299,7 @@ public final void validate(MappingLookup mappers) { throw new IllegalArgumentException("Cannot copy to field [" + copyTo + "] since it is mapped as an object"); } - final String targetScope = mappers.getNestedScope(copyTo); + final String targetScope = mappers.getNestedParent(copyTo); checkNestedScopeCompatibility(sourceScope, targetScope); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index c0eb5837cee05..29cb9f520313a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -329,7 +329,14 @@ public boolean isObjectField(String field) { return objectMappers.containsKey(field); } - public String getNestedScope(String path) { + /** + * Given a nested object path, returns the path to its nested parent + * + * In particular, if a nested field `foo` contains an object field + * `bar.baz`, then calling this method with `foo.bar.baz` will return + * the path `foo`, skipping over the object-but-not-nested `foo.bar` + */ + public String getNestedParent(String path) { for (String parentPath = parentObject(path); parentPath != null; parentPath = parentObject(parentPath)) { ObjectMapper objectMapper = objectMappers.get(parentPath); if (objectMapper != null && objectMapper.isNested()) { @@ -466,34 +473,4 @@ public List getNestedParentMappers() { } return parents; } - - /** - * Given a nested object path, returns the path to its nested parent - * - * In particular, if a nested field `foo` contains an object field - * `bar.baz`, then calling this method with `foo.bar.baz` will return - * the path `foo`, skipping over the object-but-not-nested `foo.bar` - */ - public String getNestedParent(String path) { - ObjectMapper mapper = objectMappers().get(path); - if (mapper == null) { - return null; - } - if (path.contains(".") == false) { - return null; - } - do { - path = path.substring(0, path.lastIndexOf(".")); - mapper = objectMappers().get(path); - if (mapper == null) { - return null; - } - if (mapper.isNested()) { - return path; - } - if (path.contains(".") == false) { - return null; - } - } while(true); - } } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/40_knn_search.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/40_knn_search.yml index a83ec8c410978..03a33d902a34d 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/40_knn_search.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/40_knn_search.yml @@ -14,12 +14,26 @@ setup: dims: 5 index: true similarity: l2_norm + comments: + type: nested + properties: + body: + type: text + vector: + type: dense_vector + dims: 5 + index: true + similarity: l2_norm + - do: index: index: test body: name: cow.jpg vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + comments: + - body: "free the cows" + vector: [0.75, 100.0, 0.33, 16.2, -10.2] - do: index: @@ -28,6 +42,9 @@ setup: body: name: moose.jpg vector: [-0.5, 100.0, -13, 14.8, -156.0] + comments: + - body: "what a great moose" + vector: [11.4, 99.0, 1.55, -2.9, -10.2] - do: index: @@ -63,19 +80,20 @@ setup: "Test nonexistent field": - do: catch: bad_request - search: - rest_total_hits_as_int: true - index: test-index + knn_search: + index: test body: - query: - knn: - field: nonexistent - query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - num_candidates: 1 - - match: { error.root_cause.0.type: "illegal_argument_exception" } + fields: [ "name" ] + knn: + field: nonexistent + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + k: 2 + num_candidates: 3 + - match: { error.root_cause.0.type: "query_shard_exception" } + - match: { error.root_cause.0.reason: "failed to create query: field [nonexistent] does not exist in the mapping" } --- -"Direct knn queries are disallowed": +"Direct kNN queries are disallowed": - do: catch: bad_request search: @@ -88,3 +106,20 @@ setup: query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] num_candidates: 1 - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "[knn] queries cannot be provided directly, use the [_knn_search] endpoint instead" } + +--- +"kNN searches on nested fields are disallowed": + - do: + catch: bad_request + knn_search: + index: test + body: + fields: [ "nonexistent" ] + knn: + field: comments.vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + k: 2 + num_candidates: 3 + - match: { error.root_cause.0.type: "query_shard_exception" } + - match: { error.root_cause.0.reason: "failed to create query: [knn] queries are not supported on nested fields" } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/KnnVectorQueryBuilder.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/KnnVectorQueryBuilder.java index 226a017f0d8dc..7b93840d5baac 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/KnnVectorQueryBuilder.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/KnnVectorQueryBuilder.java @@ -87,13 +87,17 @@ protected Query doToQuery(SearchExecutionContext context) { + DenseVectorFieldMapper.CONTENT_TYPE + "] fields"); } + if (context.getNestedParent(fieldType.name()) != null) { + throw new IllegalArgumentException("[" + NAME + "] queries are not supported on nested fields"); + } + DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) fieldType; if (queryVector.length != vectorFieldType.dims()) { throw new IllegalArgumentException("the query vector has a different dimension [" + queryVector.length + "] " + "than the index vectors [" + vectorFieldType.dims() + "]"); } if (vectorFieldType.isSearchable() == false) { - throw new IllegalArgumentException("[" + "[" + NAME + "] queries are not supported if [index] is disabled"); + throw new IllegalArgumentException("[" + NAME + "] queries are not supported if [index] is disabled"); } return new KnnVectorQuery(fieldType.name(), queryVector, numCands); }