Skip to content

Commit

Permalink
Add exists query to semantic_text
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosdelest committed Jun 21, 2024
1 parent 3faf4ce commit b0a5b53
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@

package org.elasticsearch.xpack.inference.mapper;

import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -42,6 +45,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
import org.elasticsearch.inference.InferenceResults;
import org.elasticsearch.inference.SimilarityMeasure;
import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
Expand Down Expand Up @@ -352,6 +356,23 @@ public Query termQuery(Object value, SearchExecutionContext context) {
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query");
}

/**
 * Builds the query that matches documents holding a value for this {@code semantic_text} field.
 *
 * <p>When no embeddings field is available there is nothing to look up, so a
 * {@link MatchNoDocsQuery} is returned. Otherwise the embeddings field's own exists query is
 * run against the chunks path and joined back to the parent documents, which is the equivalent
 * of wrapping that exists query in a nested query.
 *
 * @param context search execution context; passed to the embeddings field's exists query and
 *                used to obtain the creation version of the index being searched
 * @return a block-join query over the chunks path, or a {@link MatchNoDocsQuery} when the
 *         embeddings field is {@code null}
 */
@Override
public Query existsQuery(SearchExecutionContext context) {
    if (getEmbeddingsField() == null) {
        // NOTE(review): presumably the embeddings field is only created once model settings are known - confirm
        return new MatchNoDocsQuery();
    }

    // Do the equivalent of a nested query with an exists query for the embeddings field
    String nestedFieldPath = getChunksFieldName(name());
    // The parent filter must reflect the index that is being queried, so use the version the index
    // was created with rather than the latest version known to this node.
    BitSetProducer parentFilter = new QueryBitSetProducer(Queries.newNonNestedFilter(context.indexVersionCreated()));
    return new ESToParentBlockJoinQuery(
        getEmbeddingsField().fieldType().existsQuery(context),
        parentFilter,
        // Exists is a pure filter; child scores are not propagated to the parent
        ScoreMode.None,
        nestedFieldPath
    );
}

@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
// Redirect the fetcher to load the original values of the field
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Shared fixtures for every test in this file:
#   1. a sparse_embedding inference endpoint (sparse-inference-id, service test_service)
#   2. a text_embedding inference endpoint (dense-inference-id, service text_embedding_test_service,
#      10 dimensions, COSINE similarity)
#   3. one index per endpoint (test-sparse-index, test-dense-index), each mapping a single
#      semantic_text field named inference_field to the matching endpoint.
setup:
- requires:
cluster_features: "gte_v8.15.0"
reason: semantic_text introduced in 8.15.0

# Register the sparse embedding endpoint.
- do:
inference.put:
task_type: sparse_embedding
inference_id: sparse-inference-id
body: >
{
"service": "test_service",
"service_settings": {
"model": "my_model",
"api_key": "abc64"
},
"task_settings": {
}
}
- do:
inference.put:
task_type: text_embedding
inference_id: dense-inference-id
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 10,
"api_key": "abc64",
"similarity": "COSINE"
},
"task_settings": {
}
}
- do:
indices.create:
index: test-sparse-index
body:
mappings:
properties:
inference_field:
type: semantic_text
inference_id: sparse-inference-id

# Index whose semantic_text field is backed by the dense endpoint.
- do:
indices.create:
index: test-dense-index
body:
mappings:
properties:
inference_field:
type: semantic_text
inference_id: dense-inference-id

---
# An exists query on inference_field against an index that holds no documents must return
# zero hits. Only test-sparse-index is queried in this test.
"Exists query with no indexed documents":
- do:
search:
index: test-sparse-index
body:
query:
exists:
field: "inference_field"

- match: { hits.total.value: 0 }

---
# A document whose inference_field is explicitly null must not be matched by an exists query.
# The same check is run first against the sparse index and then against the dense index.
"Exists query with null indexed documents":
- do:
index:
index: test-sparse-index
id: doc
body:
inference_field: null
refresh: true

# refresh: true above makes the document searchable before this query runs.
- do:
search:
index: test-sparse-index
body:
query:
exists:
field: "inference_field"

- match: { hits.total.value: 0 }

# Repeat the null-value scenario on the dense index.
- do:
index:
index: test-dense-index
id: doc
body:
inference_field: null
refresh: true

- do:
search:
index: test-dense-index
body:
query:
exists:
field: "inference_field"

- match: { hits.total.value: 0 }

---
# A document with a real text value in inference_field must be matched by an exists query.
# One document is indexed per index, so each search is expected to return exactly one hit.
"Exists query with indexed documents":
- do:
index:
index: test-sparse-index
id: doc
body:
inference_field: "hello world"
refresh: true

# refresh: true above makes the document searchable before this query runs.
- do:
search:
index: test-sparse-index
body:
query:
exists:
field: "inference_field"

- match: { hits.total.value: 1 }

# Repeat the populated-value scenario on the dense index.
- do:
index:
index: test-dense-index
id: doc
body:
inference_field: "hello world"
refresh: true

- do:
search:
index: test-dense-index
body:
query:
exists:
field: "inference_field"

- match: { hits.total.value: 1 }

0 comments on commit b0a5b53

Please sign in to comment.