Skip to content

Commit

Permalink
Make semantic text part of the text family (elastic#119792)
Browse files Browse the repository at this point in the history
Co-authored-by: Ioana Tagirta <[email protected]>
(cherry picked from commit 29e1bf9)

# Conflicts:
#	x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java
#	x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec
  • Loading branch information
Mikep86 committed Jan 10, 2025
1 parent d4ec629 commit 076bf12
Show file tree
Hide file tree
Showing 11 changed files with 197 additions and 116 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/119792.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 119792
summary: Make semantic text part of the text family
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
import org.elasticsearch.index.mapper.PlaceHolderFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.ZeroTermsQueryOption;
import org.elasticsearch.lucene.analysis.miscellaneous.DisableGraphAttribute;
Expand All @@ -62,24 +65,26 @@ public enum Type implements Writeable {
/**
* The text is analyzed and terms are added to a boolean query.
*/
BOOLEAN(0),
BOOLEAN(0, org.elasticsearch.index.query.MatchQueryBuilder.NAME),
/**
* The text is analyzed and used as a phrase query.
*/
PHRASE(1),
PHRASE(1, MatchPhraseQueryBuilder.NAME),
/**
* The text is analyzed and used in a phrase query, with the last term acting as a prefix.
*/
PHRASE_PREFIX(2),
PHRASE_PREFIX(2, MatchPhrasePrefixQueryBuilder.NAME),
/**
* The text is analyzed, terms are added to a boolean query with the last term acting as a prefix.
*/
BOOLEAN_PREFIX(3);
BOOLEAN_PREFIX(3, MatchBoolPrefixQueryBuilder.NAME);

private final int ordinal;
private final String queryName;

Type(int ordinal) {
Type(int ordinal, String queryName) {
this.ordinal = ordinal;
this.queryName = queryName;
}

public static Type readFromStream(StreamInput in) throws IOException {
Expand All @@ -92,6 +97,10 @@ public static Type readFromStream(StreamInput in) throws IOException {
throw new ElasticsearchException("unknown serialized type [" + ord + "]");
}

/**
 * Returns the query name associated with this match type.
 * <p>
 * The value is the name passed to the enum constant's constructor
 * (for example {@code MatchPhraseQueryBuilder.NAME} for {@code PHRASE}).
 * It is used to build the "does not support ... queries" error messages.
 *
 * @return the query name for this type
 */
public String getQueryName() {
    return this.queryName;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(this.ordinal);
Expand Down Expand Up @@ -206,11 +215,23 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
IllegalArgumentException iae;
if (fieldType instanceof PlaceHolderFieldMapper.PlaceHolderFieldType) {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] in legacy index does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] in legacy index does not support "
+ type.getQueryName()
+ " queries"
);
} else {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] does not support "
+ type.getQueryName()
+ " queries"
);
}
if (lenient) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ public final void test() throws Throwable {
}

protected void shouldSkipTest(String testName) throws IOException {
if (testCase.requiredCapabilities.contains("semantic_text_type")) {
if (testCase.requiredCapabilities.contains("semantic_text_type")
|| testCase.requiredCapabilities.contains("semantic_text_field_caps")) {
assumeTrue("Inference test service needs to be supported for semantic_text", supportsInferenceTestService());
}
checkCapabilities(adminClient(), testFeatureService, testName, testCase);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ private static Void convertUnsupported(String s) {
for (Type value : Type.values()) {
LOOKUP.put(value.name(), value);
}
// Types that field caps reports under a different family type (semantic_text belongs to the text family)
LOOKUP.put("SEMANTIC_TEXT", TEXT);

// widen smaller types
LOOKUP.put("SHORT", INTEGER);
LOOKUP.put("BYTE", INTEGER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something")
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -617,32 +618,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something") AND match(host, "host1")
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where match(st_multi_value, "something") AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -659,12 +663,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_function
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND match(semantic_text_field, "something")
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Original file line number Diff line number Diff line change
Expand Up @@ -611,14 +611,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something"
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -627,32 +628,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something" AND host:"host1"
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where st_multi_value:"something" AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -669,12 +673,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND semantic_text_field:"something"
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Loading

0 comments on commit 076bf12

Please sign in to comment.