-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[feature/semantic-text] semantic text copy to support #106689
Changes from 26 commits
7aaa3b6
ebc26d2
86ddc9d
dd73d01
c5de0da
cf62b1b
f029015
d3f9d86
140caa3
7d1c92a
e023a19
05aa06f
c80677f
5896c60
df0cc90
6d4bbf3
068615a
5c3d9c4
ace17dd
8bebb8d
47a22d7
5311424
707b3f1
5e5b32a
1abf95c
29dd33e
b17d584
82ffb5b
6b26200
bf5b837
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -297,41 +297,47 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu | |
final Map<String, Object> docMap = indexRequest.sourceAsMap(); | ||
boolean hasInput = false; | ||
for (var entry : fieldInferenceMetadata.getFieldInferenceOptions().entrySet()) { | ||
String field = entry.getKey(); | ||
String inferenceId = entry.getValue().inferenceId(); | ||
var value = XContentMapValues.extractValue(field, docMap); | ||
if (value == null) { | ||
continue; | ||
} | ||
if (inferenceResults.get(item.id()) == null) { | ||
inferenceResults.set( | ||
item.id(), | ||
new FieldInferenceResponseAccumulator( | ||
String fieldName = entry.getKey(); | ||
for (var sourceField : entry.getValue().sourceFields()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We now iterate over all the source fields when retrieving inference, so we're adding an additional loop here. |
||
|
||
var value = XContentMapValues.extractValue(sourceField, docMap); | ||
if (value == null) { | ||
continue; | ||
} | ||
if (inferenceResults.get(item.id()) == null) { | ||
inferenceResults.set( | ||
item.id(), | ||
Collections.synchronizedList(new ArrayList<>()), | ||
Collections.synchronizedList(new ArrayList<>()) | ||
) | ||
); | ||
} | ||
if (value instanceof String valueStr) { | ||
List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); | ||
fieldRequests.add(new FieldInferenceRequest(item.id(), field, valueStr)); | ||
hasInput = true; | ||
} else { | ||
inferenceResults.get(item.id()).failures.add( | ||
new ElasticsearchStatusException( | ||
"Invalid format for field [{}], expected [String] got [{}]", | ||
RestStatus.BAD_REQUEST, | ||
field, | ||
value.getClass().getSimpleName() | ||
) | ||
); | ||
new FieldInferenceResponseAccumulator( | ||
item.id(), | ||
Collections.synchronizedList(new ArrayList<>()), | ||
Collections.synchronizedList(new ArrayList<>()) | ||
) | ||
); | ||
} | ||
if (value instanceof String valueStr) { | ||
List<FieldInferenceRequest> fieldRequests = fieldRequestsMap.computeIfAbsent( | ||
inferenceId, | ||
k -> new ArrayList<>() | ||
); | ||
fieldRequests.add(new FieldInferenceRequest(item.id(), fieldName, valueStr)); | ||
hasInput = true; | ||
} else { | ||
inferenceResults.get(item.id()).failures.add( | ||
new ElasticsearchStatusException( | ||
"Invalid format for field [{}], expected [String] got [{}]", | ||
RestStatus.BAD_REQUEST, | ||
fieldName, | ||
value.getClass().getSimpleName() | ||
) | ||
); | ||
} | ||
} | ||
} | ||
if (hasInput == false) { | ||
// remove the existing _inference field (if present) since none of the content require inference. | ||
if (docMap.remove(InferenceMetadataFieldMapper.NAME) != null) { | ||
indexRequest.source(docMap); | ||
if (hasInput == false) { | ||
// remove the existing _inference field (if present) since none of the content require inference. | ||
if (docMap.remove(InferenceMetadataFieldMapper.NAME) != null) { | ||
indexRequest.source(docMap); | ||
} | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -345,6 +345,8 @@ private void parseResultsObject( | |
} | ||
parser.nextToken(); | ||
fieldMapper.parse(context); | ||
// Reset leaf object after parsing the field | ||
context.path().setWithinLeafObject(true); | ||
} | ||
if (visited.containsAll(REQUIRED_SUBFIELDS) == false) { | ||
Set<String> missingSubfields = REQUIRED_SUBFIELDS.stream() | ||
|
@@ -380,6 +382,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { | |
return SourceLoader.SyntheticFieldLoader.NOTHING; | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
public static void applyFieldInference( | ||
Map<String, Object> inferenceMap, | ||
String field, | ||
|
@@ -404,11 +407,12 @@ public static void applyFieldInference( | |
results.getWriteableName() | ||
); | ||
} | ||
Map<String, Object> fieldMap = new LinkedHashMap<>(); | ||
fieldMap.put(INFERENCE_ID, model.getInferenceEntityId()); | ||
|
||
Map<String, Object> fieldMap = (Map<String, Object>) inferenceMap.computeIfAbsent(field, s -> new LinkedHashMap<>()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Multiple inference results can be applied to a single field (one from each source field), so we need to be prepared for that. |
||
fieldMap.putAll(new SemanticTextModelSettings(model).asMap()); | ||
fieldMap.put(CHUNKS, chunks); | ||
inferenceMap.put(field, fieldMap); | ||
List<Map<String, Object>> fieldChunks = (List<Map<String, Object>>) fieldMap.computeIfAbsent(CHUNKS, k -> new ArrayList<>()); | ||
fieldChunks.addAll(chunks); | ||
fieldMap.put(INFERENCE_ID, model.getInferenceEntityId()); | ||
} | ||
|
||
record SemanticTextMapperContext(MapperBuilderContext context, SemanticTextFieldMapper mapper) {} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -310,3 +310,38 @@ setup: | |
id: doc_1 | ||
body: | ||
non_inference_field: "non inference test" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do any other tests come to mind? |
||
--- | ||
"semantic_text copy_to calculate inference for source fields": | ||
- do: | ||
indices.create: | ||
index: test-copy-to-index | ||
body: | ||
mappings: | ||
properties: | ||
inference_field: | ||
type: semantic_text | ||
inference_id: dense-inference-id | ||
source_field: | ||
type: text | ||
copy_to: inference_field | ||
|
||
- do: | ||
index: | ||
index: test-copy-to-index | ||
id: doc_1 | ||
body: | ||
source_field: "copy_to inference test" | ||
inference_field: "inference test" | ||
|
||
- do: | ||
get: | ||
index: test-copy-to-index | ||
id: doc_1 | ||
|
||
- match: { _source.inference_field: "inference test" } | ||
- length: {_source._inference.inference_field.chunks: 2} | ||
- exists: _source._inference.inference_field.chunks.0.inference | ||
- exists: _source._inference.inference_field.chunks.0.text | ||
- exists: _source._inference.inference_field.chunks.1.inference | ||
- exists: _source._inference.inference_field.chunks.1.text |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to change this back to protected so it can be overridden by our tests.