Skip to content

Commit

Permalink
[8.x] Support semantic_text in object fields (elastic#114601) (elasti…
Browse files Browse the repository at this point in the history
…c#115040)

* Support semantic_text in object fields (elastic#114601)

* Fix build error

* Fix test build error
  • Loading branch information
Mikep86 authored Oct 17, 2024
1 parent 1d8c61f commit 310f67f
Show file tree
Hide file tree
Showing 7 changed files with 542 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docs/changelog/114601.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 114601
summary: Support semantic_text in object fields
area: Vector Search
type: bug
issues:
- 114401
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ public Set<NodeFeature> getFeatures() {
);
}

/**
 * Lists the node features this class reports as test features.
 *
 * @return a set holding only {@link SemanticTextFieldMapper#SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX}
 */
@Override
public Set<NodeFeature> getTestFeatures() {
    final NodeFeature objectFieldFix = SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX;
    return Set.of(objectFieldFix);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons
),
indexRequest.getContentType()
);
newDocMap.put(fieldName, result);
SemanticTextFieldMapper.insertValue(fieldName, newDocMap, result);
}
indexRequest.source(newDocMap, indexRequest.getContentType());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MapperMergeContext;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.NestedObjectMapper;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
Expand Down Expand Up @@ -85,6 +86,7 @@
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id");
public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2");
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");

public static final String CONTENT_TYPE = "semantic_text";
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
Expand Down Expand Up @@ -393,6 +395,25 @@ public Object getOriginalValue(Map<String, Object> sourceAsMap) {
return XContentMapValues.extractValue(TEXT_FIELD, fieldValueMap);
}

/**
 * Rejects mappings in which this field sits directly inside an object field that has subobjects disabled.
 * A field whose full path starts with its leaf name (i.e. it has no parent prefix) is not checked.
 *
 * @param mappers lookup used to resolve the parent object mapper by path
 * @throws IllegalStateException    if the field has a parent path but no object mapper is registered for it
 * @throws IllegalArgumentException if the parent object mapper has subobjects disabled
 */
@Override
protected void doValidate(MappingLookup mappers) {
    final String path = fullPath();
    final int leafStart = path.lastIndexOf(leafName());
    if (leafStart <= 0) {
        // The leaf name begins the full path, so there is no parent object to inspect.
        return;
    }

    // The character just before the leaf name is the dot delimiter; leave it out of the parent path.
    final String parentPath = path.substring(0, leafStart - 1);
    final ObjectMapper parent = mappers.objectMappers().get(parentPath);
    if (parent == null) {
        throw new IllegalStateException(CONTENT_TYPE + " field [" + path + "] does not have a parent object mapper");
    }

    final boolean subobjectsDisabled = parent.subobjects() == ObjectMapper.Subobjects.DISABLED;
    if (subobjectsDisabled) {
        throw new IllegalArgumentException(
            CONTENT_TYPE + " field [" + path + "] cannot be in an object field with subobjects disabled"
        );
    }
}

public static class SemanticTextFieldType extends SimpleMappedFieldType {
private final String inferenceId;
private final String searchInferenceId;
Expand Down Expand Up @@ -587,6 +608,116 @@ private String generateInvalidQueryInferenceResultsMessage(StringBuilder baseMes
}
}

/**
 * Stores {@code newValue} in {@code map} at the location named by the dot-delimited {@code path}, overwriting whatever is
 * already there. The map is mutated directly; nothing is copied.
 * <p>
 * Existing structure is followed for as long as it matches the path. Any trailing part of the path that is not present in
 * the map is kept as one dotted key and written into the deepest (sub-)map that was reached.
 * </p>
 * <p>
 * Example: starting from
 * </p>
 * <pre>
 * {
 *   "path1": {
 *     "path2": {
 *       "key1": "value1"
 *     }
 *   }
 * }
 * </pre>
 * <p>
 * a call with path {@code "path1.path2.path3.key2"} and value {@code "value2"} leaves the map as
 * </p>
 * <pre>
 * {
 *   "path1": {
 *     "path2": {
 *       "key1": "value1",
 *       "path3.key2": "value2"
 *     }
 *   }
 * }
 * </pre>
 * <p>
 * If splitting the path on dots yields no elements, the method returns without touching the map.
 * </p>
 *
 * @param path     dot-delimited location of the value inside the map.
 * @param map      the map to walk and update in place.
 * @param newValue the value to store at the resolved location.
 *
 * @throws IllegalArgumentException if a value along the path cannot be traversed, or if more than one insertion point
 *                                  matches the path so the target is ambiguous.
 * @throws IllegalStateException    if no insertion point is found at all (not expected to happen).
 */
public static void insertValue(String path, Map<?, ?> map, Object newValue) {
    final String[] segments = path.split("\\.");
    if (segments.length == 0) {
        return;
    }

    final List<SuffixMap> candidates = extractSuffixMaps(segments, 0, map);
    final int candidateCount = candidates.size();

    if (candidateCount == 0) {
        // This should never happen. Throw in case it does for some reason.
        throw new IllegalStateException("extractSuffixMaps returned an empty suffix map list");
    }
    if (candidateCount > 1) {
        throw new IllegalArgumentException(
            "Path [" + path + "] could be inserted in " + candidateCount + " distinct ways, it is ambiguous which one to use"
        );
    }

    // Exactly one insertion point: write the value under the remaining (possibly dotted) key.
    final SuffixMap target = candidates.get(0);
    target.map().put(target.suffix(), newValue);
}

/**
 * A candidate insertion point: a map together with the key ({@code suffix}) under which a value would be stored in it.
 * The suffix may be a single path element or several remaining path elements joined with dots.
 *
 * @param suffix the key to use when writing into {@code map}
 * @param map    the map that would receive the value
 */
private record SuffixMap(String suffix, Map<String, Object> map) {}

/**
 * Collects every place in {@code currentValue} where the remainder of the path, starting at {@code pathElements[index]},
 * could be written.
 * <ul>
 * <li>For a list, each element is searched with the same index and the results are concatenated.</li>
 * <li>For a map, progressively longer dotted keys ({@code a}, {@code a.b}, {@code a.b.c}, ...) built from the remaining
 * path elements are tried. A key that consumes the whole remaining path is recorded as an insertion point; a key that
 * consumes only part of it is traversed recursively. If nothing matches, the map itself is returned as the insertion
 * point, keyed by the full remaining path joined with dots.</li>
 * <li>Any other value, including {@code null}, cannot be traversed and is rejected.</li>
 * </ul>
 *
 * @param pathElements the complete path, split on dots
 * @param index        position in {@code pathElements} of the first element not yet consumed
 * @param currentValue the value reached so far; expected to be a map or a list
 * @return the insertion points found; never empty when {@code currentValue} is a map
 * @throws IllegalArgumentException if {@code currentValue} is neither a map nor a list (a {@code null} value included)
 */
private static List<SuffixMap> extractSuffixMaps(String[] pathElements, int index, Object currentValue) {
    if (currentValue instanceof List<?> valueList) {
        List<SuffixMap> suffixMaps = new ArrayList<>(valueList.size());
        for (Object o : valueList) {
            suffixMaps.addAll(extractSuffixMaps(pathElements, index, o));
        }

        return suffixMaps;
    } else if (currentValue instanceof Map<?, ?>) {
        @SuppressWarnings("unchecked")
        Map<String, Object> map = (Map<String, Object>) currentValue;
        List<SuffixMap> suffixMaps = new ArrayList<>(map.size());

        // Grow the candidate key one path element at a time so that dotted keys stored in the map are also matched.
        String key = pathElements[index];
        int position = index;
        while (position < pathElements.length) {
            if (map.containsKey(key)) {
                if (position + 1 == pathElements.length) {
                    // We found the complete path
                    suffixMaps.add(new SuffixMap(key, map));
                } else {
                    // We've matched that path partially, keep traversing to try to match it fully
                    suffixMaps.addAll(extractSuffixMaps(pathElements, position + 1, map.get(key)));
                }
            }

            if (++position < pathElements.length) {
                key += "." + pathElements[position];
            }
        }

        if (suffixMaps.isEmpty()) {
            // We checked for all remaining elements in the path, and they do not exist. This means we found a leaf map that we should
            // add the value to. At this point key holds every remaining path element joined with dots.
            suffixMaps.add(new SuffixMap(key, map));
        }

        return suffixMaps;
    } else {
        // A null value (e.g. an explicit null in the source, or a null list element) has no class to report;
        // describe it as "null" instead of failing with a NullPointerException while building the message.
        String typeName = currentValue == null ? "null" : currentValue.getClass().getSimpleName();
        throw new IllegalArgumentException(
            "Path ["
                + String.join(".", Arrays.copyOfRange(pathElements, 0, index))
                + "] has value ["
                + currentValue
                + "] of type ["
                + typeName
                + "], which cannot be traversed into further"
        );
    }
}

private static ObjectMapper createInferenceField(
MapperBuilderContext context,
IndexVersion indexVersionCreated,
Expand Down
Loading

0 comments on commit 310f67f

Please sign in to comment.