From df6911164265f5244098c81a54c526df4f296ca5 Mon Sep 17 00:00:00 2001 From: Arpan Chakraborty Date: Tue, 3 Sep 2024 15:19:07 +0100 Subject: [PATCH 1/5] feat(custom-highlight): added infra for custom highlighting in es query --- .../common/mappers/SearchFlagsInputMapper.java | 7 +++++++ .../src/main/resources/search.graphql | 10 ++++++++++ .../query/request/SearchRequestHandler.java | 14 ++++++++++++++ .../com/linkedin/metadata/query/SearchFlags.pdl | 10 ++++++++++ 4 files changed, 41 insertions(+) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index e6b75f9482f59..10b5580a4d91e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -8,6 +8,7 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import com.linkedin.data.template.StringArray; /** * Maps GraphQL SearchFlags to Pegasus @@ -64,6 +65,12 @@ public com.linkedin.metadata.query.SearchFlags apply( .map(c -> GroupingCriterionInputMapper.map(context, c)) .collect(Collectors.toList())))); } + if (searchFlags.getCustomHighlighting() != null) { + result.setCustomHighlighting(searchFlags.getCustomHighlighting()); + } + if (searchFlags.getCustomHighlightingFields() != null) { + result.setCustomHighlightingFields(new StringArray(searchFlags.getCustomHighlightingFields())); + } return result; } } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 09a7217073527..75baff1a3c6c1 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -162,6 +162,16 @@ input SearchFlags { Whether to include restricted entities """ includeRestricted: Boolean + + """ + Whether to enable custom highlighting + """ + customHighlighting: Boolean + + """ + fields to include for custom Highlighting + """ + customHighlightingFields: [String!] } """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 6e4210de6ef80..2d19bb56f7c5e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -213,6 +213,20 @@ public SearchRequest getSearchRequest( if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { searchSourceBuilder.highlighter(highlights); } + if (Boolean.TRUE.equals(finalSearchFlags.isCustomHighlighting())) { + HighlightBuilder highlightBuilder = new HighlightBuilder(); + + // Don't set tags to get the original field value + highlightBuilder.preTags(""); + highlightBuilder.postTags(""); + + // Check for each field name and any subfields + finalSearchFlags.getCustomHighlightingFields().stream() + .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")).distinct() + .forEach(highlightBuilder::field); + + searchSourceBuilder.highlighter(highlightBuilder); + } ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); if (Boolean.TRUE.equals(searchFlags.isGetSuggestions())) { diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index 355a8bb7a5cb3..1f23551eb2406 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -48,4 +48,14 @@ record SearchFlags { * include restricted entities in results (default is to filter) */ includeRestricted:optional boolean = false + + /** + * Whether to enable custom highlighting + */ + customHighlighting:optional boolean = false + + /** + * include mentioned fields inside elastich highlighting query + */ + customHighlightingFields:optional array[string] } From 1e93579986e82809d80878b4f4d72f3a6f997208 Mon Sep 17 00:00:00 2001 From: krekacsk Date: Mon, 9 Sep 2024 20:18:28 +0200 Subject: [PATCH 2/5] Modify custom highlighting to only include field names provided in the request --- .../mappers/SearchFlagsInputMapper.java | 3 -- .../src/main/resources/search.graphql | 5 --- .../query/request/SearchRequestHandler.java | 31 ++++++++++--------- .../linkedin/metadata/query/SearchFlags.pdl | 7 +---- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index 10b5580a4d91e..dea87c9a0583d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -65,9 +65,6 @@ public com.linkedin.metadata.query.SearchFlags apply( .map(c -> GroupingCriterionInputMapper.map(context, c)) .collect(Collectors.toList())))); } - if (searchFlags.getCustomHighlighting() != null) { - result.setCustomHighlighting(searchFlags.getCustomHighlighting()); - } if (searchFlags.getCustomHighlightingFields() != null) { result.setCustomHighlightingFields(new StringArray(searchFlags.getCustomHighlightingFields())); } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 75baff1a3c6c1..50e7c3e812234 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -163,11 +163,6 @@ input SearchFlags { """ includeRestricted: Boolean - """ - Whether to enable custom highlighting - """ - customHighlighting: Boolean - """ fields to include for custom Highlighting """ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 2d19bb56f7c5e..68f7ef05421ab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -49,6 +49,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections.CollectionUtils; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.unit.TimeValue; @@ -211,22 +212,13 @@ public SearchRequest getSearchRequest( .forEach(searchSourceBuilder::aggregation); } if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { - searchSourceBuilder.highlighter(highlights); + if (CollectionUtils.isNotEmpty(searchFlags.getCustomHighlightingFields())) { + searchSourceBuilder.highlighter(getValidatedHighlighter(searchFlags.getCustomHighlightingFields())); + } else { + searchSourceBuilder.highlighter(highlights); + } } - if (Boolean.TRUE.equals(finalSearchFlags.isCustomHighlighting())) { - HighlightBuilder highlightBuilder = new HighlightBuilder(); - - // Don't set tags to get the original field value - highlightBuilder.preTags(""); - highlightBuilder.postTags(""); - // Check for each field name and any subfields - finalSearchFlags.getCustomHighlightingFields().stream() - .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")).distinct() - .forEach(highlightBuilder::field); - - searchSourceBuilder.highlighter(highlightBuilder); - } ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); if (Boolean.TRUE.equals(searchFlags.isGetSuggestions())) { @@ -570,4 +562,15 @@ private List extractSearchSuggestions(@Nonnull SearchResponse } return searchSuggestions; } + + private HighlightBuilder getValidatedHighlighter(Collection fieldsToHighlight) { + HighlightBuilder highlightBuilder = new HighlightBuilder(); + highlightBuilder.preTags(""); + highlightBuilder.postTags(""); + fieldsToHighlight.stream() + .filter(defaultQueryFieldNames::contains) + .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")).distinct() + .forEach(highlightBuilder::field); + return highlightBuilder; + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index 1f23551eb2406..0561a9c6f7374 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -48,14 +48,9 @@ record SearchFlags { * include restricted entities in results (default is to filter) */ includeRestricted:optional boolean = false - - /** - * Whether to enable custom highlighting - */ - customHighlighting:optional boolean = false /** - * include mentioned fields inside elastich highlighting query + * Include mentioned fields inside elastic highlighting query */ customHighlightingFields:optional array[string] } From 37b99f275b316bac22b8036fd508cdb53d8660ab Mon Sep 17 00:00:00 2001 From: krekacsk Date: Mon, 9 Sep 2024 20:19:04 +0200 Subject: [PATCH 3/5] Add test for custom highlighting --- .../request/SearchRequestHandlerTest.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1cd9a274463d3..0c523c9c51177 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -55,6 +55,9 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { private OperationContext operationContext; public static SearchConfiguration testQueryConfig; + public static List validHighlightingFields = List.of("urn", "foreignKey"); + public static StringArray customHighlightFields = new StringArray( + List.of(validHighlightingFields.get(0), validHighlightingFields.get(1), "notExistingField", "")); static { testQueryConfig = new SearchConfiguration(); @@ -102,6 +105,31 @@ public void testDatasetFieldsAndHighlights() { "unexpected lineage fields in highlights: " + highlightFields); } + @Test + public void testCustomHighlights() { + EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); + SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequest searchRequest = + requestHandler.getSearchRequest( + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setCustomHighlightingFields(customHighlightFields)), + "testQuery", + null, + null, + 0, + 10, + null); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertNotNull(sourceBuilder.highlighter()); + assertEquals(4, sourceBuilder.highlighter().fields().size()); + assertTrue(sourceBuilder.highlighter().fields() + .stream() + .map(HighlightBuilder.Field::name) + .toList() + .containsAll(validHighlightingFields)); + } + @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = From 293440ef4035ed2dace034132ec077302f568f9a Mon Sep 17 00:00:00 2001 From: krekacsk Date: Tue, 10 Sep 2024 17:40:23 +0200 Subject: [PATCH 4/5] SpotlessApply linting changes --- .../mappers/SearchFlagsInputMapper.java | 5 +-- .../query/request/SearchRequestHandler.java | 10 +++--- .../request/SearchRequestHandlerTest.java | 34 +++++++++++-------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index dea87c9a0583d..9f5025ccf303a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.common.mappers; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.SearchFlags; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -8,7 +9,6 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import com.linkedin.data.template.StringArray; /** * Maps GraphQL SearchFlags to Pegasus @@ -66,7 +66,8 @@ public com.linkedin.metadata.query.SearchFlags apply( .collect(Collectors.toList())))); } if (searchFlags.getCustomHighlightingFields() != null) { - result.setCustomHighlightingFields(new StringArray(searchFlags.getCustomHighlightingFields())); + result.setCustomHighlightingFields( + new StringArray(searchFlags.getCustomHighlightingFields())); } return result; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 68f7ef05421ab..163bae64169d5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -213,7 +213,8 @@ public SearchRequest getSearchRequest( } if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { if (CollectionUtils.isNotEmpty(searchFlags.getCustomHighlightingFields())) { - searchSourceBuilder.highlighter(getValidatedHighlighter(searchFlags.getCustomHighlightingFields())); + searchSourceBuilder.highlighter( + getValidatedHighlighter(searchFlags.getCustomHighlightingFields())); } else { searchSourceBuilder.highlighter(highlights); } @@ -568,9 +569,10 @@ private HighlightBuilder getValidatedHighlighter(Collection fieldsToHigh highlightBuilder.preTags(""); highlightBuilder.postTags(""); fieldsToHighlight.stream() - .filter(defaultQueryFieldNames::contains) - .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")).distinct() - .forEach(highlightBuilder::field); + .filter(defaultQueryFieldNames::contains) + .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")) + .distinct() + .forEach(highlightBuilder::field); return highlightBuilder; } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 0c523c9c51177..2f7120e1f0b5e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -56,8 +56,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { public static SearchConfiguration testQueryConfig; public static List validHighlightingFields = List.of("urn", "foreignKey"); - public static StringArray customHighlightFields = new StringArray( - List.of(validHighlightingFields.get(0), validHighlightingFields.get(1), "notExistingField", "")); + public static StringArray customHighlightFields = + new StringArray( + List.of( + validHighlightingFields.get(0), + validHighlightingFields.get(1), + "notExistingField", + "")); static { testQueryConfig = new SearchConfiguration(); @@ -109,22 +114,23 @@ public void testDatasetFieldsAndHighlights() { public void testCustomHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = - requestHandler.getSearchRequest( - operationContext.withSearchFlags( - flags -> flags.setFulltext(false).setCustomHighlightingFields(customHighlightFields)), - "testQuery", - null, - null, - 0, - 10, - null); + requestHandler.getSearchRequest( + operationContext.withSearchFlags( + flags -> + flags.setFulltext(false).setCustomHighlightingFields(customHighlightFields)), + "testQuery", + null, + null, + 0, + 10, + null); SearchSourceBuilder sourceBuilder = searchRequest.source(); assertNotNull(sourceBuilder.highlighter()); assertEquals(4, sourceBuilder.highlighter().fields().size()); - assertTrue(sourceBuilder.highlighter().fields() - .stream() + assertTrue( + sourceBuilder.highlighter().fields().stream() .map(HighlightBuilder.Field::name) .toList() .containsAll(validHighlightingFields)); From 16c4c72e378cc246b6d37cb09d8219076a93b8aa Mon Sep 17 00:00:00 2001 From: krekacsk Date: Wed, 11 Sep 2024 09:18:23 +0200 Subject: [PATCH 5/5] Add snapshot files --- .../snapshot/com.linkedin.entity.aspects.snapshot.json | 2 +- .../com.linkedin.entity.entities.snapshot.json | 10 +++++++++- .../snapshot/com.linkedin.entity.runs.snapshot.json | 2 +- .../com.linkedin.operations.operations.snapshot.json | 2 +- .../com.linkedin.platform.platform.snapshot.json | 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index c85e57a35eac7..3688311b1f234 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -3180,7 +3180,7 @@ }, { "name" : "isPartitioningKey", "type" : "boolean", - "doc" : "For Datasets which are partitioned, this determines the partitioning key.", + "doc" : "For Datasets which are partitioned, this determines the partitioning key.\nNote that multiple columns can be part of a partitioning key, but currently we do not support\nrendering the ordered partitioning key.", "optional" : true }, { "name" : "jsonProps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 5b6f7a290fd1a..edbb847a43408 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -3572,7 +3572,7 @@ }, { "name" : "isPartitioningKey", "type" : "boolean", - "doc" : "For Datasets which are partitioned, this determines the partitioning key.", + "doc" : "For Datasets which are partitioned, this determines the partitioning key.\nNote that multiple columns can be part of a partitioning key, but currently we do not support\nrendering the ordered partitioning key.", "optional" : true }, { "name" : "jsonProps", @@ -6032,6 +6032,14 @@ "doc" : "include restricted entities in results (default is to filter)", "default" : false, "optional" : true + }, { + "name" : "customHighlightingFields", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Include mentioned fields inside elastic highlighting query", + "optional" : true } ] }, { "type" : "enum", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index dc226adf635c4..16549757a961f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -2908,7 +2908,7 @@ }, { "name" : "isPartitioningKey", "type" : "boolean", - "doc" : "For Datasets which are partitioned, this determines the partitioning key.", + "doc" : "For Datasets which are partitioned, this determines the partitioning key.\nNote that multiple columns can be part of a partitioning key, but currently we do not support\nrendering the ordered partitioning key.", "optional" : true }, { "name" : "jsonProps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index 7bc02ce2017fd..95df1d2ce21d9 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -2902,7 +2902,7 @@ }, { "name" : "isPartitioningKey", "type" : "boolean", - "doc" : "For Datasets which are partitioned, this determines the partitioning key.", + "doc" : "For Datasets which are partitioned, this determines the partitioning key.\nNote that multiple columns can be part of a partitioning key, but currently we do not support\nrendering the ordered partitioning key.", "optional" : true }, { "name" : "jsonProps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 3f64d1b948035..3d16550db1e0f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -3566,7 +3566,7 @@ }, { "name" : "isPartitioningKey", "type" : "boolean", - "doc" : "For Datasets which are partitioned, this determines the partitioning key.", + "doc" : "For Datasets which are partitioned, this determines the partitioning key.\nNote that multiple columns can be part of a partitioning key, but currently we do not support\nrendering the ordered partitioning key.", "optional" : true }, { "name" : "jsonProps",