From 8b32e9467ab8c68ed4e07519e1f76024661c41dd Mon Sep 17 00:00:00 2001 From: Paul Butcher Date: Tue, 1 Aug 2023 11:37:46 +0100 Subject: [PATCH] Define the main Works search query in plain Json (#682) * start gherkining the facets * start filling out the faceting features * Apply auto-formatting rules * messing about with portability of the features file * only return populated buckets * finish new faceting feature tests for works. * add todo about order * a bit more faceting test finessing * WorkFacet tests pass (todo: spread the filtering business to other fields) * aggregate properly on language * workType filtering and aggregation * subjects and contributors * availabilities * tidy up bucket matching * improve filter test output * license works properly * skip tests that are not yet ready * filters now only return the filtered value in aggregations * remove AnyFunSpec from base class * remove AnyFunSpec from base class * commit missed file * Do not return unwanted empty buckets * Do not return unwanted empty buckets * extend no-empties rule to images * avoid naming collision * new faceting paradigm * enforce order in aggregations * remove unnecessary ignored scenarios * fix test to correspond ot actual filtering * fix low-level tests that expected the old-style query * fix the "all other" test * improve commentary * tidying * revert irrelevant change * improve commentary * revert irrelevant change * revert irrelevant change * Fix paired aggregation behaviour * add test to ensure all filtered values are returned * improve safety * include parens in a test * fix sorting even more * improve commentary * better rule name * Karl and Jake * remove redundant test * start adding Image faceting * finish adding Image faceting tests * better naming, as per review * some fiddling to get round E4S Template limitations * add example to work towards * Apply auto-formatting rules * Inline templates work, now to put all the other bits in * still more messing about * don't query if no query term * Fix paired aggregation behaviour (#676) * Fix paired aggregation behaviour * improve safety * fix sorting even more * improve commentary * Karl and Jake * remove redundant test * It Works! * Apply auto-formatting rules * tidy whitespace * Apply auto-formatting rules * don't use E4s Indexes * Apply auto-formatting rules * tidying * Apply auto-formatting rules * tidy * Apply auto-formatting rules * more tidying * Apply auto-formatting rules * autoformat * Apply auto-formatting rules * remove redundant note * match ImageFilter search to main * Apply auto-formatting rules * minimise diff * Apply auto-formatting rules * switch off intellij autoformat and try again * Apply auto-formatting rules * bit more tidying * better template params * improve commentary * Update search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala Co-authored-by: Jamie Parkinson * improve commentary --------- Co-authored-by: Buildkite on behalf of Wellcome Collection Co-authored-by: Jamie Parkinson --- .../WorksMultiMatcherQueryTemplate.json | 149 +++++++++++++++ .../scala/weco/api/search/SearchApi.scala | 11 +- .../elasticsearch/ElasticsearchService.scala | 31 ++- .../elasticsearch/WorksMultiMatcher.scala | 180 ------------------ .../TemplateSearchHandlers.scala | 59 ++++++ .../TemplateSearchRequest.scala | 9 + .../ElasticsearchRequestBuilder.scala | 8 +- .../weco/api/search/services/Encoders.scala | 38 ++++ .../services/ImagesRequestBuilder.scala | 43 +++-- .../api/search/services/SearchService.scala | 13 +- .../services/SearchTemplateParams.scala | 16 ++ .../services/TemplateSearchBuilder.scala | 66 +++++++ .../search/services/WorksRequestBuilder.scala | 81 ++++---- .../services/WorksTemplateSearchBuilder.scala | 12 ++ .../elasticsearch/SearchQueryJsonTest.scala | 19 +- 15 files changed, 468 insertions(+), 267 deletions(-) create mode 100644 search/src/main/resources/WorksMultiMatcherQueryTemplate.json delete mode 100644 search/src/main/scala/weco/api/search/elasticsearch/WorksMultiMatcher.scala create mode 100644 search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchHandlers.scala create mode 100644 search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchRequest.scala create mode 100644 search/src/main/scala/weco/api/search/services/Encoders.scala create mode 100644 search/src/main/scala/weco/api/search/services/SearchTemplateParams.scala create mode 100644 search/src/main/scala/weco/api/search/services/TemplateSearchBuilder.scala create mode 100644 search/src/main/scala/weco/api/search/services/WorksTemplateSearchBuilder.scala diff --git a/search/src/main/resources/WorksMultiMatcherQueryTemplate.json b/search/src/main/resources/WorksMultiMatcherQueryTemplate.json new file mode 100644 index 0000000000..e6d89c65c4 --- /dev/null +++ b/search/src/main/resources/WorksMultiMatcherQueryTemplate.json @@ -0,0 +1,149 @@ +{ + "bool": { + "should": [ + { + "span_first": { + "match": { + "span_term": { + "query.title.shingles": "{{query}}" + } + }, + "end": 1, + "boost": 1000.0, + "_name": "start of title" + } + }, + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.id^1000.0", + "query.identifiers.value^1000.0", + "query.items.id^1000.0", + "query.items.identifiers.value^1000.0", + "query.images.id^1000.0", + "query.images.identifiers.value^1000.0", + "query.referenceNumber^1000.0", + "query.allIdentifiers^1000.0" + ], + "type": "best_fields", + "analyzer": "whitespace_analyzer", + "operator": "Or", + "_name": "identifiers" + } + }, + { + "dis_max": { + "queries": [ + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.titlesAndContributors^100.0", + "query.titlesAndContributors.english^100.0", + "query.titlesAndContributors.shingles^100.0" + ], + "type": "best_fields", + "minimum_should_match": "-30%", + "operator": "Or", + "_name": "title and contributor exact spellings" + } + }, + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.titlesAndContributors.arabic", + "query.titlesAndContributors.bengali", + "query.titlesAndContributors.french", + "query.titlesAndContributors.german", + "query.titlesAndContributors.hindi", + "query.titlesAndContributors.italian" + ], + "type": "best_fields", + "minimum_should_match": "-30%", + "operator": "Or", + "_name": "non-english titles and contributors" + } + } + ] + } + }, + { + "bool": { + "must": [ + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.collectionPath.path.clean", + "query.collectionPath.label.cleanPath", + "query.collectionPath.label", + "query.collectionPath.path.keyword" + ], + "operator": "Or", + "_name": "relations paths" + } + } + ], + "should": [ + { + "multi_match": { + "query": "{{query}}", + "fields": ["query.title^100.0", "query.description^10.0"], + "type": "cross_fields", + "operator": "Or", + "_name": "relations text" + } + } + ] + } + }, + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.contributors.agent.label^1000.0", + "query.subjects.concepts.label^10.0", + "query.genres.concepts.label^10.0", + "query.production.label^10.0", + "query.description", + "query.physicalDescription", + "query.languages.label", + "query.edition", + "query.notes.contents", + "query.lettering" + ], + "type": "cross_fields", + "minimum_should_match": "-30%", + "operator": "Or", + "_name": "data" + } + }, + { + "multi_match": { + "query": "{{query}}", + "fields": [ + "query.title.shingles_cased^1000.0", + "query.alternativeTitles.shingles_cased^100.0", + "query.partOf.title.shingles_cased^10.0" + ], + "type": "most_fields", + "minimum_should_match": "-30%", + "operator": "Or", + "_name": "shingles cased" + } + } + ], + "filter": [ + { + "term": { + "type": { + "value": "Visible" + } + } + } + ], + "minimum_should_match": "1" + } +} diff --git a/search/src/main/scala/weco/api/search/SearchApi.scala b/search/src/main/scala/weco/api/search/SearchApi.scala index 81c1ce0247..a6f29883c1 100644 --- a/search/src/main/scala/weco/api/search/SearchApi.scala +++ b/search/src/main/scala/weco/api/search/SearchApi.scala @@ -13,14 +13,10 @@ import akka.http.scaladsl.server.{ ValidationRejection } import com.sksamuel.elastic4s.ElasticClient -import com.sksamuel.elastic4s.ElasticDsl._ -import weco.api.search.elasticsearch.{ - ElasticsearchService, - ImagesMultiMatcher, - WorksMultiMatcher -} +import weco.api.search.elasticsearch.{ElasticsearchService, ImagesMultiMatcher} import weco.api.search.models._ import weco.api.search.rest._ +import weco.api.search.services.WorksTemplateSearchBuilder import weco.catalogue.display_model.rest.IdentifierDirectives import weco.http.models.DisplayError @@ -142,8 +138,7 @@ class SearchApi( val worksSearchTemplate = SearchTemplate( "multi_matcher_search_query", elasticConfig.worksIndex.name, - WorksMultiMatcher("{{query}}") - .filter(termQuery(field = "type", value = "Visible")) + WorksTemplateSearchBuilder.queryTemplate ) val imageSearchTemplate = SearchTemplate( diff --git a/search/src/main/scala/weco/api/search/elasticsearch/ElasticsearchService.scala b/search/src/main/scala/weco/api/search/elasticsearch/ElasticsearchService.scala index 82cabbe59b..1dc5b64c51 100644 --- a/search/src/main/scala/weco/api/search/elasticsearch/ElasticsearchService.scala +++ b/search/src/main/scala/weco/api/search/elasticsearch/ElasticsearchService.scala @@ -12,6 +12,10 @@ import com.sksamuel.elastic4s.{ElasticClient, Hit, Index, Response} import grizzled.slf4j.Logging import io.circe.Decoder import weco.Tracing +import weco.api.search.elasticsearch.templateSearch.{ + TemplateSearchHandlers, + TemplateSearchRequest +} import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} @@ -19,7 +23,8 @@ import scala.util.{Failure, Success} class ElasticsearchService(elasticClient: ElasticClient)( implicit ec: ExecutionContext ) extends Logging - with Tracing { + with Tracing + with TemplateSearchHandlers { def findById[T](id: String)( index: Index @@ -91,6 +96,30 @@ class ElasticsearchService(elasticClient: ElasticClient)( } } + def executeTemplateSearchRequest( + request: TemplateSearchRequest + ): Future[Either[ElasticsearchError, SearchResponse]] = + spanFuture( + name = "ElasticSearch#executeSearchRequest", + spanType = "request", + subType = "elastic", + action = "query" + ) { + debug(s"Sending ES request: ${request.show}") + val transaction = Tracing.currentTransaction + withActiveTrace(elasticClient.execute(request)) + .map(_.toEither) + .map { + case Right(response) => + transaction.setLabel("elasticTook", response.took) + Right(response) + + case Left(err) => + warn(s"Error while making request=${request.show}, error=$err") + Left(ElasticsearchError(err)) + } + } + def executeMultiSearchRequest( request: MultiSearchRequest ): Future[Seq[Either[ElasticsearchError, SearchResponse]]] = diff --git a/search/src/main/scala/weco/api/search/elasticsearch/WorksMultiMatcher.scala b/search/src/main/scala/weco/api/search/elasticsearch/WorksMultiMatcher.scala deleted file mode 100644 index 2bcf063dba..0000000000 --- a/search/src/main/scala/weco/api/search/elasticsearch/WorksMultiMatcher.scala +++ /dev/null @@ -1,180 +0,0 @@ -package weco.api.search.elasticsearch - -import com.sksamuel.elastic4s.ElasticDsl._ -import com.sksamuel.elastic4s.requests.common.Operator.OR -import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery -import com.sksamuel.elastic4s.requests.searches.queries.matches.MultiMatchQueryBuilderType.{ - BEST_FIELDS, - CROSS_FIELDS, - MOST_FIELDS -} -import com.sksamuel.elastic4s.requests.searches.queries.matches.{ - FieldWithOptionalBoost, - MultiMatchQuery -} -import com.sksamuel.elastic4s.requests.searches.span.{ - SpanFirstQuery, - SpanTermQuery -} - -case object WorksMultiMatcher { - val titleFields = Seq( - "query.titlesAndContributors", - "query.titlesAndContributors.english", - "query.titlesAndContributors.shingles" - ) - - def fieldsWithBoost( - boost: Int, - fields: Seq[String] - ): Seq[FieldWithOptionalBoost] = - fields.map(FieldWithBoost(_, boost)) - - def apply(q: String): BoolQuery = - boolQuery() - .should( - // This prioritises exact matches at the start of titles. - // - // e.g. if we had three works - // - // Human genetic information : science, law, and ethics - // International journal of law and information technology - // Information law : compliance for librarians and information professionals - // - // and somebody searches for "Information law", all other things being equal, - // we want to prioritise the third result. - // - // See https://github.com/wellcomecollection/catalogue-api/issues/466 - SpanFirstQuery( - SpanTermQuery( - field = "query.title.shingles", - value = q - ), - boost = Some(1000), - queryName = Some("start of title"), - end = 1 - ), - MultiMatchQuery( - q, - queryName = Some("identifiers"), - `type` = Some(BEST_FIELDS), - operator = Some(OR), - analyzer = Some("whitespace_analyzer"), - fields = fieldsWithBoost( - boost = 1000, - fields = Seq( - "query.id", - "query.identifiers.value", - "query.items.id", - "query.items.identifiers.value", - "query.images.id", - "query.images.identifiers.value", - "query.referenceNumber", - // TODO: Do we need to be querying this field at this point? - // Could we delete it and query the individual fields instead? - "query.allIdentifiers" - ) - ) - ), - /** - * This is the different ways we can match on the title fields - * - title exact spellings: Exact spellings as they have been catalogued - * - title alternative spellings: Alternative spellings which people might search for e.g. in transliterations - */ - dismax( - queries = Seq( - MultiMatchQuery( - q, - queryName = Some("title and contributor exact spellings"), - fields = fieldsWithBoost( - boost = 100, - fields = titleFields - ), - `type` = Some(BEST_FIELDS), - operator = Some(OR) - ).minimumShouldMatch("-30%"), - MultiMatchQuery( - q, - queryName = Some("non-english titles and contributors"), - fields = List( - "arabic", - "bengali", - "french", - "german", - "hindi", - "italian" - ).map( - language => - FieldWithoutBoost(s"query.titlesAndContributors.$language") - ), - `type` = Some(BEST_FIELDS), - operator = Some(OR) - ).minimumShouldMatch("-30%") - ) - ), - bool( - shouldQueries = List( - MultiMatchQuery( - q, - queryName = Some("relations text"), - `type` = Some(CROSS_FIELDS), - operator = Some(OR), - fields = Seq( - FieldWithBoost("query.title", boost = 100), - FieldWithBoost("query.description", boost = 10) - ) - ) - ), - mustQueries = List( - MultiMatchQuery( - q, - queryName = Some("relations paths"), - operator = Some(OR), - fields = Seq( - FieldWithoutBoost("query.collectionPath.path.clean"), - FieldWithoutBoost("query.collectionPath.label.cleanPath"), - FieldWithoutBoost("query.collectionPath.label"), - FieldWithoutBoost("query.collectionPath.path.keyword") - ) - ) - ), - notQueries = Nil - ), - MultiMatchQuery( - q, - queryName = Some("data"), - `type` = Some(CROSS_FIELDS), - operator = Some(OR), - fields = Seq( - (Some(1000), "query.contributors.agent.label"), - (Some(10), "query.subjects.concepts.label"), - (Some(10), "query.genres.concepts.label"), - (Some(10), "query.production.label"), - (None, "query.description"), - (None, "query.physicalDescription"), - (None, "query.languages.label"), - (None, "query.edition"), - (None, "query.notes.contents"), - (None, "query.lettering") - ).map { - case (boost, field) => - FieldWithOptionalBoost(field, boost.map(_.toDouble)) - } - ).minimumShouldMatch("-30%"), - MultiMatchQuery( - q, - queryName = Some("shingles cased"), - `type` = Some(MOST_FIELDS), - operator = Some(OR), - fields = Seq( - (Some(1000), "query.title.shingles_cased"), - (Some(100), "query.alternativeTitles.shingles_cased"), - (Some(10), "query.partOf.title.shingles_cased") - ).map { - case (boost, field) => - FieldWithOptionalBoost(field, boost.map(_.toDouble)) - } - ).minimumShouldMatch("-30%") - ) - .minimumShouldMatch(1) -} diff --git a/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchHandlers.scala b/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchHandlers.scala new file mode 100644 index 0000000000..b9bbca1013 --- /dev/null +++ b/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchHandlers.scala @@ -0,0 +1,59 @@ +package weco.api.search.elasticsearch.templateSearch + +import com.sksamuel.elastic4s.{ + ElasticRequest, + ElasticUrlEncoder, + Handler, + HttpEntity +} +import com.sksamuel.elastic4s.requests.searches.SearchResponse +import io.circe.Json +import io.circe.syntax.EncoderOps + +/** + * Handler used by E4S ElasticClient.execute to turn a + * request (the E4S-style description of what we want the request to contain) + * into an ElasticRequest (the method/url/body combination that will do that) + * + * The template search implementation built in to E4S does not currently support + * sending the template in the source attribute. It only works with templates that + * have been stored on the cluster. + * + * We do not want to permit the API to make cluster changes, and although (in the future) + * we may wish to configure the cluster at deploy time to contain the template, + * it is currently a step too far in terms of maintenance and ownership of the queries. + */ +trait TemplateSearchHandlers { + implicit object TemplateSearchHandler + extends Handler[TemplateSearchRequest, SearchResponse] { + + private def endpoint(indexes: Seq[String]): String = + indexes match { + case Nil => "/_all/_search/template" + case _ => + "/" + indexes + .map(ElasticUrlEncoder.encodeUrlFragment) + .mkString(",") + "/_search/template" + } + + override def build(request: TemplateSearchRequest): ElasticRequest = { + val body = Json + .obj( + // The source is a string containing an Elasticsearch-flavoured Mustache template. + // (See: https://www.elastic.co/guide/en/elasticsearch/reference/8.9/search-template-with-mustache-examples.html) + // It may look like Json at a glance, and most of it is, but as a whole thing, it is not, + // so must be sent as a String containing escaped Json content. + "source" -> request.source.asJson, + "params" -> request.params + ) + .noSpaces + + ElasticRequest( + method = "POST", + endpoint = endpoint(request.indexes), + body = HttpEntity(body, "application/json") + ) + } + } + +} diff --git a/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchRequest.scala b/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchRequest.scala new file mode 100644 index 0000000000..a9ac5df750 --- /dev/null +++ b/search/src/main/scala/weco/api/search/elasticsearch/templateSearch/TemplateSearchRequest.scala @@ -0,0 +1,9 @@ +package weco.api.search.elasticsearch.templateSearch + +import io.circe.Json + +case class TemplateSearchRequest( + indexes: Seq[String], + source: String, + params: Json +) diff --git a/search/src/main/scala/weco/api/search/services/ElasticsearchRequestBuilder.scala b/search/src/main/scala/weco/api/search/services/ElasticsearchRequestBuilder.scala index 1eb964d5c6..0404d3f37b 100644 --- a/search/src/main/scala/weco/api/search/services/ElasticsearchRequestBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/ElasticsearchRequestBuilder.scala @@ -5,12 +5,18 @@ import com.sksamuel.elastic4s.requests.searches.sort.FieldSort import com.sksamuel.elastic4s.Index import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.ElasticDsl._ +import weco.api.search.elasticsearch.templateSearch.TemplateSearchRequest import weco.api.search.models.SearchOptions trait ElasticsearchRequestBuilder[S <: SearchOptions[_, _, _]] { val idSort: FieldSort - def request(searchOptions: S, index: Index): SearchRequest + // return Either because Images and countWorkTypes still use the old way. + // Eventually, this should only return a TemplateSearchRequest. + def request( + searchOptions: S, + index: Index + ): Either[SearchRequest, TemplateSearchRequest] } object ElasticsearchRequestBuilder { diff --git a/search/src/main/scala/weco/api/search/services/Encoders.scala b/search/src/main/scala/weco/api/search/services/Encoders.scala new file mode 100644 index 0000000000..1ee3505a66 --- /dev/null +++ b/search/src/main/scala/weco/api/search/services/Encoders.scala @@ -0,0 +1,38 @@ +package weco.api.search.services + +import com.sksamuel.elastic4s.handlers.searches.queries.QueryBuilderFn +import com.sksamuel.elastic4s.json.JacksonBuilder +import com.sksamuel.elastic4s.requests.searches.aggs.{ + AbstractAggregation, + AggregationBuilderFn +} +import com.sksamuel.elastic4s.requests.searches.queries.Query +import io.circe.{parser, Encoder, Json} +import io.circe.syntax.EncoderOps +import weco.api.search.models.request.SortingOrder + +trait Encoders { + // Encoders for turning E4S-style data into Circe JSON. + // Currently, this simply uses E4S to generate the JSON as a string + // and parse that back into JSON. + // When we eventually eradicate E4S, these will have to create the JSON + // directly + + implicit val aggregationEncoder: Encoder[AbstractAggregation] = agg => + parser + .parse(JacksonBuilder.writeAsString(AggregationBuilderFn(agg).value)) + .getOrElse(Json.obj()) + + implicit val aggregationsEncoder: Encoder[Seq[AbstractAggregation]] = + aggs => aggs.map(agg => agg.name -> agg.asJson).toMap.asJson + + implicit val queryEncoder: Encoder[Query] = query => + parser + .parse(JacksonBuilder.writeAsString(QueryBuilderFn(query).value)) + .getOrElse(Json.obj()) + + implicit val sortOrderEncoder: Encoder[SortingOrder] = { + case SortingOrder.Ascending => "asc".asJson + case SortingOrder.Descending => "desc".asJson + } +} diff --git a/search/src/main/scala/weco/api/search/services/ImagesRequestBuilder.scala b/search/src/main/scala/weco/api/search/services/ImagesRequestBuilder.scala index d18e9a2d09..8572ebb0f7 100644 --- a/search/src/main/scala/weco/api/search/services/ImagesRequestBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/ImagesRequestBuilder.scala @@ -36,29 +36,34 @@ class ImagesRequestBuilder(queryConfig: QueryConfig) binMinima = queryConfig.paletteBinMinima ) - def request(searchOptions: ImageSearchOptions, index: Index): SearchRequest = - search(index) - .aggs { filteredAggregationBuilder(searchOptions).filteredAggregations } - .query(searchQuery(searchOptions)) - .sortBy { sortBy(searchOptions) } - .limit(searchOptions.pageSize) - .from(PaginationQuery.safeGetFrom(searchOptions)) - .sourceInclude( - "display", - // we do KNN searches for similar images, and for that we need - // to send the image's vectors to Elasticsearch - "query.inferredData.reducedFeatures" - ) - .postFilter { - must(buildImageFilterQuery(searchOptions.filters)) - } + def request( + searchOptions: ImageSearchOptions, + index: Index + ): Left[SearchRequest, Nothing] = + Left( + search(index) + .aggs { filteredAggregationBuilder(searchOptions).filteredAggregations } + .query(searchQuery(searchOptions)) + .sortBy { sortBy(searchOptions) } + .limit(searchOptions.pageSize) + .from(PaginationQuery.safeGetFrom(searchOptions)) + .sourceInclude( + "display", + // we do KNN searches for similar images, and for that we need + // to send the image's vectors to Elasticsearch + "query.inferredData.reducedFeatures" + ) + .postFilter { + must(buildImageFilterQuery(searchOptions.filters)) + } + ) private def filteredAggregationBuilder(searchOptions: ImageSearchOptions) = new ImageFiltersAndAggregationsBuilder( aggregationRequests = searchOptions.aggregations, filters = searchOptions.filters, requestToAggregation = toAggregation, - filterToQuery = buildImageFilterQuery, + filterToQuery = buildImageFilterQuery ) private def searchQuery(searchOptions: ImageSearchOptions): BoolQuery = @@ -111,7 +116,9 @@ class ImagesRequestBuilder(queryConfig: QueryConfig) case ProductionDateSortRequest => "query.source.production.dates.range.from" } - .map { FieldSort(_).order(sortOrder) } + .map { + FieldSort(_).order(sortOrder) + } private def sortOrder(implicit searchOptions: ImageSearchOptions) = searchOptions.sortOrder match { diff --git a/search/src/main/scala/weco/api/search/services/SearchService.scala b/search/src/main/scala/weco/api/search/services/SearchService.scala index 9d9f068580..5c4adeea09 100644 --- a/search/src/main/scala/weco/api/search/services/SearchService.scala +++ b/search/src/main/scala/weco/api/search/services/SearchService.scala @@ -61,11 +61,18 @@ trait SearchService[T, VisibleT, Aggs, S <: SearchOptions[_, _, _]] { searchOptions: S, index: Index ): Future[Either[ElasticsearchError, SearchResponse]] = { - val searchRequest = requestBuilder + val request = requestBuilder .request(searchOptions, index) - .trackTotalHits(true) Tracing.currentTransaction.addQueryOptionLabels(searchOptions) - elasticsearchService.executeSearchRequest(searchRequest) + // This offers a choice between the two options Images and countWorkTypes + // still use the old way. + // Eventually, this should only return a TemplateSearchRequest. + request match { + case Left(search) => elasticsearchService.executeSearchRequest(search) + case Right(template) => + elasticsearchService.executeTemplateSearchRequest(template) + } + } implicit class EnhancedTransaction(transaction: Transaction) { diff --git a/search/src/main/scala/weco/api/search/services/SearchTemplateParams.scala b/search/src/main/scala/weco/api/search/services/SearchTemplateParams.scala new file mode 100644 index 0000000000..64ff7a7350 --- /dev/null +++ b/search/src/main/scala/weco/api/search/services/SearchTemplateParams.scala @@ -0,0 +1,16 @@ +package weco.api.search.services + +import com.sksamuel.elastic4s.requests.searches.aggs.AbstractAggregation +import com.sksamuel.elastic4s.requests.searches.queries.Query +import weco.api.search.models.request.SortingOrder + +case class SearchTemplateParams( + query: Option[String], + from: Int, + size: Int, + sortByDate: Option[SortingOrder], + sortByScore: Boolean, + includes: Seq[String], + aggs: Seq[AbstractAggregation], + postFilter: Option[Query] +) diff --git a/search/src/main/scala/weco/api/search/services/TemplateSearchBuilder.scala b/search/src/main/scala/weco/api/search/services/TemplateSearchBuilder.scala new file mode 100644 index 0000000000..21ae0d1e06 --- /dev/null +++ b/search/src/main/scala/weco/api/search/services/TemplateSearchBuilder.scala @@ -0,0 +1,66 @@ +package weco.api.search.services + +import weco.api.search.elasticsearch.templateSearch.TemplateSearchRequest +import io.circe.syntax.EncoderOps +import io.circe.generic.auto._ + +trait TemplateSearchBuilder extends Encoders { + // Template for the "query" part of the request. + // This is expected to be a mustache template in which + // the query term to be used in the search is represented by a variable + // called "query". + // This preserves the existing behaviour of /search-templates.json + val queryTemplate: String + + // Importantly, this is *not* JSON, due to the `{{#` sequences, so must be created and sent as a string. + lazy protected val source: String = + s""" + |{ {{#query}} + | "query": $queryTemplate, + | {{/query}} + | + | "from": "{{from}}", + | "size": "{{size}}", + | "_source": { + | "includes": {{#toJson}}includes{{/toJson}} + | }, + | + | {{#aggs}} + | "aggs": {{#toJson}}aggs{{/toJson}}, + | {{/aggs}} + | + | {{#postFilter}} + | "post_filter": {{#toJson}}postFilter{{/toJson}}, + | {{/postFilter}} + | + | "sort": [ + | {{#sortByDate}} + | { + | "query.production.dates.range.from": { + | "order": "{{sortByDate}}" + | } + | }, + | {{/sortByDate}} + | {{#sortByScore}} + | { + | "_score": { + | "order": "desc" + | } + | }, + | {{/sortByScore}} + | { + | "query.id": { + | "order": "asc" + | } + | } + | ] + |} + |""".stripMargin + + def searchRequest( + indexes: Seq[String], + params: SearchTemplateParams + ): TemplateSearchRequest = + TemplateSearchRequest(indexes, source, params.asJson) + +} diff --git a/search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala b/search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala index f18d4f29bf..f78d25197c 100644 --- a/search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala +++ b/search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala @@ -2,11 +2,10 @@ package weco.api.search.services import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s._ -import com.sksamuel.elastic4s.requests.searches._ import com.sksamuel.elastic4s.requests.searches.aggs._ import com.sksamuel.elastic4s.requests.searches.queries._ -import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import com.sksamuel.elastic4s.requests.searches.sort._ + import weco.api.search.models._ import weco.api.search.models.request.{ ProductionDateSortRequest, @@ -14,29 +13,42 @@ import weco.api.search.models.request.{ WorkAggregationRequest } import weco.api.search.rest.PaginationQuery -import weco.api.search.elasticsearch.WorksMultiMatcher +import weco.api.search.elasticsearch.templateSearch.TemplateSearchRequest object WorksRequestBuilder - extends ElasticsearchRequestBuilder[WorkSearchOptions] { + extends ElasticsearchRequestBuilder[WorkSearchOptions] + with WorksTemplateSearchBuilder + with Encoders { import ElasticsearchRequestBuilder._ val idSort: FieldSort = fieldSort("query.id").order(SortOrder.ASC) - def request(searchOptions: WorkSearchOptions, index: Index): SearchRequest = { - implicit val s = searchOptions - search(index) - .aggs { filteredAggregationBuilder.filteredAggregations } - .query { searchQuery } - .sortBy { sortBy } - .limit { searchOptions.pageSize } - .from { PaginationQuery.safeGetFrom(searchOptions) } - .sourceInclude("display", "type") - .postFilter { - must( - buildWorkFilterQuery(VisibleWorkFilter :: searchOptions.filters) + def request( + searchOptions: WorkSearchOptions, + index: Index + ): Right[Nothing, TemplateSearchRequest] = { + implicit val s: WorkSearchOptions = searchOptions + + Right( + searchRequest( + indexes = Seq(index.name), + params = SearchTemplateParams( + query = searchOptions.searchQuery.map(_.query), + from = PaginationQuery.safeGetFrom(searchOptions), + size = searchOptions.pageSize, + sortByDate = dateOrder, + sortByScore = searchOptions.searchQuery.isDefined, + includes = Seq("display", "type"), + aggs = filteredAggregationBuilder.filteredAggregations, + postFilter = Some( + must( + buildWorkFilterQuery(VisibleWorkFilter :: searchOptions.filters) + ) + ) ) - } + ) + ) } private def filteredAggregationBuilder( @@ -108,35 +120,13 @@ object WorksRequestBuilder .field("aggregatableValues.availabilities") } - private def sortBy(implicit searchOptions: WorkSearchOptions) = - if (searchOptions.searchQuery.isDefined || searchOptions.mustQueries.nonEmpty) { - sort :+ scoreSort(SortOrder.DESC) :+ idSort - } else { - sort :+ idSort - } - - private def sort(implicit searchOptions: WorkSearchOptions) = - searchOptions.sortBy - .map { - case ProductionDateSortRequest => "query.production.dates.range.from" - } - .map { FieldSort(_).order(sortOrder) } - - private def sortOrder(implicit searchOptions: WorkSearchOptions) = - searchOptions.sortOrder match { - case SortingOrder.Ascending => SortOrder.ASC - case SortingOrder.Descending => SortOrder.DESC - } - - private def searchQuery( + private def dateOrder( implicit searchOptions: WorkSearchOptions - ): BoolQuery = - searchOptions.searchQuery - .map { - case SearchQuery(query) => - WorksMultiMatcher(query) - } - .getOrElse { boolQuery } + ): Option[SortingOrder] = + searchOptions.sortBy collectFirst { + case ProductionDateSortRequest => + searchOptions.sortOrder + } private def buildWorkFilterQuery(filters: Seq[WorkFilter]): Seq[Query] = filters.map { @@ -241,4 +231,5 @@ object WorksRequestBuilder values = availabilityIds ) } + } diff --git a/search/src/main/scala/weco/api/search/services/WorksTemplateSearchBuilder.scala b/search/src/main/scala/weco/api/search/services/WorksTemplateSearchBuilder.scala new file mode 100644 index 0000000000..9d92b5cad7 --- /dev/null +++ b/search/src/main/scala/weco/api/search/services/WorksTemplateSearchBuilder.scala @@ -0,0 +1,12 @@ +package weco.api.search.services + +import scala.io.Source + +trait WorksTemplateSearchBuilder extends TemplateSearchBuilder { + + val queryTemplate: String = + Source.fromResource("WorksMultiMatcherQueryTemplate.json").mkString + +} + +object WorksTemplateSearchBuilder extends WorksTemplateSearchBuilder diff --git a/search/src/test/scala/weco/api/search/elasticsearch/SearchQueryJsonTest.scala b/search/src/test/scala/weco/api/search/elasticsearch/SearchQueryJsonTest.scala index 9c53a92e87..de6e952a44 100644 --- a/search/src/test/scala/weco/api/search/elasticsearch/SearchQueryJsonTest.scala +++ b/search/src/test/scala/weco/api/search/elasticsearch/SearchQueryJsonTest.scala @@ -1,29 +1,26 @@ package weco.api.search.elasticsearch -import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s.handlers.searches.queries.QueryBuilderFn import com.sksamuel.elastic4s.json.JacksonBuilder import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers +import weco.api.search.services.WorksTemplateSearchBuilder import weco.fixtures.LocalResources import weco.json.utils.JsonAssertions class SearchQueryJsonTest - extends AnyFunSpec + extends AnyFunSpec with Matchers with JsonAssertions with LocalResources { it("matches the works JSON") { + // This test is essentially redundant, given that the test is basically doing + // the same thing as the SUT, just with the file in a different location. + // However, I'm leaving them both in until after the refactor is complete + // for Images as well, to ensure that the search-templates.json endpoint + // continues to work as expected. val fileJson = readResource("WorksMultiMatcherQuery.json") - - val queryJson = JacksonBuilder.writeAsString( - QueryBuilderFn( - WorksMultiMatcher("{{query}}") - .filter(termQuery(field = "type", value = "Visible")) - ).value - ) - - assertJsonStringsAreEqual(fileJson, queryJson) + assertJsonStringsAreEqual(fileJson, WorksTemplateSearchBuilder.queryTemplate) } it("matches the images JSON") {