Skip to content

Commit

Permalink
Define the main Works search query in plain Json (#682)
Browse files Browse the repository at this point in the history
* start gherkining the facets

* start filling out the faceting features

* Apply auto-formatting rules

* messing about with portability of the features file

* only return populated buckets

* finish new faceting feature tests for works.

* add todo about order

* a bit more faceting test finessing

* WorkFacet tests pass (todo: spread the filtering business to other fields)

* aggregate properly on language

* workType filtering and aggregation

* subjects and contributors

* availabilities

* tidy up bucket matching

* improve filter test output

* license works properly

* skip tests that are not yet ready

* filters now only return the filtered value in aggregations

* remove AnyFunSpec from base class

* remove AnyFunSpec from base class

* commit missed file

* Do not return unwanted empty buckets

* Do not return unwanted empty buckets

* extend no-empties rule to images

* avoid naming collision

* new faceting paradigm

* enforce order in aggregations

* remove unnecessary ignored scenarios

* fix test to correspond to actual filtering

* fix low-level tests that expected the old-style query

* fix the "all other" test

* improve commentary

* tidying

* revert irrelevant change

* improve commentary

* revert irrelevant change

* revert irrelevant change

* Fix paired aggregation behaviour

* add test to ensure all filtered values are returned

* improve safety

* include parens in a test

* fix sorting even more

* improve commentary

* better rule name

* Karl and Jake

* remove redundant test

* start adding Image faceting

* finish adding Image faceting tests

* better naming, as per review

* some fiddling to get round E4S Template limitations

* add example to work towards

* Apply auto-formatting rules

* Inline templates work, now to put all the other bits in

* still more messing about

* don't query if no query term

* Fix paired aggregation behaviour (#676)

* Fix paired aggregation behaviour

* improve safety

* fix sorting even more

* improve commentary

* Karl and Jake

* remove redundant test

* It Works!

* Apply auto-formatting rules

* tidy whitespace

* Apply auto-formatting rules

* don't use E4s Indexes

* Apply auto-formatting rules

* tidying

* Apply auto-formatting rules

* tidy

* Apply auto-formatting rules

* more tidying

* Apply auto-formatting rules

* autoformat

* Apply auto-formatting rules

* remove redundant note

* match ImageFilter search to main

* Apply auto-formatting rules

* minimise diff

* Apply auto-formatting rules

* switch off IntelliJ autoformat and try again

* Apply auto-formatting rules

* bit more tidying

* better template params

* improve commentary

* Update search/src/main/scala/weco/api/search/services/WorksRequestBuilder.scala

Co-authored-by: Jamie Parkinson <[email protected]>

* improve commentary

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>
Co-authored-by: Jamie Parkinson <[email protected]>
  • Loading branch information
3 people authored Aug 1, 2023
1 parent 7913484 commit 8b32e94
Show file tree
Hide file tree
Showing 15 changed files with 468 additions and 267 deletions.
149 changes: 149 additions & 0 deletions search/src/main/resources/WorksMultiMatcherQueryTemplate.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"bool": {
"should": [
{
"span_first": {
"match": {
"span_term": {
"query.title.shingles": "{{query}}"
}
},
"end": 1,
"boost": 1000.0,
"_name": "start of title"
}
},
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.id^1000.0",
"query.identifiers.value^1000.0",
"query.items.id^1000.0",
"query.items.identifiers.value^1000.0",
"query.images.id^1000.0",
"query.images.identifiers.value^1000.0",
"query.referenceNumber^1000.0",
"query.allIdentifiers^1000.0"
],
"type": "best_fields",
"analyzer": "whitespace_analyzer",
"operator": "Or",
"_name": "identifiers"
}
},
{
"dis_max": {
"queries": [
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.titlesAndContributors^100.0",
"query.titlesAndContributors.english^100.0",
"query.titlesAndContributors.shingles^100.0"
],
"type": "best_fields",
"minimum_should_match": "-30%",
"operator": "Or",
"_name": "title and contributor exact spellings"
}
},
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.titlesAndContributors.arabic",
"query.titlesAndContributors.bengali",
"query.titlesAndContributors.french",
"query.titlesAndContributors.german",
"query.titlesAndContributors.hindi",
"query.titlesAndContributors.italian"
],
"type": "best_fields",
"minimum_should_match": "-30%",
"operator": "Or",
"_name": "non-english titles and contributors"
}
}
]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.collectionPath.path.clean",
"query.collectionPath.label.cleanPath",
"query.collectionPath.label",
"query.collectionPath.path.keyword"
],
"operator": "Or",
"_name": "relations paths"
}
}
],
"should": [
{
"multi_match": {
"query": "{{query}}",
"fields": ["query.title^100.0", "query.description^10.0"],
"type": "cross_fields",
"operator": "Or",
"_name": "relations text"
}
}
]
}
},
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.contributors.agent.label^1000.0",
"query.subjects.concepts.label^10.0",
"query.genres.concepts.label^10.0",
"query.production.label^10.0",
"query.description",
"query.physicalDescription",
"query.languages.label",
"query.edition",
"query.notes.contents",
"query.lettering"
],
"type": "cross_fields",
"minimum_should_match": "-30%",
"operator": "Or",
"_name": "data"
}
},
{
"multi_match": {
"query": "{{query}}",
"fields": [
"query.title.shingles_cased^1000.0",
"query.alternativeTitles.shingles_cased^100.0",
"query.partOf.title.shingles_cased^10.0"
],
"type": "most_fields",
"minimum_should_match": "-30%",
"operator": "Or",
"_name": "shingles cased"
}
}
],
"filter": [
{
"term": {
"type": {
"value": "Visible"
}
}
}
],
"minimum_should_match": "1"
}
}
11 changes: 3 additions & 8 deletions search/src/main/scala/weco/api/search/SearchApi.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,10 @@ import akka.http.scaladsl.server.{
ValidationRejection
}
import com.sksamuel.elastic4s.ElasticClient
import com.sksamuel.elastic4s.ElasticDsl._
import weco.api.search.elasticsearch.{
ElasticsearchService,
ImagesMultiMatcher,
WorksMultiMatcher
}
import weco.api.search.elasticsearch.{ElasticsearchService, ImagesMultiMatcher}
import weco.api.search.models._
import weco.api.search.rest._
import weco.api.search.services.WorksTemplateSearchBuilder
import weco.catalogue.display_model.rest.IdentifierDirectives
import weco.http.models.DisplayError

Expand Down Expand Up @@ -142,8 +138,7 @@ class SearchApi(
val worksSearchTemplate = SearchTemplate(
"multi_matcher_search_query",
elasticConfig.worksIndex.name,
WorksMultiMatcher("{{query}}")
.filter(termQuery(field = "type", value = "Visible"))
WorksTemplateSearchBuilder.queryTemplate
)

val imageSearchTemplate = SearchTemplate(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,19 @@ import com.sksamuel.elastic4s.{ElasticClient, Hit, Index, Response}
import grizzled.slf4j.Logging
import io.circe.Decoder
import weco.Tracing
import weco.api.search.elasticsearch.templateSearch.{
TemplateSearchHandlers,
TemplateSearchRequest
}

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

class ElasticsearchService(elasticClient: ElasticClient)(
implicit ec: ExecutionContext
) extends Logging
with Tracing {
with Tracing
with TemplateSearchHandlers {

def findById[T](id: String)(
index: Index
Expand Down Expand Up @@ -91,6 +96,30 @@ class ElasticsearchService(elasticClient: ElasticClient)(
}
}

/** Executes a template search request against Elasticsearch inside a
  * tracing span.
  *
  * The request is logged at debug level before being sent. When the call
  * succeeds, the response's `took` value is stored under the "elasticTook"
  * label on the transaction obtained from `Tracing.currentTransaction`
  * before the request was sent. When it fails, the error is logged as a
  * warning and wrapped in an [[ElasticsearchError]].
  *
  * @param request the template search request to execute
  * @return a future holding the search response on the right, or the
  *         wrapped Elasticsearch error on the left
  */
def executeTemplateSearchRequest(
  request: TemplateSearchRequest
): Future[Either[ElasticsearchError, SearchResponse]] =
  spanFuture(
    name = "ElasticSearch#executeSearchRequest",
    spanType = "request",
    subType = "elastic",
    action = "query"
  ) {
    debug(s"Sending ES request: ${request.show}")
    // Look the transaction up before sending, so the callback below labels
    // the transaction that was current at this point.
    val currentTransaction = Tracing.currentTransaction
    val eventualResponse = withActiveTrace(elasticClient.execute(request))

    eventualResponse.map { esResponse =>
      esResponse.toEither match {
        case Left(failure) =>
          warn(s"Error while making request=${request.show}, error=$failure")
          Left(ElasticsearchError(failure))

        case Right(searchResponse) =>
          currentTransaction.setLabel("elasticTook", searchResponse.took)
          Right(searchResponse)
      }
    }
  }

def executeMultiSearchRequest(
request: MultiSearchRequest
): Future[Seq[Either[ElasticsearchError, SearchResponse]]] =
Expand Down
Loading

0 comments on commit 8b32e94

Please sign in to comment.