From 7d3af9d6627f394f935f837c607ce76075a4226f Mon Sep 17 00:00:00 2001 From: Rico Kahler Date: Sun, 18 Feb 2024 21:09:05 -0600 Subject: [PATCH] feat: implement hybrid approach (messy) --- dev/test-studio/sanity.config.ts | 2 +- .../core/search/hybrid/createHybridSearch.ts | 137 +++++++++++++++--- 2 files changed, 121 insertions(+), 18 deletions(-) diff --git a/dev/test-studio/sanity.config.ts b/dev/test-studio/sanity.config.ts index 17f0c50b5acb..5481058533a6 100644 --- a/dev/test-studio/sanity.config.ts +++ b/dev/test-studio/sanity.config.ts @@ -68,7 +68,7 @@ const sharedSettings = definePlugin({ }, search: { // eslint-disable-next-line camelcase - __experimental_strategy: 'text', + __experimental_strategy: 'hybrid', }, i18n: { diff --git a/packages/sanity/src/core/search/hybrid/createHybridSearch.ts b/packages/sanity/src/core/search/hybrid/createHybridSearch.ts index 5249b9886a13..171dd23af952 100644 --- a/packages/sanity/src/core/search/hybrid/createHybridSearch.ts +++ b/packages/sanity/src/core/search/hybrid/createHybridSearch.ts @@ -1,19 +1,19 @@ import {type SanityClient} from '@sanity/client' +import {type SanityDocument} from '@sanity/types' import {sortBy} from 'lodash' import {type Observable} from 'rxjs' import {map, tap} from 'rxjs/operators' import {removeDupes} from '../../util/draftUtils' -import {applyWeights} from '../weighted/applyWeights' +import {type TextSearchResponse} from '../text-search' +import {calculateScore} from '../weighted/applyWeights' +import {extractTermsFromQuery} from '../weighted/createSearchQuery' import { type SearchableType, - type SearchHit, type SearchOptions, type SearchTerms, - type WeightedHit, type WeightedSearchOptions, } from '../weighted/types' -import {createHybridSearchQuery} from './createHybridSearchQuery' function getSearchTerms(searchParams: string | SearchTerms, types: SearchableType[]) { if (typeof searchParams === 'string') { @@ -32,32 +32,135 @@ export function createHybridSearch( types: SearchableType[], client: SanityClient, commonOpts: WeightedSearchOptions = {}, -): (searchTerms: string | SearchTerms, searchOpts?: SearchOptions) => Observable { +): ( + searchTerms: string | SearchTerms, + searchOpts?: SearchOptions, +) => Observable<{hit: SanityDocument}[]> { // Search currently supports both strings (reference + cross dataset reference inputs) // or a SearchTerms object (omnisearch). return function search(searchParams, searchOpts = {}) { const searchTerms = getSearchTerms(searchParams, types) - const {searchSpec, terms, textSearchParams} = createHybridSearchQuery(searchTerms, { - ...commonOpts, - ...searchOpts, - }) + // const attributePaths = Array.from( + // new Set( + // types.flatMap((type) => + // type.__experimental_search.map((config) => + // // TODO: update `joinPath` implementation to default to `[]` paths + // joinPath(config.path.map((p) => (typeof p === 'number' ? [] : p))), + // ), + // ), + // ), + // ) + + const filters = [ + '_type in $__types', + searchOpts.includeDrafts === false && `!(_id in path('drafts.**'))`, + searchTerms.filter ? `(${searchTerms.filter})` : false, + ].filter((baseFilter): baseFilter is string => Boolean(baseFilter)) + + const terms = extractTermsFromQuery(searchTerms.query) - const searchRequest = client.observable.request({ + const searchRequest = client.observable.request({ uri: `/data/textsearch/${client.config().dataset}`, method: 'POST', json: true, - body: textSearchParams, + body: { + query: {string: terms.join(' ')}, + filter: filters.join(' && '), + params: { + __types: searchTerms.types.map((type) => type.name), + }, + // // TODO: this currently causes the backend to 500 + // includeAttributes: attributePaths, + limit: 1000, + }, }) return searchRequest.pipe( + map((i) => i.hits.map((hit) => hit.attributes)), commonOpts.unique ? map(removeDupes) : tap(), - // Assign weighting and scores based on current search terms. - // No scores will be assigned when terms are empty. - map((hits: SearchHit[]) => applyWeights(searchSpec, hits, terms)), - // Optionally skip client-side score sorting. - // This can be relevant when ordering results by specific fields, especially dates. - searchOpts?.skipSortByScore ? tap() : map((hits) => sortBy(hits, (hit) => -hit.score)), + map((documents) => applyWeights({types, documents, terms})), + map((hits) => sortBy(hits, ({hit}) => -hit.score)), ) } } + +interface Options { + types: SearchableType[] + documents: SanityDocument[] + terms: string[] +} + +function getPtText( + ptData: Array<{_type: 'block'; children: Array<{_type: 'span'; text: string}>}>, +) { + return ptData + .flatMap((block) => + block._type === 'block' + ? block.children.flatMap((child) => (child._type === 'span' ? [child.text] : [])) + : [], + ) + .join(' ') +} + +function applyWeights({documents, terms, types}: Options) { + const typesByName = types.reduce>((acc, next) => { + acc[next.name] = next + return acc + }, {}) + + function getValues(value: unknown, [current, ...rest]: (string | number | [])[]): unknown[] { + if (typeof current === 'undefined') return [value] + if (typeof value !== 'object') return [] + if (!value) return [] + if (Array.isArray(current)) { + if (!Array.isArray(value)) return [] + return value.flatMap((nestedValue) => getValues(nestedValue, rest)) + } + if (current in value) return getValues(value[current as keyof typeof value], rest) + return [] + } + + return documents + .map((doc) => ({ + ...doc, + score: typesByName[doc._type].__experimental_search + .map(({path, weight, mapWith}) => { + let values = getValues(doc, path) + + switch (mapWith) { + case undefined: { + break + } + case 'pt::text': { + values = [getPtText(values as any)] + break + } + case 'lower': { + values = values.map((value) => + typeof value === 'string' ? value.toLowerCase() : value, + ) + break + } + case 'upper': { + values = values.map((value) => + typeof value === 'string' ? value.toUpperCase() : value, + ) + break + } + default: { + throw new Error(`${mapWith}() not supported`) + } + } + + const score = calculateScore( + terms, + values.filter((value): value is string => typeof value === 'string').join(' '), + )[0] + + return score * weight + }) + .reduce((sum, next) => sum + next, 0), + })) + .map((x) => ({hit: x})) +}