From 6dccbc7f58e02dc66a4fe5659617f7f6d865a4e5 Mon Sep 17 00:00:00 2001 From: Patrick Browne Date: Mon, 19 Sep 2022 10:22:32 +0200 Subject: [PATCH] feat: Cube search does not use full text search engine --- app/rdf/query-search-score-utils.ts | 81 ++++++++++++----------------- app/rdf/query-search.spec.ts | 27 +++------- app/rdf/query-search.ts | 65 +++++++++++++---------- 3 files changed, 79 insertions(+), 94 deletions(-) diff --git a/app/rdf/query-search-score-utils.ts b/app/rdf/query-search-score-utils.ts index dc51619286..fb8bf5c383 100644 --- a/app/rdf/query-search-score-utils.ts +++ b/app/rdf/query-search-score-utils.ts @@ -1,5 +1,3 @@ -import { ResultRow } from "sparql-http-client/ResultParser"; - export const parseFloatZeroed = (s: string) => { const n = parseFloat(s); if (Number.isNaN(n)) { @@ -9,60 +7,47 @@ export const parseFloatZeroed = (s: string) => { } }; -const parseScoreRow = (x: ResultRow) => { - return { - cubeIri: x.cube.value, - scoreName: parseFloatZeroed(x.scoreName?.value), - scoreDescription: parseFloatZeroed(x.scoreDescription?.value), - scoreTheme: parseFloatZeroed(x.scoreTheme?.value), - scorePublisher: parseFloatZeroed(x.scorePublisher?.value), - scoreCreator: parseFloatZeroed(x.scoreCreator?.value), - }; -}; -type ScoreKey = Exclude, "cubeIri">; -const weights: Record = { - scoreName: 5, - scoreDescription: 2, - scoreTheme: 1, - scorePublisher: 1, - scoreCreator: 1, +const weights: Record = { + name: 5, + description: 2, + themeName: 1, + publisher: 1, + creatorLabel: 1, }; + /** - * From a list of scores where each row contains only one score, - * computes an index from cubeIri to weighted score. + * From a list of cube rows containing weighted fields */ - export const computeScores = ( scoresRaw: any[], - { keepZeros }: { keepZeros: boolean } + { query }: { query?: string } ) => { - const scores = scoresRaw.map((r) => parseScoreRow(r)); - - const infoPerCube = scores.reduce( - (acc, scoreRow) => { - let cubeScore = acc[scoreRow.cubeIri]?.score ?? 0; - for (let [key, weight] of Object.entries(weights)) { - const attrScore = scoreRow[key as ScoreKey] ?? 0; - if (attrScore > 0) { - cubeScore = cubeScore + scoreRow[key as ScoreKey] * weight; + const infoPerCube = {} as Record; + if (query) { + for (let scoreRow of scoresRaw) { + let score = 0; + for (let [field, weight] of Object.entries(weights)) { + const val = scoreRow[field]?.value; + if (!val) { + continue; + } + for (let tok of query.split(" ")) { + if (val.toLowerCase().includes(tok.toLowerCase())) { + score += weight; + } } } - if (cubeScore > 0 || keepZeros) { - acc[scoreRow.cubeIri] = acc[scoreRow.cubeIri] || { - score: 0, - }; - acc[scoreRow.cubeIri].score = cubeScore; - } - return acc; - }, - {} as Record< - string, - { - score: number; - highlights: Record; + infoPerCube[scoreRow.cube.value] = { score }; + } + for (let k of Object.keys(infoPerCube)) { + if (infoPerCube[k]?.score === 0) { + delete infoPerCube[k]; } - > - ); - + } + } else { + for (let scoreRow of scoresRaw) { + infoPerCube[scoreRow.cube.value] = { score: 1 }; + } + } return infoPerCube; }; diff --git a/app/rdf/query-search.spec.ts b/app/rdf/query-search.spec.ts index 8a39308ed2..2abdd71f53 100644 --- a/app/rdf/query-search.spec.ts +++ b/app/rdf/query-search.spec.ts @@ -13,29 +13,18 @@ jest.mock("@tpluscode/sparql-builder", () => ({})); describe("compute scores", () => { const scores = [ - { cube: "a", scoreName: 1 }, - { cube: "a", scoreDescription: 1 }, - { cube: "b", scoreName: 5 }, - { cube: "c", scoreCreator: 1 }, - { cube: "d", scoreCreator: 0 }, + { cube: "a", name: "national" }, + { cube: "b", name: "national", description: "economy" }, + { cube: "c", creatorLabel: "national" }, + { cube: "d", creatorLabel: "" }, ].map((x) => mapValues(x, (v) => ({ value: v }))); - it("should compute weighted score per cube from score rows, and discard cubes with score: 0", () => { + it("should compute weighted score per cube from score rows", () => { const reduced = computeScores(scores, { - keepZeros: true, + query: "national economy", }); - expect(reduced["a"].score).toEqual(7); - expect(reduced["b"].score).toEqual(25); - expect(reduced["c"].score).toEqual(1); - expect(reduced["d"].score).toEqual(0); - }); - - it("should compute weighted score per cube from score rows, , and keep cube with score: 0", () => { - const reduced = computeScores(scores, { - keepZeros: false, - }); - expect(reduced["a"].score).toEqual(7); - expect(reduced["b"].score).toEqual(25); + expect(reduced["a"].score).toEqual(5); + expect(reduced["b"].score).toEqual(7); expect(reduced["c"].score).toEqual(1); expect(reduced["d"]).toBeUndefined(); }); diff --git a/app/rdf/query-search.ts b/app/rdf/query-search.ts index f0a1ae0ed8..3d82c88415 100644 --- a/app/rdf/query-search.ts +++ b/app/rdf/query-search.ts @@ -88,11 +88,15 @@ const enhanceQuery = (rawQuery: string) => { // Filter out lowercase, small tokens .filter((t) => t.length > 2 || t.toLowerCase() !== t) // Wildcard Searches on each term - .map((t) => `${t}*`) + .map((t) => `${t}`) .join(" "); return enhancedQuery; }; +const contains = (left: string, right: string) => { + return `CONTAINS(LCASE(${left}), LCASE("${right}"))`; +}; + export const searchCubes = async ({ query: rawQuery, locale, @@ -124,10 +128,11 @@ export const searchCubes = async ({ filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) || []; - const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?scoreName ?scoreDescription` + const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description` .WHERE` ?cube a ${ns.cube.Cube}. ?cube ${ns.schema.name} ?name. + ?cube ${ns.schema.description} ?description. ?cube ${ns.dcat.theme} ?theme. ?cube ${ns.dcterms.creator} ?creator. @@ -146,23 +151,24 @@ export const searchCubes = async ({ ${makeInFilter("theme", themeValues)} ${makeInFilter("creator", creatorValues)} - ${ - query && query.length > 0 - ? sparql` - { (?name ?scoreName) "${query}". } - UNION { - OPTIONAL { - ?cube ${ns.schema.description} ?description. - (?description ?scoreDescription) "${query}" . - } + ${ + query + ? `FILTER( + ${query + ?.split(" ") + .slice(0, 1) + .map( + (x) => `${contains("?name", x)} || ${contains("?description", x)}` + ) + .join(" || ")} + + )` + : "" } - ` - : "" - } `; - const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?scoreTheme ?scorePublisher ?scoreCreator` + const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?publisher ?themeName ?creatorLabel` .WHERE` ?cube a ${ns.cube.Cube}. ?cube ${ns.schema.name} ?name. @@ -187,27 +193,32 @@ export const searchCubes = async ({ ${ query && query.length > 0 ? sparql` - UNION { + OPTIONAL { ?cube ${ns.dcterms.publisher} ?publisher. - (?publisher ?scorePublisher) "${query}" . + FILTER(${query + .split(" ") + .map((x) => contains("?publisher", x)) + .join(" || ")}) . } - } - UNION { + OPTIONAL { ?theme ${ns.schema.name} ?themeName. - (?themeName ?scoreTheme) - "${query}" . + FILTER(${query + .split(" ") + .map((x) => contains("?themeName", x)) + .join(" || ")}) . } - } - UNION { + + OPTIONAL { ?creator ${ns.schema.name} ?creatorLabel. - (?creatorLabel ?scoreCreator) - "${query}" . + FILTER(${query + .split(" ") + .map((x) => contains("?creatorLabel", x)) + .join(" || ")}) . } - } ` : "" } @@ -229,7 +240,7 @@ export const searchCubes = async ({ } const infoPerCube = computeScores(scoreResults.data, { - keepZeros: !query || query.length === 0, + query: query, }); // Find information on cubes