From b4b019e945d496880d0fec3ace19f8267ab6e74b Mon Sep 17 00:00:00 2001 From: Patrick Browne Date: Tue, 27 Sep 2022 23:36:53 +0200 Subject: [PATCH] fix: Can query on theme / publisher / creator --- app/rdf/query-search-score-utils.ts | 10 +- app/rdf/query-search.spec.ts | 5 +- app/rdf/query-search.ts | 139 ++++++++++++---------------- 3 files changed, 65 insertions(+), 89 deletions(-) diff --git a/app/rdf/query-search-score-utils.ts b/app/rdf/query-search-score-utils.ts index 2deb96cbd..7abc3491a 100644 --- a/app/rdf/query-search-score-utils.ts +++ b/app/rdf/query-search-score-utils.ts @@ -20,24 +20,24 @@ export const weights: Record = { */ export const computeScores = ( scoresRaw: any[], - { query }: { query?: string } + { query, identifierName }: { query?: string; identifierName: string } ) => { const infoPerCube = {} as Record; if (query) { for (let scoreRow of scoresRaw) { let score = 0; for (let [field, weight] of Object.entries(weights)) { - const val = scoreRow[field]?.value; + const val = scoreRow[field]; if (!val) { continue; } for (let tok of query.split(" ")) { - if (val.toLowerCase().includes(tok.toLowerCase())) { + if (val && val.toLowerCase().includes(tok.toLowerCase())) { score += weight; } } } - infoPerCube[scoreRow.cube.value] = { score }; + infoPerCube[scoreRow[identifierName]] = { score }; } for (let k of Object.keys(infoPerCube)) { if (infoPerCube[k]?.score === 0) { @@ -46,7 +46,7 @@ export const computeScores = ( } } else { for (let scoreRow of scoresRaw) { - infoPerCube[scoreRow.cube.value] = { score: 1 }; + infoPerCube[scoreRow[identifierName]] = { score: 1 }; } } return infoPerCube; diff --git a/app/rdf/query-search.spec.ts b/app/rdf/query-search.spec.ts index 299f0365c..3b30fc513 100644 --- a/app/rdf/query-search.spec.ts +++ b/app/rdf/query-search.spec.ts @@ -1,5 +1,3 @@ -import mapValues from "lodash/mapValues"; - import { computeScores, weights } from "./query-search-score-utils"; // jest.mock("rdf-ext", () => ({})); @@ -17,11 +15,12 @@ describe("compute scores", () => { { cube: "b", name: "national", description: "economy" }, { cube: "c", creatorLabel: "national" }, { cube: "d", creatorLabel: "" }, - ].map((x) => mapValues(x, (v) => ({ value: v }))); + ]; it("should compute weighted score per cube from score rows", () => { const reduced = computeScores(scores, { query: "national economy", + identifierName: "cube", }); expect(reduced["a"].score).toEqual(weights.name); expect(reduced["b"].score).toEqual(weights.name + weights.description); diff --git a/app/rdf/query-search.ts b/app/rdf/query-search.ts index 7cd840284..e614f64f6 100644 --- a/app/rdf/query-search.ts +++ b/app/rdf/query-search.ts @@ -1,4 +1,5 @@ -import { DESCRIBE, SELECT } from "@tpluscode/sparql-builder"; +import { TemplateResult } from "@tpluscode/rdf-string/lib/TemplateResult"; +import { DESCRIBE, SELECT, sparql } from "@tpluscode/sparql-builder"; import clownface from "clownface"; import { descending } from "d3"; import { Cube } from "rdf-cube-view-query"; @@ -10,7 +11,7 @@ import { truthy } from "@/domain/types"; import { DataCubeSearchFilter } from "@/graphql/resolver-types"; import { ResolvedDataCube } from "@/graphql/shared-types"; import * as ns from "@/rdf/namespace"; -import { parseCube, parseIri, parseVersionHistory } from "@/rdf/parse"; +import { parseCube, parseIri } from "@/rdf/parse"; import { fromStream } from "@/rdf/sparql-client"; import { computeScores, highlight } from "./query-search-score-utils"; @@ -23,7 +24,7 @@ const makeInFilter = (varName: string, values: string[]) => { return ` ${ values.length > 0 - ? `FILTER ( + ? `FILTER (bound(?${varName}) && ?${varName} IN (${values.map(toNamedNode)}) )` : "" @@ -72,10 +73,17 @@ const enhanceQuery = (rawQuery: string) => { return enhancedQuery; }; -const contains = (left: string, right: string) => { +const icontains = (left: string, right: string) => { return `CONTAINS(LCASE(${left}), LCASE("${right}"))`; }; +type ResultRow = Record; +const parseResultRow = (row: ResultRow) => + Object.fromEntries(Object.entries(row).map(([k, v]) => [k, v.value])); + +const identity = (str: TemplateResult) => str; +const optional = (str: TemplateResult) => sparql`OPTIONAL { ${str} }`; + export const searchCubes = async ({ query: rawQuery, locale, @@ -107,15 +115,11 @@ export const searchCubes = async ({ filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) || []; - const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description` + const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description ?publisher ?themeName ?creatorLabel` .WHERE` ?cube a ${ns.cube.Cube}. ?cube ${ns.schema.name} ?name. - - ?cube ${ns.dcat.theme} ?theme. - ?cube ${ns.dcterms.creator} ?creator. - OPTIONAL { ?cube ${ns.schema.description} ?description. } @@ -127,6 +131,20 @@ export const searchCubes = async ({ OPTIONAL { ?versionHistory ${ns.schema.hasPart} ?cube. } + + OPTIONAL { ?cube ${ns.dcterms.publisher} ?publisher. } + + ${(themeValues.length > 0 ? identity : optional)(sparql` + ?cube ${ns.dcat.theme} ?theme. + ?theme ${ns.schema.name} ?themeName. + `)} + + ${(creatorValues.length > 0 ? identity : optional)( + sparql` + ?cube ${ns.dcterms.creator} ?creator. + ?creator ${ns.schema.name} ?creatorLabel. + ` + )} ${makeVisualizeDatasetFilter({ includeDrafts: !!includeDrafts, @@ -144,89 +162,43 @@ export const searchCubes = async ({ ?.split(" ") .slice(0, 1) .map( - (x) => `${contains("?name", x)} || ${contains("?description", x)}` + (x) => `${icontains("?name", x)} || ${icontains("?description", x)}` ) .join(" || ")} - + + || (bound(?publisher) && ${query + .split(" ") + .map((x) => icontains("?publisher", x)) + .join(" || ")}) + + || (bound(?themeName) && ${query + .split(" ") + .map((x) => icontains("?themeName", x)) + .join(" || ")}) + + || (bound(?creatorLabel) && ${query + .split(" ") + .map((x) => icontains("?creatorLabel", x)) + .join(" || ")}) + )` : "" } - `; - const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?publisher ?themeName ?creatorLabel` - .WHERE` - ?cube a ${ns.cube.Cube}. - ?cube ${ns.schema.name} ?name. - - ?cube ${ns.dcat.theme} ?theme. - ?cube ${ns.dcterms.creator} ?creator. - - OPTIONAL { - ?cube ${ns.schema.about} ?about. - } - - OPTIONAL { - ?versionHistory ${ns.schema.hasPart} ?cube. - } - - ${makeVisualizeFilter(!!includeDrafts)} - - ${makeInFilter("about", aboutValues)} - ${makeInFilter("theme", themeValues)} - ${makeInFilter("creator", creatorValues)} - - ${ - query && query.length > 0 - ? sparql` - - OPTIONAL { - ?cube ${ns.dcterms.publisher} ?publisher. - FILTER(${query - .split(" ") - .map((x) => contains("?publisher", x)) - .join(" || ")}) . - } - - OPTIONAL { - ?theme ${ns.schema.name} ?themeName. - FILTER(${query - .split(" ") - .map((x) => contains("?themeName", x)) - .join(" || ")}) . - } - - - OPTIONAL { - - ?creator ${ns.schema.name} ?creatorLabel. - FILTER(${query - .split(" ") - .map((x) => contains("?creatorLabel", x)) - .join(" || ")}) . - } - ` - : "" - } - - `; - - let scoreResults = await executeAndMeasure(sparqlClient, scoresQuery); + const scoreResults = await executeAndMeasure(sparqlClient, scoresQuery); queries.push({ ...scoreResults.meta, label: "scores1", }); - if (scoreResults.data.length === 0) { - scoreResults = await executeAndMeasure(sparqlClient, scoresQuery2); - queries.push({ - ...scoreResults.meta, - label: "scores2", - }); - } - - const infoPerCube = computeScores(scoreResults.data, { + const data = scoreResults.data.map((x) => parseResultRow(x as ResultRow)); + const versionHistoryPerCube = Object.fromEntries( + data.map((d) => [d.cube, d.versionHistory]) + ); + const infoPerCube = computeScores(data, { query: query, + identifierName: "cube", }); // Find information on cubes @@ -234,7 +206,12 @@ export const searchCubes = async ({ // under the maximum score and only retrieve those cubes // The query could also dedup directly the version of the cubes const cubeIris = Object.keys(infoPerCube); - const cubesQuery = DESCRIBE`${cubeIris.map((x) => `<${x}>`).join(" ")}`; + + const sortedCubeIris = cubeIris.sort((a, b) => + descending(infoPerCube[a].score, infoPerCube[b].score) + ); + + const cubesQuery = DESCRIBE`${sortedCubeIris.map((x) => `<${x}>`).join(" ")}`; if (!locale) { throw new Error("Must pass locale"); @@ -260,7 +237,7 @@ export const searchCubes = async ({ .map((cubeNode) => { const cube = cubeNode as unknown as Cube; const iri = parseIri(cube); - const versionHistory = parseVersionHistory(cube); + const versionHistory = versionHistoryPerCube[iri]; const dedupIdentifier = versionHistory || iri; if (seen.has(dedupIdentifier)) { return null;