Skip to content

Commit

Permalink
fix: Can query on theme / publisher / creator
Browse files Browse the repository at this point in the history
  • Loading branch information
ptbrowne committed Sep 29, 2022
1 parent 5d171fd commit b4b019e
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 89 deletions.
10 changes: 5 additions & 5 deletions app/rdf/query-search-score-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,24 @@ export const weights: Record<string, number> = {
*/
export const computeScores = (
scoresRaw: any[],
{ query }: { query?: string }
{ query, identifierName }: { query?: string; identifierName: string }
) => {
const infoPerCube = {} as Record<string, { score: number }>;
if (query) {
for (let scoreRow of scoresRaw) {
let score = 0;
for (let [field, weight] of Object.entries(weights)) {
const val = scoreRow[field]?.value;
const val = scoreRow[field];
if (!val) {
continue;
}
for (let tok of query.split(" ")) {
if (val.toLowerCase().includes(tok.toLowerCase())) {
if (val && val.toLowerCase().includes(tok.toLowerCase())) {
score += weight;
}
}
}
infoPerCube[scoreRow.cube.value] = { score };
infoPerCube[scoreRow[identifierName]] = { score };
}
for (let k of Object.keys(infoPerCube)) {
if (infoPerCube[k]?.score === 0) {
Expand All @@ -46,7 +46,7 @@ export const computeScores = (
}
} else {
for (let scoreRow of scoresRaw) {
infoPerCube[scoreRow.cube.value] = { score: 1 };
infoPerCube[scoreRow[identifierName]] = { score: 1 };
}
}
return infoPerCube;
Expand Down
5 changes: 2 additions & 3 deletions app/rdf/query-search.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import mapValues from "lodash/mapValues";

import { computeScores, weights } from "./query-search-score-utils";

// jest.mock("rdf-ext", () => ({}));
Expand All @@ -17,11 +15,12 @@ describe("compute scores", () => {
{ cube: "b", name: "national", description: "economy" },
{ cube: "c", creatorLabel: "national" },
{ cube: "d", creatorLabel: "" },
].map((x) => mapValues(x, (v) => ({ value: v })));
];

it("should compute weighted score per cube from score rows", () => {
const reduced = computeScores(scores, {
query: "national economy",
identifierName: "cube",
});
expect(reduced["a"].score).toEqual(weights.name);
expect(reduced["b"].score).toEqual(weights.name + weights.description);
Expand Down
139 changes: 58 additions & 81 deletions app/rdf/query-search.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { DESCRIBE, SELECT } from "@tpluscode/sparql-builder";
import { TemplateResult } from "@tpluscode/rdf-string/lib/TemplateResult";
import { DESCRIBE, SELECT, sparql } from "@tpluscode/sparql-builder";
import clownface from "clownface";
import { descending } from "d3";
import { Cube } from "rdf-cube-view-query";
Expand All @@ -10,7 +11,7 @@ import { truthy } from "@/domain/types";
import { DataCubeSearchFilter } from "@/graphql/resolver-types";
import { ResolvedDataCube } from "@/graphql/shared-types";
import * as ns from "@/rdf/namespace";
import { parseCube, parseIri, parseVersionHistory } from "@/rdf/parse";
import { parseCube, parseIri } from "@/rdf/parse";
import { fromStream } from "@/rdf/sparql-client";

import { computeScores, highlight } from "./query-search-score-utils";
Expand All @@ -23,7 +24,7 @@ const makeInFilter = (varName: string, values: string[]) => {
return `
${
values.length > 0
? `FILTER (
? `FILTER (bound(?${varName}) &&
?${varName} IN (${values.map(toNamedNode)})
)`
: ""
Expand Down Expand Up @@ -72,10 +73,17 @@ const enhanceQuery = (rawQuery: string) => {
return enhancedQuery;
};

const contains = (left: string, right: string) => {
const icontains = (left: string, right: string) => {
return `CONTAINS(LCASE(${left}), LCASE("${right}"))`;
};

type ResultRow = Record<string, { value: unknown }>;
const parseResultRow = (row: ResultRow) =>
Object.fromEntries(Object.entries(row).map(([k, v]) => [k, v.value]));

const identity = <T>(str: TemplateResult<T>) => str;
const optional = <T>(str: TemplateResult<T>) => sparql`OPTIONAL { ${str} }`;

export const searchCubes = async ({
query: rawQuery,
locale,
Expand Down Expand Up @@ -107,15 +115,11 @@ export const searchCubes = async ({
filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) ||
[];

const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description`
const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.
?cube ${ns.dcat.theme} ?theme.
?cube ${ns.dcterms.creator} ?creator.
OPTIONAL {
?cube ${ns.schema.description} ?description.
}
Expand All @@ -127,6 +131,20 @@ export const searchCubes = async ({
OPTIONAL {
?versionHistory ${ns.schema.hasPart} ?cube.
}
OPTIONAL { ?cube ${ns.dcterms.publisher} ?publisher. }
${(themeValues.length > 0 ? identity : optional)(sparql`
?cube ${ns.dcat.theme} ?theme.
?theme ${ns.schema.name} ?themeName.
`)}
${(creatorValues.length > 0 ? identity : optional)(
sparql`
?cube ${ns.dcterms.creator} ?creator.
?creator ${ns.schema.name} ?creatorLabel.
`
)}
${makeVisualizeDatasetFilter({
includeDrafts: !!includeDrafts,
Expand All @@ -144,97 +162,56 @@ export const searchCubes = async ({
?.split(" ")
.slice(0, 1)
.map(
(x) => `${contains("?name", x)} || ${contains("?description", x)}`
(x) => `${icontains("?name", x)} || ${icontains("?description", x)}`
)
.join(" || ")}
|| (bound(?publisher) && ${query
.split(" ")
.map((x) => icontains("?publisher", x))
.join(" || ")})
|| (bound(?themeName) && ${query
.split(" ")
.map((x) => icontains("?themeName", x))
.join(" || ")})
|| (bound(?creatorLabel) && ${query
.split(" ")
.map((x) => icontains("?creatorLabel", x))
.join(" || ")})
)`
: ""
}
`;

const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.
?cube ${ns.dcat.theme} ?theme.
?cube ${ns.dcterms.creator} ?creator.
OPTIONAL {
?cube ${ns.schema.about} ?about.
}
OPTIONAL {
?versionHistory ${ns.schema.hasPart} ?cube.
}
${makeVisualizeFilter(!!includeDrafts)}
${makeInFilter("about", aboutValues)}
${makeInFilter("theme", themeValues)}
${makeInFilter("creator", creatorValues)}
${
query && query.length > 0
? sparql`
OPTIONAL {
?cube ${ns.dcterms.publisher} ?publisher.
FILTER(${query
.split(" ")
.map((x) => contains("?publisher", x))
.join(" || ")}) .
}
OPTIONAL {
?theme ${ns.schema.name} ?themeName.
FILTER(${query
.split(" ")
.map((x) => contains("?themeName", x))
.join(" || ")}) .
}
OPTIONAL {
?creator ${ns.schema.name} ?creatorLabel.
FILTER(${query
.split(" ")
.map((x) => contains("?creatorLabel", x))
.join(" || ")}) .
}
`
: ""
}
`;

let scoreResults = await executeAndMeasure(sparqlClient, scoresQuery);
const scoreResults = await executeAndMeasure(sparqlClient, scoresQuery);
queries.push({
...scoreResults.meta,
label: "scores1",
});

if (scoreResults.data.length === 0) {
scoreResults = await executeAndMeasure(sparqlClient, scoresQuery2);
queries.push({
...scoreResults.meta,
label: "scores2",
});
}

const infoPerCube = computeScores(scoreResults.data, {
const data = scoreResults.data.map((x) => parseResultRow(x as ResultRow));
const versionHistoryPerCube = Object.fromEntries(
data.map((d) => [d.cube, d.versionHistory])
);
const infoPerCube = computeScores(data, {
query: query,
identifierName: "cube",
});

// Find information on cubes
// Potential optimisation: filter out cubes that are below some threshold
// under the maximum score and only retrieve those cubes
// The query could also dedup directly the version of the cubes
const cubeIris = Object.keys(infoPerCube);
const cubesQuery = DESCRIBE`${cubeIris.map((x) => `<${x}>`).join(" ")}`;

const sortedCubeIris = cubeIris.sort((a, b) =>
descending(infoPerCube[a].score, infoPerCube[b].score)
);

const cubesQuery = DESCRIBE`${sortedCubeIris.map((x) => `<${x}>`).join(" ")}`;

if (!locale) {
throw new Error("Must pass locale");
Expand All @@ -260,7 +237,7 @@ export const searchCubes = async ({
.map((cubeNode) => {
const cube = cubeNode as unknown as Cube;
const iri = parseIri(cube);
const versionHistory = parseVersionHistory(cube);
const versionHistory = versionHistoryPerCube[iri];
const dedupIdentifier = versionHistory || iri;
if (seen.has(dedupIdentifier)) {
return null;
Expand Down

0 comments on commit b4b019e

Please sign in to comment.