Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search improvements #745

Merged
merged 9 commits into from
Sep 29, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions app/rdf/query-search-score-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,24 @@ export const weights: Record<string, number> = {
*/
export const computeScores = (
scoresRaw: any[],
{ query }: { query?: string }
{ query, identifierName }: { query?: string; identifierName: string }
) => {
const infoPerCube = {} as Record<string, { score: number }>;
if (query) {
for (let scoreRow of scoresRaw) {
let score = 0;
for (let [field, weight] of Object.entries(weights)) {
const val = scoreRow[field]?.value;
const val = scoreRow[field];
if (!val) {
continue;
}
for (let tok of query.split(" ")) {
if (val.toLowerCase().includes(tok.toLowerCase())) {
if (val && val.toLowerCase().includes(tok.toLowerCase())) {
score += weight;
}
}
}
infoPerCube[scoreRow.cube.value] = { score };
infoPerCube[scoreRow[identifierName]] = { score };
}
for (let k of Object.keys(infoPerCube)) {
if (infoPerCube[k]?.score === 0) {
Expand All @@ -46,7 +46,7 @@ export const computeScores = (
}
} else {
for (let scoreRow of scoresRaw) {
infoPerCube[scoreRow.cube.value] = { score: 1 };
infoPerCube[scoreRow[identifierName]] = { score: 1 };
}
}
return infoPerCube;
Expand Down
5 changes: 2 additions & 3 deletions app/rdf/query-search.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import mapValues from "lodash/mapValues";

import { computeScores, weights } from "./query-search-score-utils";

// jest.mock("rdf-ext", () => ({}));
Expand All @@ -17,11 +15,12 @@ describe("compute scores", () => {
{ cube: "b", name: "national", description: "economy" },
{ cube: "c", creatorLabel: "national" },
{ cube: "d", creatorLabel: "" },
].map((x) => mapValues(x, (v) => ({ value: v })));
];

it("should compute weighted score per cube from score rows", () => {
const reduced = computeScores(scores, {
query: "national economy",
identifierName: "cube",
});
expect(reduced["a"].score).toEqual(weights.name);
expect(reduced["b"].score).toEqual(weights.name + weights.description);
Expand Down
139 changes: 58 additions & 81 deletions app/rdf/query-search.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { DESCRIBE, SELECT } from "@tpluscode/sparql-builder";
import { TemplateResult } from "@tpluscode/rdf-string/lib/TemplateResult";
import { DESCRIBE, SELECT, sparql } from "@tpluscode/sparql-builder";
import clownface from "clownface";
import { descending } from "d3";
import { Cube } from "rdf-cube-view-query";
Expand All @@ -10,7 +11,7 @@ import { truthy } from "@/domain/types";
import { DataCubeSearchFilter } from "@/graphql/resolver-types";
import { ResolvedDataCube } from "@/graphql/shared-types";
import * as ns from "@/rdf/namespace";
import { parseCube, parseIri, parseVersionHistory } from "@/rdf/parse";
import { parseCube, parseIri } from "@/rdf/parse";
import { fromStream } from "@/rdf/sparql-client";

import { computeScores, highlight } from "./query-search-score-utils";
Expand All @@ -23,7 +24,7 @@ const makeInFilter = (varName: string, values: string[]) => {
return `
${
values.length > 0
? `FILTER (
? `FILTER (bound(?${varName}) &&
?${varName} IN (${values.map(toNamedNode)})
)`
: ""
Expand Down Expand Up @@ -72,10 +73,17 @@ const enhanceQuery = (rawQuery: string) => {
return enhancedQuery;
};

const contains = (left: string, right: string) => {
/**
 * Builds a SPARQL boolean expression that is true when `left` contains
 * `right`, ignoring case (both sides are passed through `LCASE`).
 *
 * @param left  SPARQL expression to search in, inserted verbatim
 *              (e.g. a variable such as `?name`).
 * @param right Plain text to search for; it is emitted as a double-quoted
 *              SPARQL string literal.
 * @returns The `CONTAINS(LCASE(...), LCASE("..."))` expression as a string.
 */
const icontains = (left: string, right: string) => {
  // `right` ends up between double quotes, so the characters that may not
  // appear raw in such a literal (backslash, double quote, line breaks) are
  // escaped. Backslashes go first so later escapes are not doubled.
  // NOTE(review): assumes callers pass unescaped text - confirm that the
  // query is not already escaped upstream.
  const escaped = right
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r");
  return `CONTAINS(LCASE(${left}), LCASE("${escaped}"))`;
};

/** A result row in which every field is wrapped as `{ value: ... }`. */
type ResultRow = Record<string, { value: unknown }>;

/**
 * Unwraps a result row: returns a new object with the same keys where each
 * `{ value }` wrapper is replaced by the wrapped value itself.
 */
const parseResultRow = (row: ResultRow) => {
  const unwrappedPairs = Object.entries(row).map(([field, term]) => [
    field,
    term.value,
  ]);
  return Object.fromEntries(unwrappedPairs);
};

/** Pass-through wrapper: returns the given template fragment unchanged. */
const identity = <T>(str: TemplateResult<T>) => {
  return str;
};
/** Wraps the given template fragment in a SPARQL `OPTIONAL { ... }` group. */
const optional = <T>(str: TemplateResult<T>) => {
  return sparql`OPTIONAL { ${str} }`;
};

export const searchCubes = async ({
query: rawQuery,
locale,
Expand Down Expand Up @@ -107,15 +115,11 @@ export const searchCubes = async ({
filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) ||
[];

const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description`
const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.


?cube ${ns.dcat.theme} ?theme.
?cube ${ns.dcterms.creator} ?creator.

OPTIONAL {
?cube ${ns.schema.description} ?description.
}
Expand All @@ -127,6 +131,20 @@ export const searchCubes = async ({
OPTIONAL {
?versionHistory ${ns.schema.hasPart} ?cube.
}

OPTIONAL { ?cube ${ns.dcterms.publisher} ?publisher. }

${(themeValues.length > 0 ? identity : optional)(sparql`
?cube ${ns.dcat.theme} ?theme.
?theme ${ns.schema.name} ?themeName.
`)}

${(creatorValues.length > 0 ? identity : optional)(
sparql`
?cube ${ns.dcterms.creator} ?creator.
?creator ${ns.schema.name} ?creatorLabel.
`
)}

${makeVisualizeDatasetFilter({
includeDrafts: !!includeDrafts,
Expand All @@ -144,97 +162,56 @@ export const searchCubes = async ({
?.split(" ")
.slice(0, 1)
.map(
(x) => `${contains("?name", x)} || ${contains("?description", x)}`
(x) => `${icontains("?name", x)} || ${icontains("?description", x)}`
)
.join(" || ")}


|| (bound(?publisher) && ${query
.split(" ")
.map((x) => icontains("?publisher", x))
.join(" || ")})

|| (bound(?themeName) && ${query
.split(" ")
.map((x) => icontains("?themeName", x))
.join(" || ")})

|| (bound(?creatorLabel) && ${query
.split(" ")
.map((x) => icontains("?creatorLabel", x))
.join(" || ")})

)`
: ""
}

`;

const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.

?cube ${ns.dcat.theme} ?theme.
?cube ${ns.dcterms.creator} ?creator.

OPTIONAL {
?cube ${ns.schema.about} ?about.
}

OPTIONAL {
?versionHistory ${ns.schema.hasPart} ?cube.
}

${makeVisualizeFilter(!!includeDrafts)}

${makeInFilter("about", aboutValues)}
${makeInFilter("theme", themeValues)}
${makeInFilter("creator", creatorValues)}

${
query && query.length > 0
? sparql`

OPTIONAL {
?cube ${ns.dcterms.publisher} ?publisher.
FILTER(${query
.split(" ")
.map((x) => contains("?publisher", x))
.join(" || ")}) .
}

OPTIONAL {
?theme ${ns.schema.name} ?themeName.
FILTER(${query
.split(" ")
.map((x) => contains("?themeName", x))
.join(" || ")}) .
}


OPTIONAL {

?creator ${ns.schema.name} ?creatorLabel.
FILTER(${query
.split(" ")
.map((x) => contains("?creatorLabel", x))
.join(" || ")}) .
}
`
: ""
}

`;

let scoreResults = await executeAndMeasure(sparqlClient, scoresQuery);
const scoreResults = await executeAndMeasure(sparqlClient, scoresQuery);
queries.push({
...scoreResults.meta,
label: "scores1",
});

if (scoreResults.data.length === 0) {
scoreResults = await executeAndMeasure(sparqlClient, scoresQuery2);
queries.push({
...scoreResults.meta,
label: "scores2",
});
}

const infoPerCube = computeScores(scoreResults.data, {
const data = scoreResults.data.map((x) => parseResultRow(x as ResultRow));
const versionHistoryPerCube = Object.fromEntries(
data.map((d) => [d.cube, d.versionHistory])
);
const infoPerCube = computeScores(data, {
query: query,
identifierName: "cube",
});

// Find information on cubes
// Potential optimisation: filter out cubes that are below some threshold
// under the maximum score and only retrieve those cubes
// The query could also dedup directly the version of the cubes
const cubeIris = Object.keys(infoPerCube);
const cubesQuery = DESCRIBE`${cubeIris.map((x) => `<${x}>`).join(" ")}`;

const sortedCubeIris = cubeIris.sort((a, b) =>
descending(infoPerCube[a].score, infoPerCube[b].score)
);

const cubesQuery = DESCRIBE`${sortedCubeIris.map((x) => `<${x}>`).join(" ")}`;

if (!locale) {
throw new Error("Must pass locale");
Expand All @@ -260,7 +237,7 @@ export const searchCubes = async ({
.map((cubeNode) => {
const cube = cubeNode as unknown as Cube;
const iri = parseIri(cube);
const versionHistory = parseVersionHistory(cube);
const versionHistory = versionHistoryPerCube[iri];
const dedupIdentifier = versionHistory || iri;
if (seen.has(dedupIdentifier)) {
return null;
Expand Down