Skip to content

Commit

Permalink
Merge pull request #1022 from visualize-admin/feat/improve-search
Browse files Browse the repository at this point in the history
  • Loading branch information
bprusinowski authored Apr 14, 2023
2 parents 9820273 + e176adf commit c6b54b5
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 32 deletions.
45 changes: 39 additions & 6 deletions app/rdf/query-search-score-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,70 @@ export const parseFloatZeroed = (s: string) => {
}
};

/**
 * Points added to a cube's score each time a query token is found in the
 * corresponding field of a score row.
 */
export const weights: Record<string, number> = {
export const weights = {
name: 5,
description: 2,
themeName: 1,
publisher: 1,
creatorLabel: 1,
};
// Factor applied to a row's score when the row's `lang` equals the requested language.
export const langMultiplier = 1.5;
// Bonus added for each weighted field that contains the whole query string;
// twice the `name` weight.
export const exactMatchPoints = weights["name"] * 2;

/**
 * Tells whether a query token should be ignored when scoring.
 *
 * A token counts as a stopword when it is shorter than three characters and
 * is unchanged by lowercasing, so short tokens containing uppercase letters
 * (e.g. acronyms) are kept.
 */
const isStopword = (d: string) => {
  const isShort = d.length < 3;
  const isAlreadyLowercase = d === d.toLowerCase();
  return isShort && isAlreadyLowercase;
};

/**
* Computes a relevance score per cube from a list of cube rows containing weighted fields.
*/
export const computeScores = (
scoresRaw: any[],
{ query, identifierName }: { query?: string; identifierName: string }
{
query,
identifierName,
lang,
}: {
query?: string | null;
identifierName: string;
lang?: string | null;
}
) => {
const infoPerCube = {} as Record<string, { score: number }>;
if (query) {
for (let scoreRow of scoresRaw) {
let score = 0;
for (let [field, weight] of Object.entries(weights)) {
const val = scoreRow[field];
const val = scoreRow[field]?.toLowerCase();

if (!val) {
continue;
}
for (let tok of query.split(" ")) {
if (val && val.toLowerCase().includes(tok.toLowerCase())) {

for (let tok of query.split(" ").filter((d) => !isStopword(d))) {
if (val.includes(tok.toLowerCase())) {
score += weight;
}
}

// Bonus points for exact match.
if (val.includes(query.toLowerCase())) {
score += exactMatchPoints;
}
}

// Cubes with properties in the current language get a bonus,
// as generally we expect the user to be interested in those.
if (scoreRow["lang"] === lang) {
score *= langMultiplier;
}

if (
infoPerCube[scoreRow[identifierName]] === undefined ||
score > infoPerCube[scoreRow[identifierName]].score
) {
infoPerCube[scoreRow[identifierName]] = { score };
}
infoPerCube[scoreRow[identifierName]] = { score };
}
for (let k of Object.keys(infoPerCube)) {
if (infoPerCube[k]?.score === 0) {
Expand Down
26 changes: 19 additions & 7 deletions app/rdf/query-search.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { computeScores, weights } from "./query-search-score-utils";
import {
computeScores,
exactMatchPoints,
langMultiplier,
weights,
} from "./query-search-score-utils";

// jest.mock("rdf-ext", () => ({}));
// jest.mock("@rdf-esm/data-model", () => ({}));
Expand All @@ -11,20 +16,27 @@ jest.mock("@tpluscode/sparql-builder", () => ({}));

describe("compute scores", () => {
const scores = [
{ cube: "a", name: "national" },
{ cube: "b", name: "national", description: "economy" },
{ cube: "c", creatorLabel: "national" },
{ cube: "d", creatorLabel: "" },
{ lang: "en", cube: "a", name: "national" },
{ lang: "en", cube: "b", name: "national", description: "economy" },
{ lang: "de", cube: "c", creatorLabel: "national" },
{ lang: "de", cube: "d", creatorLabel: "" },
{ lang: "en", cube: "e", name: "National Economy of Switzerland" },
];

it("should compute weighted score per cube from score rows", () => {
const reduced = computeScores(scores, {
query: "national economy",
identifierName: "cube",
lang: "en",
});
expect(reduced["a"].score).toEqual(weights.name);
expect(reduced["b"].score).toEqual(weights.name + weights.description);
expect(reduced["a"].score).toEqual(weights.name * langMultiplier);
expect(reduced["b"].score).toEqual(
(weights.name + weights.description) * langMultiplier
);
expect(reduced["c"].score).toEqual(weights.creatorLabel);
expect(reduced["d"]).toBeUndefined();
expect(reduced["e"].score).toEqual(
(weights.name * 2 + exactMatchPoints) * langMultiplier
);
});
});
24 changes: 5 additions & 19 deletions app/rdf/query-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ import { Quad, Stream } from "rdf-js";
import StreamClient from "sparql-http-client";
import ParsingClient from "sparql-http-client/ParsingClient";

import { truthy } from "@/domain/types";
import { Awaited } from "@/domain/types";
import { Awaited, truthy } from "@/domain/types";
import { RequestQueryMeta } from "@/graphql/query-meta";
import { DataCubeSearchFilter } from "@/graphql/resolver-types";
import { ResolvedDataCube } from "@/graphql/shared-types";
Expand Down Expand Up @@ -65,18 +64,6 @@ const executeAndMeasure = async <T extends SelectQuery | DescribeQuery>(
};
};

/**
 * Normalizes a raw search query: splits it on spaces, drops short
 * all-lowercase tokens, lowercases the remaining tokens and joins them back
 * with single spaces.
 *
 * @param rawQuery - The query string as typed by the user.
 * @returns The lowercased query without short lowercase tokens.
 */
const enhanceQuery = (rawQuery: string) => {
  return (
    rawQuery
      .split(" ")
      // Filter out lowercase, small tokens. This has to run before the
      // tokens are lowercased; otherwise every token looks lowercase and
      // short uppercase tokens (e.g. "EU") are dropped as well.
      .filter((t) => t.length > 2 || t.toLowerCase() !== t)
      .map((t) => t.toLowerCase())
      .join(" ")
  );
};
/**
 * Builds a case-insensitive SPARQL `CONTAINS` expression.
 *
 * @param left - Expression to search in; inserted verbatim (presumably a
 *   SPARQL variable such as `?name` — confirm against callers).
 * @param right - Plain text to look for; emitted as a double-quoted string
 *   literal.
 * @returns The `CONTAINS(LCASE(...), LCASE("..."))` expression as a string.
 */
const icontains = (left: string, right: string) => {
  // Escape characters that would otherwise terminate or break the quoted
  // literal, so search terms containing quotes cannot alter the query.
  const escaped = right
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r");
  return `CONTAINS(LCASE(${left}), LCASE("${escaped}"))`;
};
Expand All @@ -98,7 +85,7 @@ const extractCubesFromStream = async (cubeStream: Stream<Quad>) => {
};

export const searchCubes = async ({
query: rawQuery,
query,
locale,
filters,
includeDrafts,
Expand All @@ -114,8 +101,6 @@ export const searchCubes = async ({
}) => {
const queries = [] as RequestQueryMeta[];

const query = rawQuery ? enhanceQuery(rawQuery) : undefined;

// Search cubeIris along with their score
const themeValues =
filters?.filter((x) => x.type === "DataCubeTheme").map((v) => v.value) ||
Expand All @@ -128,7 +113,7 @@ export const searchCubes = async ({
filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) ||
[];

const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description ?publisher ?themeName ?creatorLabel`
const scoresQuery = SELECT.DISTINCT`?lang ?cube ?versionHistory ?name ?description ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.
Expand Down Expand Up @@ -216,8 +201,9 @@ export const searchCubes = async ({
data.map((d) => [d.cube, d.versionHistory])
);
const infoPerCube = computeScores(data, {
query: query,
query,
identifierName: "cube",
lang: locale,
});

// Find information on cubes
Expand Down

1 comment on commit c6b54b5

@vercel
Copy link

@vercel vercel bot commented on c6b54b5 Apr 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

visualization-tool – ./

visualization-tool-alpha.vercel.app
visualization-tool-git-main-ixt1.vercel.app
visualization-tool-ixt1.vercel.app

Please sign in to comment.