Skip to content

Commit

Permalink
feat: Cube search does not use full text search engine
Browse files Browse the repository at this point in the history
  • Loading branch information
ptbrowne committed Sep 19, 2022
1 parent 1790c9d commit 6dccbc7
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 94 deletions.
81 changes: 33 additions & 48 deletions app/rdf/query-search-score-utils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { ResultRow } from "sparql-http-client/ResultParser";

export const parseFloatZeroed = (s: string) => {
const n = parseFloat(s);
if (Number.isNaN(n)) {
Expand All @@ -9,60 +7,47 @@ export const parseFloatZeroed = (s: string) => {
}
};

/**
 * Flattens one raw result row into a plain record: the cube IRI plus one
 * numeric entry per score column. Each score cell's `value` is passed
 * through `parseFloatZeroed`.
 */
const parseScoreRow = (x: ResultRow) => {
  const {
    cube,
    scoreName,
    scoreDescription,
    scoreTheme,
    scorePublisher,
    scoreCreator,
  } = x;

  const cubeIri = cube.value;
  return {
    cubeIri,
    scoreName: parseFloatZeroed(scoreName?.value),
    scoreDescription: parseFloatZeroed(scoreDescription?.value),
    scoreTheme: parseFloatZeroed(scoreTheme?.value),
    scorePublisher: parseFloatZeroed(scorePublisher?.value),
    scoreCreator: parseFloatZeroed(scoreCreator?.value),
  };
};

// Every key of a parsed score row except the cube identifier.
type ScoreKey = Exclude<keyof ReturnType<typeof parseScoreRow>, "cubeIri">;
/**
 * Weight added to a cube's score each time a query token is found in the
 * corresponding field of a result row.
 */
const weights: Record<string, number> = {
  name: 5,
  description: 2,
  themeName: 1,
  publisher: 1,
  creatorLabel: 1,
};

/**
 * From a list of cube rows containing weighted fields, computes an index
 * from cube IRI to weighted score.
 *
 * The query is split on whitespace into tokens. For every weighted field of
 * a row, each token found (case-insensitively) in the field's `value` adds
 * the field's weight to the row score.
 *
 * - Without a query, or with a query made only of whitespace, every cube
 *   gets a score of 1.
 * - With a query, cubes whose rows all score 0 are left out of the result.
 * - When several rows refer to the same cube, the best row score is kept, so
 *   a non-matching row can never erase a match found in another row.
 *
 * @param scoresRaw Rows shaped like `{ cube: { value }, [field]: { value } }`.
 * @param options.query Raw search string; optional.
 * @returns A record from cube IRI to `{ score }`.
 */
export const computeScores = (
  scoresRaw: any[],
  { query }: { query?: string }
) => {
  const infoPerCube: Record<string, { score: number }> = {};

  // Tokenize once. Dropping empty tokens matters: "abc".includes("") is
  // true, so a double or trailing space would otherwise match every field.
  const tokens = (query ?? "")
    .toLowerCase()
    .split(/\s+/)
    .filter((tok) => tok.length > 0);

  if (tokens.length === 0) {
    for (const scoreRow of scoresRaw) {
      infoPerCube[scoreRow.cube.value] = { score: 1 };
    }
    return infoPerCube;
  }

  const weightedFields = Object.entries(weights);

  for (const scoreRow of scoresRaw) {
    let score = 0;
    for (const [field, weight] of weightedFields) {
      const val = scoreRow[field]?.value;
      if (typeof val !== "string" || val === "") {
        continue;
      }
      // Lowercase the field once rather than once per token.
      const haystack = val.toLowerCase();
      for (const tok of tokens) {
        if (haystack.includes(tok)) {
          score += weight;
        }
      }
    }

    if (score === 0) {
      continue;
    }

    const cubeIri: string = scoreRow.cube.value;
    const previous = infoPerCube[cubeIri];
    if (previous === undefined || score > previous.score) {
      infoPerCube[cubeIri] = { score };
    }
  }

  return infoPerCube;
};
27 changes: 8 additions & 19 deletions app/rdf/query-search.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,18 @@ jest.mock("@tpluscode/sparql-builder", () => ({}));

describe("compute scores", () => {
const scores = [
{ cube: "a", scoreName: 1 },
{ cube: "a", scoreDescription: 1 },
{ cube: "b", scoreName: 5 },
{ cube: "c", scoreCreator: 1 },
{ cube: "d", scoreCreator: 0 },
{ cube: "a", name: "national" },
{ cube: "b", name: "national", description: "economy" },
{ cube: "c", creatorLabel: "national" },
{ cube: "d", creatorLabel: "" },
].map((x) => mapValues(x, (v) => ({ value: v })));

it("should compute weighted score per cube from score rows, and discard cubes with score: 0", () => {
it("should compute weighted score per cube from score rows", () => {
const reduced = computeScores(scores, {
keepZeros: true,
query: "national economy",
});
expect(reduced["a"].score).toEqual(7);
expect(reduced["b"].score).toEqual(25);
expect(reduced["c"].score).toEqual(1);
expect(reduced["d"].score).toEqual(0);
});

it("should compute weighted score per cube from score rows, , and keep cube with score: 0", () => {
const reduced = computeScores(scores, {
keepZeros: false,
});
expect(reduced["a"].score).toEqual(7);
expect(reduced["b"].score).toEqual(25);
expect(reduced["a"].score).toEqual(5);
expect(reduced["b"].score).toEqual(7);
expect(reduced["c"].score).toEqual(1);
expect(reduced["d"]).toBeUndefined();
});
Expand Down
65 changes: 38 additions & 27 deletions app/rdf/query-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,15 @@ const enhanceQuery = (rawQuery: string) => {
// Filter out lowercase, small tokens
.filter((t) => t.length > 2 || t.toLowerCase() !== t)
// Wildcard Searches on each term
.map((t) => `${t}*`)
.map((t) => `${t}`)
.join(" ");
return enhancedQuery;
};

/**
 * Builds a case-insensitive SPARQL substring test:
 * `CONTAINS(LCASE(<left>), LCASE("<right>"))`.
 *
 * @param left SPARQL expression inserted verbatim (e.g. a variable such as
 *   `?name`).
 * @param right Plain text to look for. It is embedded in a double-quoted
 *   SPARQL string literal, so backslashes, double quotes, line feeds and
 *   carriage returns are escaped; otherwise search text containing `"` could
 *   terminate the literal and alter or break the query.
 * @returns The SPARQL expression as a string.
 */
const contains = (left: string, right: string) => {
  const escapes: Record<string, string> = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
  };
  const escaped = right.replace(/[\\"\n\r]/g, (ch) => escapes[ch] ?? ch);
  return `CONTAINS(LCASE(${left}), LCASE("${escaped}"))`;
};

export const searchCubes = async ({
query: rawQuery,
locale,
Expand Down Expand Up @@ -124,10 +128,11 @@ export const searchCubes = async ({
filters?.filter((x) => x.type === "DataCubeAbout").map((v) => v.value) ||
[];

const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?scoreName ?scoreDescription`
const scoresQuery = SELECT.DISTINCT`?cube ?versionHistory ?name ?description`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.
?cube ${ns.schema.description} ?description.
?cube ${ns.dcat.theme} ?theme.
?cube ${ns.dcterms.creator} ?creator.
Expand All @@ -146,23 +151,24 @@ export const searchCubes = async ({
${makeInFilter("theme", themeValues)}
${makeInFilter("creator", creatorValues)}
${
query && query.length > 0
? sparql`
{ (?name ?scoreName) <tag:stardog:api:property:textMatch> "${query}". }
UNION {
OPTIONAL {
?cube ${ns.schema.description} ?description.
(?description ?scoreDescription) <tag:stardog:api:property:textMatch> "${query}" .
}
${
query
? `FILTER(
${query
?.split(" ")
.slice(0, 1)
.map(
(x) => `${contains("?name", x)} || ${contains("?description", x)}`
)
.join(" || ")}
)`
: ""
}
`
: ""
}
`;

const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?scoreTheme ?scorePublisher ?scoreCreator`
const scoresQuery2 = SELECT.DISTINCT`?cube ?versionHistory ?publisher ?themeName ?creatorLabel`
.WHERE`
?cube a ${ns.cube.Cube}.
?cube ${ns.schema.name} ?name.
Expand All @@ -187,27 +193,32 @@ export const searchCubes = async ({
${
query && query.length > 0
? sparql`
UNION {
OPTIONAL {
?cube ${ns.dcterms.publisher} ?publisher.
(?publisher ?scorePublisher) <tag:stardog:api:property:textMatch> "${query}" .
FILTER(${query
.split(" ")
.map((x) => contains("?publisher", x))
.join(" || ")}) .
}
}
UNION {
OPTIONAL {
?theme ${ns.schema.name} ?themeName.
(?themeName ?scoreTheme)
<tag:stardog:api:property:textMatch> "${query}" .
FILTER(${query
.split(" ")
.map((x) => contains("?themeName", x))
.join(" || ")}) .
}
}
UNION {
OPTIONAL {
?creator ${ns.schema.name} ?creatorLabel.
(?creatorLabel ?scoreCreator)
<tag:stardog:api:property:textMatch> "${query}" .
FILTER(${query
.split(" ")
.map((x) => contains("?creatorLabel", x))
.join(" || ")}) .
}
}
`
: ""
}
Expand All @@ -229,7 +240,7 @@ export const searchCubes = async ({
}

const infoPerCube = computeScores(scoreResults.data, {
keepZeros: !query || query.length === 0,
query: query,
});

// Find information on cubes
Expand Down

0 comments on commit 6dccbc7

Please sign in to comment.