From 25c562ee165008a207e43ec37068d966a0a6f909 Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Thu, 19 Oct 2023 11:34:38 +0200 Subject: [PATCH 1/8] perf: Retrieve (min|max)Inclusive values stored behind sh:or --- CHANGELOG.md | 2 ++ app/domain/data.ts | 8 -------- app/rdf/queries.ts | 43 ++++++++++++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f50816e50..424fcd364 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ You can also check the [release page](https://github.com/visualize-admin/visuali ## Unreleased +- Performance + - (min & max)Inclusive values stored in `sh:or` are now also retrieved - Style - Map now outlines shapes on hover, instead of changing their colors diff --git a/app/domain/data.ts b/app/domain/data.ts index 034393846..5ca577f4b 100644 --- a/app/domain/data.ts +++ b/app/domain/data.ts @@ -78,7 +78,6 @@ export const parseRDFLiteral = (value: Literal): T => { case "string": case "boolean": return v as T; - // return v === "true" ? true : false; case "float": case "integer": case "long": @@ -96,13 +95,6 @@ export const parseRDFLiteral = (value: Literal): T => { case "unsignedShort": case "unsignedByte": return +v as T; - // TODO: Figure out how to preserve granularity of date (maybe include interval?) - // case "date": - // case "time": - // case "dateTime": - // case "gYear": - // case "gYearMonth": - // return new Date(v); default: return v as T; } diff --git a/app/rdf/queries.ts b/app/rdf/queries.ts index eeafe9469..33f510be0 100644 --- a/app/rdf/queries.ts +++ b/app/rdf/queries.ts @@ -218,19 +218,36 @@ export const getCubeDimensionValues = async ({ }): Promise => { const { dimension, cube, locale, data } = rdimension; - if ( - typeof dimension.minInclusive !== "undefined" && - typeof dimension.maxInclusive !== "undefined" && - data.dataKind !== "Time" && - data.scaleType !== "Ordinal" - ) { - const min = parseObservationValue({ value: dimension.minInclusive }) ?? 0; - const max = parseObservationValue({ value: dimension.maxInclusive }) ?? 0; - - return [ - { value: min, label: `${min}` }, - { value: max, label: `${max}` }, - ]; + if (data.dataKind !== "Time" && data.scaleType !== "Ordinal") { + if ( + typeof dimension.minInclusive !== "undefined" && + typeof dimension.maxInclusive !== "undefined" + ) { + const min = parseObservationValue({ value: dimension.minInclusive }) ?? 0; + const max = parseObservationValue({ value: dimension.maxInclusive }) ?? 0; + + return [ + { value: min, label: `${min}` }, + { value: max, label: `${max}` }, + ]; + } + + const firstPointer = dimension.out(ns.sh.or).out(ns.rdf.first); + const firstMin = firstPointer.out(ns.sh.minInclusive); + const firstMax = firstPointer.out(ns.sh.maxInclusive); + + if ( + typeof firstMin.value !== "undefined" && + typeof firstMax.value !== "undefined" + ) { + const min = +firstMin.value; + const max = +firstMax.value; + + return [ + { value: min, label: `${min}` }, + { value: max, label: `${max}` }, + ]; + } } if (shouldLoadMinMaxValues(rdimension)) { From a06f614f5287e9e84a5b9589d3919095ff56a95b Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Thu, 19 Oct 2023 11:55:59 +0200 Subject: [PATCH 2/8] fix: Also consider rest of the sh:or when retrieving min or max values --- app/rdf/queries.ts | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/app/rdf/queries.ts b/app/rdf/queries.ts index 33f510be0..c5ed0ace9 100644 --- a/app/rdf/queries.ts +++ b/app/rdf/queries.ts @@ -232,16 +232,18 @@ export const getCubeDimensionValues = async ({ ]; } - const firstPointer = dimension.out(ns.sh.or).out(ns.rdf.first); - const firstMin = firstPointer.out(ns.sh.minInclusive); - const firstMax = firstPointer.out(ns.sh.maxInclusive); + const listPointer = dimension + .out(ns.sh.or) + .out([ns.rdf.first, ns.rdf.rest]); + const listMin = listPointer.out(ns.sh.minInclusive); + const listMax = listPointer.out(ns.sh.maxInclusive); if ( - typeof firstMin.value !== "undefined" && - typeof firstMax.value !== "undefined" + typeof listMin.value !== "undefined" && + typeof listMax.value !== "undefined" ) { - const min = +firstMin.value; - const max = +firstMax.value; + const min = +listMin.value; + const max = +listMax.value; return [ { value: min, label: `${min}` }, From 632f86de940794c35ef5bf65a0451d91706ec592 Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Thu, 19 Oct 2023 15:29:17 +0200 Subject: [PATCH 3/8] feat: Also potentially retrieve min/max from rest of the sh:or values --- app/rdf/queries.ts | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/app/rdf/queries.ts b/app/rdf/queries.ts index c5ed0ace9..5c922aefc 100644 --- a/app/rdf/queries.ts +++ b/app/rdf/queries.ts @@ -232,23 +232,33 @@ export const getCubeDimensionValues = async ({ ]; } - const listPointer = dimension - .out(ns.sh.or) - .out([ns.rdf.first, ns.rdf.rest]); - const listMin = listPointer.out(ns.sh.minInclusive); - const listMax = listPointer.out(ns.sh.maxInclusive); + // Try to get min/max values from a list of values. + let listItemPointer = dimension.out(ns.sh.or); - if ( - typeof listMin.value !== "undefined" && - typeof listMax.value !== "undefined" + while ( + listItemPointer.out(ns.rdf.rest).value && + // Only try until we reach the end of the list. + !listItemPointer.out(ns.rdf.rest).term?.equals(ns.rdf.nil) ) { - const min = +listMin.value; - const max = +listMax.value; + const item = listItemPointer.out(ns.rdf.first); + const itemMin = item.out(ns.sh.minInclusive); + const itemMax = item.out(ns.sh.maxInclusive); - return [ - { value: min, label: `${min}` }, - { value: max, label: `${max}` }, - ]; + if ( + typeof itemMin.value !== "undefined" && + typeof itemMax.value !== "undefined" + ) { + const min = +itemMin.value; + const max = +itemMax.value; + + return [ + { value: min, label: `${min}` }, + { value: max, label: `${max}` }, + ]; + } + + // Move to next list item. + listItemPointer = listItemPointer.out(ns.rdf.rest); } } From 23768046fa01d05ba3137fe7da7e6d4a09496f35 Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Mon, 23 Oct 2023 12:56:15 +0200 Subject: [PATCH 4/8] perf: Move data download to client side ...to avoid sending the data back and forth. --- app/components/data-download.tsx | 33 ++++++++++++----- app/pages/api/download.ts | 61 -------------------------------- 2 files changed, 25 insertions(+), 69 deletions(-) delete mode 100644 app/pages/api/download.ts diff --git a/app/components/data-download.tsx b/app/components/data-download.tsx index 5a1293e16..c6ac9a0ab 100644 --- a/app/components/data-download.tsx +++ b/app/components/data-download.tsx @@ -7,6 +7,7 @@ import { Typography, } from "@mui/material"; import { ascending } from "d3"; +import { Workbook } from "exceljs"; import { saveAs } from "file-saver"; import keyBy from "lodash/keyBy"; import HoverMenu from "material-ui-popup-state/HoverMenu"; @@ -325,7 +326,7 @@ const DownloadMenuItem = ({ const urqlClient = useClient(); const [state, dispatch] = useDataDownloadState(); const download = useCallback( - ( + async ( componentsData: ComponentsQuery, observationsData: DataCubeObservationsQuery ) => { @@ -343,13 +344,29 @@ const DownloadMenuItem = ({ dimensionParsers, }); - return fetch("/api/download", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ columnKeys, data, fileFormat }), - }).then((res) => - res.blob().then((blob) => saveAs(blob, `${fileName}.${fileFormat}`)) - ); + const workbook = new Workbook(); + const worksheet = workbook.addWorksheet("data"); + worksheet.columns = columnKeys.map((d) => ({ + header: d, + key: d, + })); + worksheet.addRows(data); + + switch (fileFormat) { + case "csv": + const csv = await workbook.csv.writeBuffer(); + saveAs(new Blob([csv], { type: "text/csv" }), `${fileName}.csv`); + break; + case "xlsx": + const xlsx = await workbook.xlsx.writeBuffer(); + saveAs( + new Blob([xlsx], { + type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + }), + `${fileName}.xlsx` + ); + break; + } }, [fileFormat, fileName, locale] ); diff --git a/app/pages/api/download.ts b/app/pages/api/download.ts deleted file mode 100644 index 869e22a79..000000000 --- a/app/pages/api/download.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { Workbook } from "exceljs"; -import { NextApiRequest, NextApiResponse } from "next"; - -import { FileFormat } from "../../components/data-download"; -import { Observation } from "../../domain/data"; - -export default async function Download( - req: Omit & { - body: { - columnKeys: string[]; - data: Observation[]; - fileFormat: FileFormat; - }; - }, - res: NextApiResponse -) { - const { method } = req; - - switch (method) { - case "POST": - try { - const { columnKeys, data, fileFormat } = req.body; - const workbook = new Workbook(); - const worksheet = workbook.addWorksheet("data"); - worksheet.columns = columnKeys.map((d) => ({ - header: d, - key: d, - })); - worksheet.addRows(data); - - switch (fileFormat) { - case "csv": - await workbook.csv.write(res, { sheetId: worksheet.id }); - break; - case "xlsx": - await workbook.xlsx.write(res); - break; - } - - res.status(200); - res.end(); - } catch (e) { - console.error(e); - res.status(500).json({ message: "Something went wrong!" }); - } - - break; - default: - res.setHeader("Allow", ["POST"]); - res.status(405).end(`Method ${method} Not Allowed`); - } -} - -export const config = { - api: { - bodyParser: { - sizeLimit: "1024mb", - }, - reponseLimit: false, - }, -}; From 64223087a69cf69053a1e414e4b52c7f57c2525b Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Mon, 23 Oct 2023 12:57:01 +0200 Subject: [PATCH 5/8] perf: Parallelize component and observation queries (data download) --- app/components/data-download.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/components/data-download.tsx b/app/components/data-download.tsx index c6ac9a0ab..e0a014462 100644 --- a/app/components/data-download.tsx +++ b/app/components/data-download.tsx @@ -26,7 +26,7 @@ import { useContext, useState, } from "react"; -import { OperationResult, useClient } from "urql"; +import { useClient } from "urql"; import { getSortedColumns } from "@/browse/datatable"; import Flex from "@/components/flex"; @@ -380,8 +380,8 @@ const DownloadMenuItem = ({ dispatch({ isDownloading: true }); try { - const componentsResult: OperationResult = - await urqlClient + const [componentsResult, observationsResult] = await Promise.all([ + urqlClient .query( ComponentsDocument, { @@ -392,9 +392,8 @@ const DownloadMenuItem = ({ componentIris: undefined, } ) - .toPromise(); - const observationsResult: OperationResult = - await urqlClient + .toPromise(), + urqlClient .query< DataCubeObservationsQuery, DataCubeObservationsQueryVariables @@ -406,7 +405,8 @@ const DownloadMenuItem = ({ componentIris: undefined, filters, }) - .toPromise(); + .toPromise(), + ]); if (componentsResult.data && observationsResult.data) { await download(componentsResult.data, observationsResult.data); From 6581bd6d4b05499922aa5d5d5937cc1e5e66b0bc Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Mon, 23 Oct 2023 12:58:29 +0200 Subject: [PATCH 6/8] fix: Do not show empty creator tags --- app/browser/dataset-browse.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/browser/dataset-browse.tsx b/app/browser/dataset-browse.tsx index ded0dee85..a3573bfe7 100644 --- a/app/browser/dataset-browse.tsx +++ b/app/browser/dataset-browse.tsx @@ -959,7 +959,7 @@ export const DatasetResult = ({ )) : null} - {creator ? ( + {creator && creator.label ? ( Date: Mon, 23 Oct 2023 12:59:05 +0200 Subject: [PATCH 7/8] docs: Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ac6c7293..65245649e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ You can also check the [release page](https://github.com/visualize-admin/visuali ## Unreleased +- Performance + - Improved performance of data download - Style - Map now outlines shapes on hover, instead of changing their colors - Maintenance From f4f2e658f4343dcdc1383f1e0357f3b4f8e974da Mon Sep 17 00:00:00 2001 From: Bartosz Prusinowski Date: Mon, 23 Oct 2023 12:59:13 +0200 Subject: [PATCH 8/8] chore: Update .env.development --- app/.env.development | 1 + 1 file changed, 1 insertion(+) diff --git a/app/.env.development b/app/.env.development index 5355b3a90..41f14b6b8 100644 --- a/app/.env.development +++ b/app/.env.development @@ -3,3 +3,4 @@ ENDPOINT=sparql+https://lindas.admin.ch/query SPARQL_GEO_ENDPOINT=https://geo.ld.admin.ch/query GRAPHQL_ENDPOINT=/api/graphql WHITELISTED_DATA_SOURCES=["Prod", "Int", "Test"] +SENTRY_IGNORE_API_RESOLUTION_ERROR=1 \ No newline at end of file