From 0815653c25d70812133bb40a65e70eb66fcd867d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Minh=20Nguy=E1=BB=85n?= Date: Wed, 4 Jan 2023 15:42:45 -0800 Subject: [PATCH] Format name lists in labels Added a function that returns an expression to find and replace a fixed number of occurrences of a substring. Added a function to format a list of semicolon-delimited tag values. Separate multiple names by newlines or bullets as necessary. --- src/constants/label.js | 145 +++++++++++++++++++++- src/layer/transportation_label.js | 2 +- test/spec/label.js | 199 ++++++++++++++++++++++++++---- 3 files changed, 316 insertions(+), 30 deletions(-) diff --git a/src/constants/label.js b/src/constants/label.js index 3799d3ad4..13c59b0ea 100644 --- a/src/constants/label.js +++ b/src/constants/label.js @@ -117,13 +117,134 @@ export function localizeLayers(layers, locales) { } /** - * The name in the user's preferred language. + * Returns an expression that replaces a finite number of occurrences of a + * substring expression within a larger string expression, starting at a given + * index. + * + * This expression nests recursively by the maximum number of replacements. Take + * special care to minimize this limit, which exponentially increases the length + * of a property value in JSON. Excessive nesting causes acute performance + * problems when loading the style. + * + * The returned expression can be complex, so use it only once within a property + * value. To reuse the evaluated value, bind it to a variable in a let + * expression. + * + * @param haystack The overall string expression to search within. + * @param needle The string to search for, or an expression that evaluates to + * this string. 
+ */ +export function replaceExpression( + haystack, + needle, + replacement, + haystackStart, + numReplacements = 1 +) { + let asIs = ["slice", haystack, haystackStart]; + if (numReplacements <= 0) { + return asIs; + } + + let needleStart = ["index-of", needle, haystack, haystackStart]; + let needleLength = + typeof needle === "object" ? ["length", needle] : needle.length; + let needleEnd = ["+", needleStart, needleLength]; + return [ + "case", + [">=", needleStart, 0], + [ + "concat", + ["slice", haystack, haystackStart, needleStart], + replacement, + replaceExpression( + haystack, + needle, + replacement, + needleEnd, + numReplacements - 1 + ), + ], + asIs, + ]; +} + +/** + * Maximum number of values in a semicolon-delimited list of values. + * + * Increasing this constant deepens recursion for replacing delimiters in the + * list, potentially affecting style loading performance. + */ +const maxValueListLength = 9; + +/** + * Returns an expression interpreting the given string as a list of tag values, + * pretty-printing the standard semicolon delimiter with the given separator. + * + * https://wiki.openstreetmap.org/wiki/Semi-colon_value_separator + * + * The returned expression can be complex, so use it only once within a property + * value. To reuse the evaluated value, bind it to a variable in a let + * expression. + * + * @param valueList A semicolon-delimited list of values. + * @param separator A string to insert between each value, or an expression that + * evaluates to this string. + */ +export function listValuesExpression(valueList, separator) { + let maxSeparators = maxValueListLength - 1; + // Replace the ;; escape sequence with a placeholder sequence unlikely to + // legitimately occur inside a value or separator. + const objReplacementChar = "\x91\ufffc\x92"; // https://overpass-turbo.eu/s/1pJx + let safeValueList = replaceExpression( + valueList, + ";;", + objReplacementChar, + 0, + maxSeparators + ); + // Pretty-print the ; delimiter. 
+ let prettyValueList = replaceExpression( + ["var", "safeValueList"], + ";", + separator, + 0, + maxSeparators + ); + // Replace the placeholder sequence with an unescaped semicolon. + let prettySafeValueList = replaceExpression( + ["var", "prettyValueList"], + objReplacementChar, + ";", + 0, + maxSeparators + ); + return [ + "let", + "safeValueList", + safeValueList, + ["let", "prettyValueList", prettyValueList, prettySafeValueList], + ]; +} + +/** + * The names in the user's preferred language, each on a separate line. */ export const localizedName = [ "let", "localizedName", "", - ["var", "localizedName"], + listValuesExpression(["var", "localizedName"], "\n"), +]; + +/** + * The names in the user's preferred language, all on the same line. + */ +export const localizedNameInline = [ + "let", + "localizedName", + "", + listValuesExpression(["var", "localizedName"], " \u2022 "), ]; /** @@ -217,7 +338,7 @@ export const localizedNameWithLocalGloss = [ ["var", "localizedCollator"], ], // ...just pick one. - ["var", "localizedName"], + ["format", listValuesExpression(["var", "localizedName"], "\n")], // If the name in the preferred language is the same as the name in the // local language except for the omission of diacritics and/or the addition // of a suffix (e.g., "City" in English)... @@ -227,7 +348,13 @@ export const localizedNameWithLocalGloss = [ ["var", "diacriticInsensitiveCollator"] ), // ...then replace the common prefix with the local name. - overwritePrefixExpression(["var", "localizedName"], ["get", "name"]), + [ + "format", + overwritePrefixExpression( + ["var", "localizedName"], + listValuesExpression(["get", "name"], "\n") + ), + ], // If the name in the preferred language is the same as the name in the // local language except for the omission of diacritics and/or the addition // of a prefix (e.g., "City of" in English or "Ciudad de" in Spanish)... 
@@ -237,7 +364,13 @@ export const localizedNameWithLocalGloss = [ ["var", "diacriticInsensitiveCollator"] ), // ...then replace the common suffix with the local name. - overwriteSuffixExpression(["var", "localizedName"], ["get", "name"]), + [ + "format", + overwriteSuffixExpression( + ["var", "localizedName"], + listValuesExpression(["get", "name"], "\n") + ), + ], // Otherwise, gloss the name in the local language if it differs from the // localized name. [ @@ -252,7 +385,7 @@ export const localizedNameWithLocalGloss = [ // bother rendering it. ["concat", ["slice", ["var", "localizedName"], 0, 1], " "], { "font-scale": 0.001 }, - ["get", "name"], + listValuesExpression(["get", "name"], " \u2022 "), { "font-scale": 0.8 }, ["concat", " ", ["slice", ["var", "localizedName"], 0, 1]], { "font-scale": 0.001 }, diff --git a/src/layer/transportation_label.js b/src/layer/transportation_label.js index 0298f0b09..e3f45f4a1 100644 --- a/src/layer/transportation_label.js +++ b/src/layer/transportation_label.js @@ -80,7 +80,7 @@ export const label = { ["literal", ["OpenHistorical Italic"]], ["literal", ["OpenHistorical"]], ], - "text-field": [...Label.localizedName], + "text-field": [...Label.localizedNameInline], "text-max-angle": 20, "symbol-placement": "line", "text-size": [ diff --git a/test/spec/label.js b/test/spec/label.js index 22910f60b..28acf42d0 100644 --- a/test/spec/label.js +++ b/test/spec/label.js @@ -272,9 +272,9 @@ describe("label", function () { let evaluatedLabelAndGloss = (locales, properties) => { let evaluated = evaluatedExpression(locales, properties); if (typeof evaluated === "string") { - return [evaluated, null]; + return [evaluated]; } - return [evaluated.sections[0].text, evaluated.sections[4].text]; + return [evaluated.sections[0].text, evaluated.sections[4]?.text]; }; let expectGloss = ( @@ -295,11 +295,12 @@ describe("label", function () { }; it("puts an unlocalized name by itself", function () { - expect( - evaluatedExpression(["en"], { - name: 
"Null Island", - }) - ).to.be.eql("Null Island"); + let evaluated = evaluatedExpression(["en"], { + name: "Null Island", + }); + + expect(evaluated.sections.length).to.be.eql(1); + expect(evaluated.sections[0].text).to.be.eql("Null Island"); }); it("glosses an anglicized name with the local name", function () { let evaluated = evaluatedExpression(["en"], { @@ -321,26 +322,19 @@ describe("label", function () { expect(evaluated.sections[5].scale).to.be.below(0.1); }); it("deduplicates matching anglicized and local names", function () { - expectGloss("en", "Null Island", "Null Island", "Null Island", null); - expectGloss("en", "Null Island", "NULL Island", "Null Island", null); - expectGloss("en", "Montreal", "Montréal", "Montréal", null); - expectGloss("en", "Quebec City", "Québec", "Québec City", null); - expectGloss("en", "Da Nang", "Đà Nẵng", "Đà Nẵng", null); - expectGloss("en", "Nūll Island", "Ñüłl Íşlåńđ", "Ñüłl Íşlåńđ", null); - expectGloss("en", "New York City", "New York", "New York City", null); - expectGloss( - "en", - "Washington, D.C.", - "Washington", - "Washington, D.C.", - null - ); + expectGloss("en", "Null Island", "Null Island", "Null Island"); + expectGloss("en", "Null Island", "NULL Island", "Null Island"); + expectGloss("en", "Montreal", "Montréal", "Montréal"); + expectGloss("en", "Quebec City", "Québec", "Québec City"); + expectGloss("en", "Da Nang", "Đà Nẵng", "Đà Nẵng"); + expectGloss("en", "Nūll Island", "Ñüłl Íşlåńđ", "Ñüłl Íşlåńđ"); + expectGloss("en", "New York City", "New York", "New York City"); + expectGloss("en", "Washington, D.C.", "Washington", "Washington, D.C."); expectGloss( "en", "Santiago de Querétaro", "Querétaro", - "Santiago de Querétaro", - null + "Santiago de Querétaro" ); // Suboptimal but expected cases @@ -369,4 +363,163 @@ describe("label", function () { expectGloss("pl", "Jurmała", "Jūrmala", "Jurmała", "Jūrmala"); }); }); + + describe("#replaceExpression", function () { + let evaluatedExpression = ( + haystack, + 
needle, + replacement, + haystackStart, + numReplacements + ) => + expression + .createExpression( + localizedTextField( + [ + ...Label.replaceExpression( + haystack, + needle, + replacement, + haystackStart, + numReplacements + ), + ], + ["en"] + ) + ) + .value.expression.evaluate(expressionContext({})); + + it("returns the haystack verbatim when there is nothing to replace", function () { + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, -1)).to.be.eql( + "ABC;DEF;GHI" + ); + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, 0)).to.be.eql( + "ABC;DEF;GHI" + ); + }); + + it("returns an empty haystack verbatim", function () { + expect(evaluatedExpression("", ";", "*", 0, -1)).to.be.eql(""); + expect(evaluatedExpression("", ";", "*", 0, 0)).to.be.eql(""); + expect(evaluatedExpression("", ";", "*", 0, 1)).to.be.eql(""); + expect(evaluatedExpression("", ";", "*", 0, 2)).to.be.eql(""); + }); + + it("replaces one occurrence", function () { + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, 1)).to.be.eql( + "ABC*DEF;GHI" + ); + }); + + it("replaces multiple occurrences", function () { + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, 2)).to.be.eql( + "ABC*DEF*GHI" + ); + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, 3)).to.be.eql( + "ABC*DEF*GHI" + ); + expect(evaluatedExpression("ABC;DEF;GHI", ";", "*", 0, 10)).to.be.eql( + "ABC*DEF*GHI" + ); + }); + + it("replaces adjacent occurrences", function () { + expect(evaluatedExpression("ABC;;;DEF;GHI", ";", "*", 0, 2)).to.be.eql( + "ABC**;DEF;GHI" + ); + }); + + it("replaces at the beginning of the haystack", function () { + expect(evaluatedExpression(";DEF;GHI", ";", "*", 0, 1)).to.be.eql( + "*DEF;GHI" + ); + }); + + it("replaces at the end of the haystack", function () { + expect(evaluatedExpression("ABC;", ";", "*", 0, 1)).to.be.eql("ABC*"); + }); + + it("replaces the whole haystack", function () { + expect(evaluatedExpression(";", ";", "*", 0, 1)).to.be.eql("*"); + 
expect(evaluatedExpression(";;;", ";", "*", 0, 3)).to.be.eql("***"); + }); + + it("is case-sensitive", function () { + expect(evaluatedExpression("ABC", "b", "*", 0, 1)).to.be.eql("ABC"); + }); + + it("replaces multiple characters", function () { + expect(evaluatedExpression("ABC;;DEF", ";;", "/", 0, 1)).to.be.eql( + "ABC/DEF" + ); + }); + + it("replaces needle expression", function () { + expect( + evaluatedExpression("ABC;DEF", ["concat", ";"], "*", 0, 1) + ).to.be.eql("ABC*DEF"); + }); + }); + + describe("#listValuesExpression", function () { + let evaluatedExpression = (valueList, separator) => + expression + .createExpression( + localizedTextField( + [...Label.listValuesExpression(valueList, separator)], + ["en"] + ) + ) + .value.expression.evaluate(expressionContext({})); + + it("lists an empty list", function () { + expect(evaluatedExpression("", ", ")).to.be.eql(""); + }); + + it("lists a single value", function () { + expect(evaluatedExpression("ABC", ", ")).to.be.eql("ABC"); + }); + + it("lists empty values", function () { + expect(evaluatedExpression(";", ", ")).to.be.eql(", "); + }); + + it("lists multiple values", function () { + expect(evaluatedExpression("ABC;DEF", ", ")).to.be.eql("ABC, DEF"); + expect(evaluatedExpression("ABC;DEF;GHI", ", ")).to.be.eql( + "ABC, DEF, GHI" + ); + }); + + it("ignores an escaped semicolon", function () { + expect(evaluatedExpression("ABC;;DEF", ", ")).to.be.eql("ABC;DEF"); + expect(evaluatedExpression("ABC;;DEF;GHI", ", ")).to.be.eql( + "ABC;DEF, GHI" + ); + expect(evaluatedExpression("ABC;DEF;;GHI", ", ")).to.be.eql( + "ABC, DEF;GHI" + ); + expect(evaluatedExpression("ABC;;DEF;;GHI", ", ")).to.be.eql( + "ABC;DEF;GHI" + ); + }); + + it("lists a maximum number of values", function () { + // https://www.openstreetmap.org/node/9816809799 + expect( + evaluatedExpression( + "马岔河村;菜园村;刘灿东村;后于口村;王石楼村;李岔河村;岔河新村;富康新村;前鱼口村", + "、" + ) + ).to.be.eql( + "马岔河村、菜园村、刘灿东村、后于口村、王石楼村、李岔河村、岔河新村、富康新村、前鱼口村" + ); + expect( + 
evaluatedExpression( + "one;two;three;four;five;six;seven;eight;nine;ten", + ", " + ) + ).to.be.eql("one, two, three, four, five, six, seven, eight, nine;ten"); + }); + }); });