diff --git a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java index 23004e9ccb..612a5492fd 100644 --- a/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java +++ b/rest/src/main/groovy/whelk/rest/api/SearchUtils2.java @@ -63,19 +63,12 @@ Map doSearch(Map queryParameters) throws Inval Map esQueryDsl = getEsQueryDsl(qTree, queryParams, appParams.statsRepr); - QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl)); + QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl), queryParams.debug); Map partialCollectionView = getPartialCollectionView(queryRes, qTree, queryParams, appParams); - Map debugView = new HashMap<>(); if (queryParams.debug.contains(QueryParams.Debug.ES_QUERY)) { - debugView.put(QueryParams.Debug.ES_QUERY, esQueryDsl); - } - if (queryParams.debug.contains(QueryParams.Debug.ES_SCORE)) { - debugView.put(QueryParams.Debug.ES_SCORE, queryRes.scores); - } - if (!debugView.isEmpty()) { - partialCollectionView.put(QueryParams.ApiParams.DEBUG, debugView); + partialCollectionView.put(QueryParams.ApiParams.DEBUG, Map.of(QueryParams.Debug.ES_QUERY, esQueryDsl)); } return partialCollectionView; diff --git a/whelk-core/src/main/groovy/whelk/search2/QueryResult.java b/whelk-core/src/main/groovy/whelk/search2/QueryResult.java index 89a5c1b167..46b5effb38 100644 --- a/whelk-core/src/main/groovy/whelk/search2/QueryResult.java +++ b/whelk-core/src/main/groovy/whelk/search2/QueryResult.java @@ -2,36 +2,46 @@ import whelk.Document; import whelk.JsonLd; +import whelk.util.DocumentUtil; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static whelk.search2.QueryUtil.castToStringObjectMap; import static whelk.util.DocumentUtil.getAtPath; +import static whelk.util.DocumentUtil.traverse; public class QueryResult { public final int numHits; - private final List esItems; public final List aggs; public final List pAggs; public final List spell; - public final List> scores; - public QueryResult(Map esResponse) { + private final List esItems; + private final List debug; + + public QueryResult(Map esResponse, List debug) { var normResponse = normalizeResponse(esResponse); + this.debug = debug; this.numHits = getNumHits(normResponse); this.esItems = collectEsItems(normResponse); this.aggs = Aggs.collectAggResult(normResponse); this.pAggs = Aggs.collectPAggResult(normResponse); this.spell = Spell.collectSuggestions(normResponse); - this.scores = collectScores(normResponse); + } + + public QueryResult(Map esResponse) { + this(esResponse, List.of()); } public List> collectItems(Function, Map> applyLens) { @@ -42,28 +52,23 @@ private static int getNumHits(Map esResponse) { return (int) getAtPath(esResponse, List.of("hits", "total", "value"), 1); } - private static List collectEsItems(Map esResponse) { + private List collectEsItems(Map esResponse) { return ((List) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList())) .stream() .map(Map.class::cast) .map(hit -> { var item = castToStringObjectMap(hit.get("_source")); item.put("_id", hit.get("_id")); + if (debug.contains(QueryParams.Debug.ES_SCORE)) { + item.put("_score", hit.get("_score")); + item.put("_explanation", hit.get("_explanation")); + } return item; }) .map(EsItem::new) .toList(); } - private static List> collectScores(Map esResponse) { - return ((List) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList())) - .stream() - .filter(m -> ((Map) m).get("_score") != null) - .map(QueryUtil::castToStringObjectMap) - .filter(m -> m.keySet().retainAll(List.of("_id", "_score", "_explanation"))) - .toList(); - } - private static Map normalizeResponse(Map esResponse) { var norm = new LinkedHashMap(); esResponse.forEach((k, v) -> @@ -76,19 +81,17 @@ private static Map normalizeResponse(Map esResponse) { return norm; } - static class EsItem { - private final Map map; - - EsItem(Map map) { - this.map = map; - } - + private record EsItem(Map map) { private Map toLd(Function, Map> applyLens) { LdItem ldItem = new LdItem(applyLens.apply(map)); + // ISNIs and ORCIDs are indexed with and without spaces, remove the one with spaces. ldItem.normalizeIsniAndOrcid(); // reverseLinks must be re-added because they might get filtered out in applyLens(). getReverseLinks().ifPresent(ldItem::addReverseLinks); + + getScoreExplanation().ifPresent(ldItem::addScore); + return ldItem.map; } @@ -96,9 +99,14 @@ private Optional> getReverseLinks() { return Optional.ofNullable(map.get("reverseLinks")) .map(QueryUtil::castToStringObjectMap); } + + private Optional> getScoreExplanation() { + return Optional.ofNullable(map.get("_explanation")) + .map(QueryUtil::castToStringObjectMap); + } } - static class LdItem { + private static class LdItem { private final Map map; LdItem(Map map) { @@ -127,6 +135,43 @@ private void addReverseLinks(Map reverseLinks) { map.put("reverseLinks", reverseLinks); } + private void addScore(Map scoreExplanation) { + var scorePerField = getScorePerField(scoreExplanation); + var totalScore = scorePerField.values().stream().reduce((double) 0, Double::sum); + var scoreData = Map.of("_total", totalScore, "_perField", scorePerField, "_explain", scoreExplanation); + map.put("_debug", Map.of("_score", scoreData)); + } + + private static Map getScorePerField(Map scoreExplanation) { + Map scorePerField = new HashMap<>(); + + traverse(scoreExplanation, (value, path) -> { + if (value instanceof Map m) { + String description = (String) m.get("description"); + if (description.contains("[PerFieldSimilarity]")) { + Double score = (Double) m.get("value"); + if (score > 0) { + scorePerField.put(parseField(description), score); + } + } + } + return new DocumentUtil.Nop(); + }); + + return scorePerField.entrySet() + .stream() + .sorted(Map.Entry.comparingByValue(Collections.reverseOrder())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (o, n) -> n, LinkedHashMap::new)); + } + + private static String parseField(String description) { + Matcher m = Pattern.compile("^weight\\(.+:((\".+\")|[^ ]+)").matcher(description); + if (m.find()) { + return m.group().replace("weight(", ""); + } + return description; + } + private static String makeFindOLink(String iri) { return Document.getBASE_URI() .resolve("find?o=" + URLEncoder.encode(iri, StandardCharsets.UTF_8)) diff --git a/whelk-core/src/main/groovy/whelk/search2/parse/Lex.java b/whelk-core/src/main/groovy/whelk/search2/parse/Lex.java index 8b7eee5192..d9ea32e5ff 100644 --- a/whelk-core/src/main/groovy/whelk/search2/parse/Lex.java +++ b/whelk-core/src/main/groovy/whelk/search2/parse/Lex.java @@ -161,9 +161,6 @@ else if (c == '\\') { // char escaping ... // These words (when not quoted) are keywords switch (symbolValue.toString()) { - case "and": - case "or": - case "not": case "AND": case "OR": case "NOT": diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidKey.java b/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidKey.java new file mode 100644 index 0000000000..49c29385c2 --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidKey.java @@ -0,0 +1,18 @@ +package whelk.search2.querytree; + +import java.util.LinkedHashMap; +import java.util.Map; + +import static whelk.JsonLd.TYPE_KEY; + +public sealed interface InvalidKey extends PropertyLike { + record UnrecognizedKey(String name) implements InvalidKey {} + record AmbiguousKey(String name) implements InvalidKey {} + + default Map definition() { + var m = new LinkedHashMap(); + m.put(TYPE_KEY, "_Invalid"); + m.put("label", name()); + return m; + } +} \ No newline at end of file diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidValue.java b/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidValue.java new file mode 100644 index 0000000000..bfd7cd15db --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/InvalidValue.java @@ -0,0 +1,22 @@ +package whelk.search2.querytree; + +import java.util.LinkedHashMap; +import java.util.Map; + +import static whelk.JsonLd.TYPE_KEY; + +sealed interface InvalidValue extends Value { + record ForbiddenValue(String string) implements InvalidValue {} + record AmbiguousValue(String string) implements InvalidValue {} + + @Override + String string(); + + @Override + default Object description() { + var m = new LinkedHashMap(); + m.put(TYPE_KEY, "_Invalid"); + m.put("label", string()); + return m; + } +} diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/Path.java b/whelk-core/src/main/groovy/whelk/search2/querytree/Path.java index 0c71bc00ab..56f299493e 100644 --- a/whelk-core/src/main/groovy/whelk/search2/querytree/Path.java +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/Path.java @@ -26,7 +26,7 @@ public Path(List path) { @Override public String toString() { return path.stream() - .map(x -> x instanceof Property ? ((Property) x).name() : (String) x) + .map(x -> x instanceof PropertyLike p ? p.name() : (String) x) .map(this::substitute) .collect(Collectors.joining(".")); } diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/PathValue.java b/whelk-core/src/main/groovy/whelk/search2/querytree/PathValue.java index d52177aec3..7037acd77c 100644 --- a/whelk-core/src/main/groovy/whelk/search2/querytree/PathValue.java +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/PathValue.java @@ -54,14 +54,10 @@ public Map toSearchMapping(QueryTree qt, Map non var propertyChainAxiom = new LinkedList<>(); for (int i = getPath().size() - 1; i >= 0; i--) { - var property = Optional.of(getPath().get(i)) - .filter(x -> x instanceof Property) - .map(Property.class::cast); - - if (property.isPresent()) { + if (getPath().get(i) instanceof PropertyLike property) { propertyChainAxiom.push(i > 0 && getPath().get(i - 1).equals(JsonLd.REVERSE_KEY) - ? Map.of("inverseOf", property.get().definition()) - : property.get().definition()); + ? Map.of("inverseOf", property.definition()) + : property.definition()); } } diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/Property.java b/whelk-core/src/main/groovy/whelk/search2/querytree/Property.java index 4fe2f7d10d..80baecd3cf 100644 --- a/whelk-core/src/main/groovy/whelk/search2/querytree/Property.java +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/Property.java @@ -13,7 +13,8 @@ import static whelk.JsonLd.asList; import static whelk.search2.Disambiguate.Rdfs.RDF_TYPE; -public class Property { + +public class Property implements PropertyLike { private final String name; private Map definition; private boolean isVocabTerm; diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/PropertyLike.java b/whelk-core/src/main/groovy/whelk/search2/querytree/PropertyLike.java new file mode 100644 index 0000000000..a3a2801e8d --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/PropertyLike.java @@ -0,0 +1,8 @@ +package whelk.search2.querytree; + +import java.util.Map; + +public interface PropertyLike { + String name(); + Map definition(); +} diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/QueryTreeBuilder.java b/whelk-core/src/main/groovy/whelk/search2/querytree/QueryTreeBuilder.java index 64c7ce6129..81b1244e3a 100644 --- a/whelk-core/src/main/groovy/whelk/search2/querytree/QueryTreeBuilder.java +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/QueryTreeBuilder.java @@ -91,10 +91,9 @@ private static PathValue buildPathValue(Ast.Code c, Disambiguate disambiguate) t } else { var ambiguous = disambiguate.getAmbiguousPropertyMapping(part); if (ambiguous.isEmpty()) { - throw new InvalidQueryException("Unrecognized property alias: " + part); + path.add(new InvalidKey.UnrecognizedKey(part)); } else { - throw new InvalidQueryException("\"" + part + "\" maps to multiple properties: " + ambiguous + "," + - " please specify which one is meant."); + path.add(new InvalidKey.AmbiguousKey(part)); } } } @@ -117,26 +116,18 @@ private static Value buildValue(Property property, String value, Disambiguate di if (mappedType.isPresent()) { return new VocabTerm(mappedType.get(), disambiguate.getDefinition(mappedType.get())); } else { - var ambiguous = disambiguate.getAmbiguousClassMapping(value); - if (ambiguous.isEmpty()) { - throw new InvalidQueryException("Unrecognized type: " + value); - } else { - throw new InvalidQueryException("\"" + value + "\" maps to multiple types: " + ambiguous + "," + - " please specify which one is meant."); - } + return disambiguate.getAmbiguousClassMapping(value).isEmpty() + ? new InvalidValue.ForbiddenValue(value) + : new InvalidValue.AmbiguousValue(value); } } else if (property.isVocabTerm()) { Optional mappedEnum = disambiguate.mapToEnum(value); if (mappedEnum.isPresent()) { return new VocabTerm(mappedEnum.get(), disambiguate.getDefinition(mappedEnum.get())); } else { - var ambiguous = disambiguate.getAmbiguousEnumMapping(value); - if (ambiguous.isEmpty()) { - throw new InvalidQueryException("Invalid value " + value + " for property " + property); - } else { - throw new InvalidQueryException("\"" + value + "\" maps to multiple types: " + ambiguous + "," + - " please specify which one is meant."); - } + return disambiguate.getAmbiguousEnumMapping(value).isEmpty() + ? new InvalidValue.ForbiddenValue(value) + : new InvalidValue.AmbiguousValue(value); } } // Expand and encode URIs, e.g. sao:Hästar -> https://id.kb.se/term/sao/H%C3%A4star diff --git a/whelk-core/src/main/groovy/whelk/search2/querytree/Value.java b/whelk-core/src/main/groovy/whelk/search2/querytree/Value.java index 8df6b4cb54..ac1a1c295c 100644 --- a/whelk-core/src/main/groovy/whelk/search2/querytree/Value.java +++ b/whelk-core/src/main/groovy/whelk/search2/querytree/Value.java @@ -1,6 +1,6 @@ package whelk.search2.querytree; -public sealed interface Value permits Link, Literal, VocabTerm { +public sealed interface Value permits Link, Literal, InvalidValue, VocabTerm { String string(); Object description(); diff --git a/whelk-core/src/test/groovy/whelk/search2/parse/AstSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/parse/AstSpec.groovy index a73251f695..2aacc06281 100644 --- a/whelk-core/src/test/groovy/whelk/search2/parse/AstSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/parse/AstSpec.groovy @@ -8,7 +8,7 @@ class AstSpec extends Specification { def "normal tree"() { given: - def input = "AAA BBB and (CCC or DDD)" + def input = "AAA BBB AND (CCC OR DDD)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.buildFrom(parseTree) @@ -114,7 +114,7 @@ class AstSpec extends Specification { def "Flatten code groups"() { given: - def input = "AAA:(BBB and CCC)" + def input = "AAA:(BBB AND CCC)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.buildFrom(parseTree) @@ -130,7 +130,7 @@ class AstSpec extends Specification { def "Flatten code groups2"() { given: - def input = "author:(Alice and (Bob or Cecilia))" + def input = "author:(Alice AND (Bob OR Cecilia))" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.buildFrom(parseTree) @@ -149,7 +149,7 @@ class AstSpec extends Specification { def "Flatten code groups3"() { given: - def input = "author:(Alice and (Bob or Cecilia) and not David)" + def input = "author:(Alice AND (Bob OR Cecilia) AND NOT David)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.buildFrom(parseTree) @@ -169,7 +169,7 @@ class AstSpec extends Specification { def "Flatten code groups4"() { given: - def input = "\"everything\" or author:(Alice and (Bob or Cecilia) and not David)" + def input = "\"everything\" OR author:(Alice AND (Bob OR Cecilia) AND NOT David)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.buildFrom(parseTree) @@ -193,7 +193,7 @@ class AstSpec extends Specification { def "flatten negations"() { given: - def input = "\"everything\" and not (author:Alice and published > 2022)" + def input = "\"everything\" AND NOT (author:Alice AND published > 2022)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.flattenCodes(Ast.buildFrom(parseTree)) @@ -214,7 +214,7 @@ class AstSpec extends Specification { def "flatten negations 2"() { given: - def input = "\"everything\" and !(author:Alice and not published: 2022)" + def input = "\"everything\" AND !(author:Alice AND NOT published: 2022)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.flattenCodes(Ast.buildFrom(parseTree)) @@ -235,7 +235,7 @@ class AstSpec extends Specification { def "flatten negations 3"() { given: - def input = "!(author:Alice and \"everything\" and not \"something\")" + def input = "!(author:Alice AND \"everything\" AND NOT \"something\")" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) Ast.Node ast = Ast.flattenCodes(Ast.buildFrom(parseTree)) diff --git a/whelk-core/src/test/groovy/whelk/search2/parse/ParseSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/parse/ParseSpec.groovy index 02238c043b..ab6fb1b519 100644 --- a/whelk-core/src/test/groovy/whelk/search2/parse/ParseSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/parse/ParseSpec.groovy @@ -9,7 +9,7 @@ class ParseSpec extends Specification { def "normal parse"() { given: - def input = "AAA BBB and (CCC or DDD)" + def input = "AAA BBB AND (CCC OR DDD)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -19,7 +19,7 @@ class ParseSpec extends Specification { def "implicit and group"() { given: - def input = "AAA BBB (CCC or DDD)" + def input = "AAA BBB (CCC OR DDD)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -39,7 +39,7 @@ class ParseSpec extends Specification { def "parse negative2"() { given: - def input = "not AAA" + def input = "NOT AAA" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -49,7 +49,7 @@ class ParseSpec extends Specification { def "parse negative3"() { given: - def input = "not (AAA)" + def input = "NOT (AAA)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -59,7 +59,7 @@ class ParseSpec extends Specification { def "crazy grouping"() { given: - def input = "AAA BBB and (CCC or DDD or (EEE) AND (FFF OR GGG))" + def input = "AAA BBB AND (CCC OR DDD OR (EEE) AND (FFF OR GGG))" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -69,7 +69,7 @@ class ParseSpec extends Specification { def "fail crazy grouping with bad parens"() { given: - def input = "AAA BBB and (CCC or DDD or (EEE) AND (FFF OR GGG)" + def input = "AAA BBB AND (CCC OR DDD OR (EEE) AND (FFF OR GGG)" def lexedSymbols = Lex.lexQuery(input) when: @@ -171,7 +171,7 @@ class ParseSpec extends Specification { def "code group2"() { given: - def input = "förf:(AAA or BBB and CCC)" + def input = "förf:(AAA OR BBB AND CCC)" def lexedSymbols = Lex.lexQuery(input) Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols) @@ -192,7 +192,7 @@ class ParseSpec extends Specification { def "Bad use of code2"() { given: - def input = "AAA or förf:" + def input = "AAA OR förf:" def lexedSymbols = Lex.lexQuery(input) when: @@ -203,7 +203,7 @@ class ParseSpec extends Specification { def "Don't parse missing or-tail"() { given: - def input = "AAA BBB and (CCC or)" + def input = "AAA BBB AND (CCC OR)" def lexedSymbols = Lex.lexQuery(input) when: @@ -214,7 +214,7 @@ class ParseSpec extends Specification { def "Don't parse missing and-tail"() { given: - def input = "AAA BBB and" + def input = "AAA BBB AND" def lexedSymbols = Lex.lexQuery(input) when: