Skip to content

Commit

Permalink
Merge branch 'develop' into feature/vocab-guided-query-expansion
Browse files Browse the repository at this point in the history
  • Loading branch information
kwahlin committed Dec 19, 2024
2 parents 0329c0c + bb09334 commit 32af60a
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 79 deletions.
11 changes: 2 additions & 9 deletions rest/src/main/groovy/whelk/rest/api/SearchUtils2.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,12 @@ Map<String, Object> doSearch(Map<String, String[]> queryParameters) throws Inval

Map<String, Object> esQueryDsl = getEsQueryDsl(qTree, queryParams, appParams.statsRepr);

QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl));
QueryResult queryRes = new QueryResult(queryUtil.query(esQueryDsl), queryParams.debug);

Map<String, Object> partialCollectionView = getPartialCollectionView(queryRes, qTree, queryParams, appParams);

Map<String, Object> debugView = new HashMap<>();
if (queryParams.debug.contains(QueryParams.Debug.ES_QUERY)) {
debugView.put(QueryParams.Debug.ES_QUERY, esQueryDsl);
}
if (queryParams.debug.contains(QueryParams.Debug.ES_SCORE)) {
debugView.put(QueryParams.Debug.ES_SCORE, queryRes.scores);
}
if (!debugView.isEmpty()) {
partialCollectionView.put(QueryParams.ApiParams.DEBUG, debugView);
partialCollectionView.put(QueryParams.ApiParams.DEBUG, Map.of(QueryParams.Debug.ES_QUERY, esQueryDsl));
}

return partialCollectionView;
Expand Down
89 changes: 67 additions & 22 deletions whelk-core/src/main/groovy/whelk/search2/QueryResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,46 @@

import whelk.Document;
import whelk.JsonLd;
import whelk.util.DocumentUtil;

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static whelk.search2.QueryUtil.castToStringObjectMap;
import static whelk.util.DocumentUtil.getAtPath;
import static whelk.util.DocumentUtil.traverse;

public class QueryResult {
public final int numHits;
private final List<EsItem> esItems;
public final List<Aggs.Aggregation> aggs;
public final List<Aggs.Bucket> pAggs;
public final List<Spell.Suggestion> spell;
public final List<Map<String, Object>> scores;

public QueryResult(Map<?, ?> esResponse) {
private final List<EsItem> esItems;
private final List<String> debug;

public QueryResult(Map<?, ?> esResponse, List<String> debug) {
var normResponse = normalizeResponse(esResponse);
this.debug = debug;
this.numHits = getNumHits(normResponse);
this.esItems = collectEsItems(normResponse);
this.aggs = Aggs.collectAggResult(normResponse);
this.pAggs = Aggs.collectPAggResult(normResponse);
this.spell = Spell.collectSuggestions(normResponse);
this.scores = collectScores(normResponse);
}

/**
 * Convenience constructor: builds a result from an ES response with no debug options enabled
 * (delegates to the two-argument constructor with an empty debug list).
 *
 * @param esResponse the raw Elasticsearch response
 */
public QueryResult(Map<?, ?> esResponse) {
this(esResponse, List.of());
}

public List<Map<String, Object>> collectItems(Function<Map<String, Object>, Map<String, Object>> applyLens) {
Expand All @@ -42,28 +52,23 @@ private static int getNumHits(Map<String, Object> esResponse) {
return (int) getAtPath(esResponse, List.of("hits", "total", "value"), 1);
}

private static List<EsItem> collectEsItems(Map<String, Object> esResponse) {
/**
 * Collects the hits found at {@code hits.hits} in the response as {@link EsItem}s.
 *
 * @param esResponse the (normalized) Elasticsearch response
 * @return one item per hit, in response order; empty if the response has no hits
 */
private List<EsItem> collectEsItems(Map<String, Object> esResponse) {
    List<?> hits = (List<?>) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList());
    return hits.stream()
            .map(Map.class::cast)
            .map(this::toEsItem)
            .toList();
}

/**
 * Turns a single hit into an {@link EsItem}: the hit's {@code _source} with its {@code _id}
 * added, plus {@code _score} and {@code _explanation} when ES_SCORE debugging is requested.
 */
private EsItem toEsItem(Map<?, ?> hit) {
    var source = castToStringObjectMap(hit.get("_source"));
    source.put("_id", hit.get("_id"));
    if (debug.contains(QueryParams.Debug.ES_SCORE)) {
        source.put("_score", hit.get("_score"));
        source.put("_explanation", hit.get("_explanation"));
    }
    return new EsItem(source);
}

private static List<Map<String, Object>> collectScores(Map<String, Object> esResponse) {
return ((List<?>) getAtPath(esResponse, List.of("hits", "hits"), Collections.emptyList()))
.stream()
.filter(m -> ((Map<?, ?>) m).get("_score") != null)
.map(QueryUtil::castToStringObjectMap)
.filter(m -> m.keySet().retainAll(List.of("_id", "_score", "_explanation")))
.toList();
}

private static Map<String, Object> normalizeResponse(Map<?, ?> esResponse) {
var norm = new LinkedHashMap<String, Object>();
esResponse.forEach((k, v) ->
Expand All @@ -76,29 +81,32 @@ private static Map<String, Object> normalizeResponse(Map<?, ?> esResponse) {
return norm;
}

static class EsItem {
private final Map<String, Object> map;

EsItem(Map<String, Object> map) {
this.map = map;
}

/**
 * A single search hit as returned from Elasticsearch, wrapped so that it can be converted
 * to its JSON-LD presentation form.
 */
private record EsItem(Map<String, Object> map) {
    /**
     * Applies the given lens to this item and post-processes the result.
     *
     * @param applyLens function producing the lensed view of the item
     * @return the resulting JSON-LD map
     */
    private Map<String, Object> toLd(Function<Map<String, Object>, Map<String, Object>> applyLens) {
        Map<String, Object> lensed = applyLens.apply(map);
        LdItem result = new LdItem(lensed);

        // ISNIs and ORCIDs are indexed both with and without spaces; drop the spaced variant.
        result.normalizeIsniAndOrcid();

        // applyLens() may have filtered out reverseLinks, so put them back.
        getReverseLinks().ifPresent(result::addReverseLinks);

        // Only present when score debugging added an "_explanation" to the item.
        getScoreExplanation().ifPresent(result::addScore);

        return result.map;
    }

    /** The item's {@code reverseLinks} entry, if any. */
    private Optional<Map<String, Object>> getReverseLinks() {
        Object reverseLinks = map.get("reverseLinks");
        return Optional.ofNullable(reverseLinks).map(QueryUtil::castToStringObjectMap);
    }

    /** The item's {@code _explanation} entry, if any. */
    private Optional<Map<String, Object>> getScoreExplanation() {
        Object explanation = map.get("_explanation");
        return Optional.ofNullable(explanation).map(QueryUtil::castToStringObjectMap);
    }
}

static class LdItem {
private static class LdItem {
private final Map<String, Object> map;

LdItem(Map<String, Object> map) {
Expand Down Expand Up @@ -127,6 +135,43 @@ private void addReverseLinks(Map<String, Object> reverseLinks) {
map.put("reverseLinks", reverseLinks);
}

/**
 * Attaches score debug information to this item under {@code _debug._score}: the summed
 * per-field score ({@code _total}), the per-field breakdown ({@code _perField}) and the
 * explanation it was derived from ({@code _explain}).
 *
 * @param scoreExplanation the score explanation for this item
 */
private void addScore(Map<String, Object> scoreExplanation) {
    Map<String, Double> perField = getScorePerField(scoreExplanation);
    Double total = perField.values().stream().reduce(0.0, Double::sum);

    Map<String, Object> score = Map.of(
            "_total", total,
            "_perField", perField,
            "_explain", scoreExplanation);

    map.put("_debug", Map.of("_score", score));
}

/**
 * Extracts the positive per-field scores from a score explanation.
 *
 * Every map in the explanation whose {@code description} contains
 * {@code [PerFieldSimilarity]} contributes its {@code value}, keyed by the field parsed from
 * the description. Maps lacking a string description or a numeric value are ignored rather
 * than causing a NullPointerException/ClassCastException, since the numeric type of
 * {@code value} depends on how the response was deserialized.
 *
 * @param scoreExplanation the score explanation to walk
 * @return field -> score, ordered by descending score
 */
private static Map<String, Double> getScorePerField(Map<String, Object> scoreExplanation) {
    Map<String, Double> scorePerField = new HashMap<>();

    traverse(scoreExplanation, (value, path) -> {
        if (value instanceof Map<?, ?> m
                && m.get("description") instanceof String description
                && description.contains("[PerFieldSimilarity]")
                && m.get("value") instanceof Number number) {
            double score = number.doubleValue();
            if (score > 0) {
                scorePerField.put(parseField(description), score);
            }
        }
        // Never modify the explanation, just inspect it.
        return new DocumentUtil.Nop();
    });

    // LinkedHashMap keeps the descending-score order established by sorted().
    return scorePerField.entrySet()
            .stream()
            .sorted(Map.Entry.comparingByValue(Collections.reverseOrder()))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (o, n) -> n, LinkedHashMap::new));
}

// Compiled once: parseField is invoked for every matching node of every explained hit.
private static final Pattern WEIGHT_FIELD_PATTERN = Pattern.compile("^weight\\(.+:((\".+\")|[^ ]+)");
private static final String WEIGHT_PREFIX = "weight(";

/**
 * Extracts the {@code field:term} part from an explanation description of the form
 * {@code weight(field:term in ...)}; the term may be a single token or a quoted phrase.
 *
 * @param description an explanation node description
 * @return the {@code field:term} part, or the unchanged description if it does not match
 */
private static String parseField(String description) {
    Matcher m = WEIGHT_FIELD_PATTERN.matcher(description);
    if (m.find()) {
        // The match always starts with "weight(" (anchored pattern); drop only that leading
        // prefix so a term that itself contains "weight(" is left intact.
        return m.group().substring(WEIGHT_PREFIX.length());
    }
    return description;
}

private static String makeFindOLink(String iri) {
return Document.getBASE_URI()
.resolve("find?o=" + URLEncoder.encode(iri, StandardCharsets.UTF_8))
Expand Down
3 changes: 0 additions & 3 deletions whelk-core/src/main/groovy/whelk/search2/parse/Lex.java
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,6 @@ else if (c == '\\') { // char escaping ...

// These words (when not quoted) are keywords
switch (symbolValue.toString()) {
case "and":
case "or":
case "not":
case "AND":
case "OR":
case "NOT":
Expand Down
18 changes: 18 additions & 0 deletions whelk-core/src/main/groovy/whelk/search2/querytree/InvalidKey.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package whelk.search2.querytree;

import java.util.LinkedHashMap;
import java.util.Map;

import static whelk.JsonLd.TYPE_KEY;

/**
 * A key in a query path that could not be resolved to a single property. It is still
 * {@link PropertyLike}, so it can be carried in a path and presented like a property.
 */
public sealed interface InvalidKey extends PropertyLike {
    /** A key that is not recognized as any property. */
    record UnrecognizedKey(String name) implements InvalidKey {
    }

    /** A key that maps to more than one property. */
    record AmbiguousKey(String name) implements InvalidKey {
    }

    /**
     * @return a placeholder definition typed {@code _Invalid} with the key itself as label
     */
    default Map<String, Object> definition() {
        Map<String, Object> invalidDefinition = new LinkedHashMap<>();
        invalidDefinition.put(TYPE_KEY, "_Invalid");
        invalidDefinition.put("label", name());
        return invalidDefinition;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package whelk.search2.querytree;

import java.util.LinkedHashMap;
import java.util.Map;

import static whelk.JsonLd.TYPE_KEY;

/**
 * A query value that could not be resolved to a valid value for its property.
 */
sealed interface InvalidValue extends Value {
    /** A value that does not map to anything acceptable for the property. */
    record ForbiddenValue(String string) implements InvalidValue {
    }

    /** A value that maps to more than one candidate. */
    record AmbiguousValue(String string) implements InvalidValue {
    }

    @Override
    String string();

    /**
     * @return a placeholder description typed {@code _Invalid} with the raw value as label
     */
    @Override
    default Object description() {
        Map<String, Object> invalidDescription = new LinkedHashMap<>();
        invalidDescription.put(TYPE_KEY, "_Invalid");
        invalidDescription.put("label", string());
        return invalidDescription;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public Path(List<Object> path) {
@Override
public String toString() {
return path.stream()
.map(x -> x instanceof Property ? ((Property) x).name() : (String) x)
.map(x -> x instanceof PropertyLike p ? p.name() : (String) x)
.map(this::substitute)
.collect(Collectors.joining("."));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,10 @@ public Map<String, Object> toSearchMapping(QueryTree qt, Map<String, String> non
var propertyChainAxiom = new LinkedList<>();

for (int i = getPath().size() - 1; i >= 0; i--) {
var property = Optional.of(getPath().get(i))
.filter(x -> x instanceof Property)
.map(Property.class::cast);

if (property.isPresent()) {
if (getPath().get(i) instanceof PropertyLike property) {
propertyChainAxiom.push(i > 0 && getPath().get(i - 1).equals(JsonLd.REVERSE_KEY)
? Map.of("inverseOf", property.get().definition())
: property.get().definition());
? Map.of("inverseOf", property.definition())
: property.definition());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
import static whelk.JsonLd.asList;
import static whelk.search2.Disambiguate.Rdfs.RDF_TYPE;

public class Property {

public class Property implements PropertyLike {
private final String name;
private Map<String, Object> definition;
private boolean isVocabTerm;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package whelk.search2.querytree;

import java.util.Map;

/**
 * Common view of anything that can occupy a property position in a query path:
 * something with a name and a definition map.
 */
public interface PropertyLike {
/** @return the name of this property-like element */
String name();
/** @return the definition of this element as a map */
Map<String, Object> definition();
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,9 @@ private static PathValue buildPathValue(Ast.Code c, Disambiguate disambiguate) t
} else {
var ambiguous = disambiguate.getAmbiguousPropertyMapping(part);
if (ambiguous.isEmpty()) {
throw new InvalidQueryException("Unrecognized property alias: " + part);
path.add(new InvalidKey.UnrecognizedKey(part));
} else {
throw new InvalidQueryException("\"" + part + "\" maps to multiple properties: " + ambiguous + "," +
" please specify which one is meant.");
path.add(new InvalidKey.AmbiguousKey(part));
}
}
}
Expand All @@ -117,26 +116,18 @@ private static Value buildValue(Property property, String value, Disambiguate di
if (mappedType.isPresent()) {
return new VocabTerm(mappedType.get(), disambiguate.getDefinition(mappedType.get()));
} else {
var ambiguous = disambiguate.getAmbiguousClassMapping(value);
if (ambiguous.isEmpty()) {
throw new InvalidQueryException("Unrecognized type: " + value);
} else {
throw new InvalidQueryException("\"" + value + "\" maps to multiple types: " + ambiguous + "," +
" please specify which one is meant.");
}
return disambiguate.getAmbiguousClassMapping(value).isEmpty()
? new InvalidValue.ForbiddenValue(value)
: new InvalidValue.AmbiguousValue(value);
}
} else if (property.isVocabTerm()) {
Optional<String> mappedEnum = disambiguate.mapToEnum(value);
if (mappedEnum.isPresent()) {
return new VocabTerm(mappedEnum.get(), disambiguate.getDefinition(mappedEnum.get()));
} else {
var ambiguous = disambiguate.getAmbiguousEnumMapping(value);
if (ambiguous.isEmpty()) {
throw new InvalidQueryException("Invalid value " + value + " for property " + property);
} else {
throw new InvalidQueryException("\"" + value + "\" maps to multiple types: " + ambiguous + "," +
" please specify which one is meant.");
}
return disambiguate.getAmbiguousEnumMapping(value).isEmpty()
? new InvalidValue.ForbiddenValue(value)
: new InvalidValue.AmbiguousValue(value);
}
}
// Expand and encode URIs, e.g. sao:Hästar -> https://id.kb.se/term/sao/H%C3%A4star
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package whelk.search2.querytree;

public sealed interface Value permits Link, Literal, VocabTerm {
public sealed interface Value permits Link, Literal, InvalidValue, VocabTerm {
String string();

Object description();
Expand Down
Loading

0 comments on commit 32af60a

Please sign in to comment.