diff --git a/app/controllers/HomeController.java b/app/controllers/HomeController.java index b4461e2..82fdba6 100644 --- a/app/controllers/HomeController.java +++ b/app/controllers/HomeController.java @@ -17,11 +17,8 @@ import org.apache.jena.atlas.web.HttpException; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.Aggregation; -import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; @@ -51,7 +48,7 @@ */ public class HomeController extends Controller { - private static final String TYPE = "type"; + public static final String TYPE = "type"; @Inject Environment env; @@ -146,10 +143,7 @@ public Result gnd(String id) { } public Result search(String q, int from, int size, String format) { - SearchRequestBuilder requestBuilder = index.client().prepareSearch(config("index.name")) - .setQuery(QueryBuilders.queryStringQuery(q)).setFrom(from).setSize(size); - requestBuilder.addAggregation(AggregationBuilders.terms(TYPE).field(TYPE + ".raw").size(1000)); - SearchResponse response = requestBuilder.get(); + SearchResponse response = index.query(q, from, size); response().setHeader("Access-Control-Allow-Origin", "*"); return format.equals("html") ? htmlSearch(q, from, size, format, response) : ok(returnAsJson(q, response)).as(config("index.content")); diff --git a/app/modules/IndexComponent.java b/app/modules/IndexComponent.java index 2f03875..09a4149 100644 --- a/app/modules/IndexComponent.java +++ b/app/modules/IndexComponent.java @@ -23,20 +23,34 @@ import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.QueryStringQueryBuilder; import org.elasticsearch.node.Node; import org.elasticsearch.node.internal.InternalSettingsPreparer; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.aggregations.AggregationBuilders; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import controllers.HomeController; import play.Logger; import play.inject.ApplicationLifecycle; public interface IndexComponent { Client client(); + + SearchResponse query(String q, int from, int size); + + public default SearchResponse query(String q) { + return query(q, 0, 10); + } } class EmbeddedIndex implements IndexComponent { @@ -86,7 +100,8 @@ private void startup() { indexData(client, pathToJson, indexName); } } else { - Logger.info("Index exists. Delete the 'data/' directory to reindexfrom " + pathToJson); + Logger.info("Index exists. Delete the '" + config("index.home") + "/data' directory to reindex from " + + pathToJson); } if (new File(pathToUpdates).exists()) { Logger.info("Indexing updates from " + pathToUpdates); @@ -178,4 +193,18 @@ private static void executeBulk(int pendingIndexRequests) { } Logger.info("Indexed {} docs, took: {}", pendingIndexRequests, bulkResponse.getTook()); } + + @Override + public SearchResponse query(String q, int from, int size) { + MatchQueryBuilder preferredName = QueryBuilders.matchQuery("preferredName", q).boost(2); + QueryStringQueryBuilder queryStringQuery = QueryBuilders.queryStringQuery(q); + QueryBuilder query = QueryBuilders.boolQuery().should(preferredName).must(queryStringQuery) + .minimumNumberShouldMatch(0); + SearchRequestBuilder requestBuilder = client().prepareSearch(config("index.name")).setQuery(query).setFrom(from) + .setSize(size); + requestBuilder.addAggregation( + AggregationBuilders.terms(HomeController.TYPE).field(HomeController.TYPE + ".raw").size(1000)); + SearchResponse response = requestBuilder.get(); + return response; + } } \ No newline at end of file diff --git a/test/modules/IndexTest.java b/test/modules/IndexTest.java index 7617d19..391421b 100644 --- a/test/modules/IndexTest.java +++ b/test/modules/IndexTest.java @@ -5,8 +5,10 @@ import java.io.FileWriter; import java.io.IOException; +import org.elasticsearch.action.search.SearchResponse; import org.junit.AfterClass; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -15,26 +17,33 @@ import com.hp.hpl.jena.rdf.model.ModelFactory; import apps.Convert; +import models.AuthorityResource; import play.Application; +import play.api.inject.BindingKey; +import play.api.inject.DefaultApplicationLifecycle; import play.inject.guice.GuiceApplicationBuilder; import play.libs.Json; import play.test.WithApplication; public class IndexTest extends WithApplication { + private static final File[] TEST_FILES = new File("test/ttl").listFiles(); private static final String PATH = "GND.jsonl"; + private IndexComponent index; + @Override protected Application provideApplication() { // See // https://www.playframework.com/documentation/2.6.1/JavaDependencyInjection + // https://www.playframework.com/documentation/2.6.x/JavaTestingWithGuice return new GuiceApplicationBuilder().build(); } @BeforeClass public static void convert() throws IOException { try (FileWriter out = new FileWriter(PATH)) { - for (File file : new File("test/ttl").listFiles()) { + for (File file : TEST_FILES) { Model sourceModel = ModelFactory.createDefaultModel(); sourceModel.read(new FileReader(file), null, "TTL"); String id = file.getName().split("\\.")[0]; @@ -50,14 +59,44 @@ public static void convert() throws IOException { @AfterClass public static void cleanup() throws IOException { // TODO: use separate dir, override config in provideApplication + // https://www.playframework.com/documentation/2.6.x/JavaTestingWithGuice#configuration // FileUtils.deleteDirectory(new File("data")); } + @Before + public void setup() { + app.injector().instanceOf(DefaultApplicationLifecycle.class).stop(); + index = app.injector().instanceOf(new BindingKey<>(IndexComponent.class)); + } + @Test - public void test() { + public void testIndexData() { Assert.assertTrue("Index data file should exist", new File(PATH).exists()); System.out.println("Indexed from: " + PATH); - // TODO: inject index, check number of docs + } + + @Test + public void testTotalHits() { + Assert.assertEquals(TEST_FILES.length, index.query("*").getHits().getTotalHits()); + } + + @Test + public void testFieldQuery() { + Assert.assertEquals(1, index.query("preferredName:\"Weizenbaum, Joseph\"").getHits().getTotalHits()); + Assert.assertEquals(0, index.query("id:\"Weizenbaum, Joseph\"").getHits().getTotalHits()); + } + + @Test + public void testContextQuery() { + Assert.assertEquals(0, index.query("jsonld").getHits().getTotalHits()); + } + + @Test + public void testPerfectFieldMatch() { + SearchResponse response = index.query("london"); + Assert.assertEquals(2, response.getHits().getTotalHits()); + Assert.assertEquals("London", Json.fromJson(Json.parse(response.getHits().getHits()[0].getSourceAsString()), + AuthorityResource.class).preferredName.get(0)); } } diff --git a/test/ttl/1045328480.ttl b/test/ttl/1045328480.ttl new file mode 100644 index 0000000..77692d2 --- /dev/null +++ b/test/ttl/1045328480.ttl @@ -0,0 +1,37 @@ +@prefix schema: . +@prefix gndo: . +@prefix lib: . +@prefix marcRole: . +@prefix owl: . +@prefix skos: . +@prefix dcmitype: . +@prefix rdfs: . +@prefix geo: . +@prefix umbel: . +@prefix dbp: . +@prefix dnbt: . +@prefix rdau: . +@prefix sf: . +@prefix dnb_intern: . +@prefix rdf: . +@prefix v: . +@prefix dcterms: . +@prefix bibo: . +@prefix gbv: . +@prefix isbd: . +@prefix foaf: . +@prefix dc: . + + a gndo:BuildingOrMemorial ; + foaf:page ; + gndo:gndIdentifier "1045328480" ; + gndo:architect , ; + gndo:broaderTermInstantial ; + gndo:gndSubjectCategory ; + gndo:geographicAreaCode ; + gndo:definition "72-stöckiges u. 310 m hohes, multifunktionales Hochhaus am Südufer d. Themse i.d. Nähe d. London Bridge in London-Southwark"@de ; + gndo:dateOfProduction "16.03.2009-01.02.2013" ; + gndo:variantNameForThePlaceOrGeographicName "London Bridge Tower (London)" , "Shard London Bridge (London)" , "Shard of Glass (London)" , "32 London Bridge (London)" , "Thirty-two London Bridge (London)" ; + gndo:preferredNameForThePlaceOrGeographicName "The Shard (London)" ; + gndo:place . +