Skip to content

Commit

Permalink
Set up OpenRefine reconciliation endpoint
Browse files Browse the repository at this point in the history
See #65
  • Loading branch information
fsteeg committed Jun 25, 2018
1 parent 0e2dd40 commit 57c6125
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 7 deletions.
117 changes: 117 additions & 0 deletions app/controllers/Reconcile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/* Copyright 2014-2018, hbz. Licensed under the Eclipse Public License 1.0 */

package controllers;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;

import javax.inject.Inject;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHits;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;

import modules.IndexComponent;
import play.Logger;
import play.libs.Json;
import play.mvc.Controller;
import play.mvc.Result;

/**
 * OpenRefine reconciliation service controller.
 *
 * Serves reconciliation service meta data and multi query requests.
 *
 * See https://github.com/OpenRefine/OpenRefine/wiki/Reconciliation and
 * https://github.com/OpenRefine/OpenRefine/wiki/Reconciliation-Service-API
 *
 * @author Fabian Steeg (fsteeg)
 *
 */
public class Reconcile extends Controller {

	@Inject
	IndexComponent index;

	/** The single entity type this service offers, as a JSON array. */
	private static final JsonNode TYPES = Json.toJson(Arrays.asList("lobid-gnd"));

	/**
	 * Accepted JSONP callback names: dot-separated JavaScript-style identifiers.
	 * The callback is copied verbatim into the response body, so anything else
	 * is rejected instead of being reflected back to the client.
	 */
	private static final java.util.regex.Pattern JSONP_CALLBACK = java.util.regex.Pattern
			.compile("[A-Za-z_$][\\w$]*(\\.[A-Za-z_$][\\w$]*)*");

	/**
	 * @param callback
	 *            The name of the JSONP function to wrap the response in; an
	 *            empty (or missing) value yields plain JSON
	 * @return OpenRefine reconciliation endpoint meta data, wrapped in
	 *         `callback` if one was given, or a 400 response if `callback` is
	 *         not a valid function name
	 */
	public Result meta(String callback) {
		ObjectNode result = Json.newObject();
		result.put("name", "lobid-gnd reconciliation");
		result.put("identifierSpace", "http://lobid.org/gnd");
		result.put("schemaSpace", "http://lobid.org/gnd");
		result.set("defaultTypes", TYPES);
		result.set("view", Json.newObject()//
				.put("url", "http://lobid.org/gnd/{{id}}"));
		if (callback == null || callback.isEmpty()) {
			return ok(result);
		}
		if (!JSONP_CALLBACK.matcher(callback).matches()) {
			return badRequest("Invalid JSONP callback name");
		}
		// The payload is prefixed with an empty comment, see String.format below
		return ok(String.format("/**/%s(%s);", callback, result.toString())).as("application/json");
	}

	/**
	 * Expects a form encoded body with a `queries` field holding a JSON object
	 * that maps query keys to query objects, each with at least a `query`
	 * field.
	 *
	 * @return Reconciliation data for the queries in the request, keyed like
	 *         the incoming queries, or a 400 response if the request is
	 *         malformed
	 */
	public Result reconcile() {
		Map<String, String[]> form = request().body().asFormUrlEncoded();
		String[] queries = form == null ? null : form.get("queries");
		if (queries == null || queries.length == 0) {
			return badRequest("Missing form parameter 'queries'");
		}
		JsonNode request;
		try {
			request = Json.parse(queries[0]);
		} catch (RuntimeException e) {
			// Malformed JSON is a client error, not a server failure
			Logger.debug("Could not parse 'queries': " + e.getMessage());
			return badRequest("Form parameter 'queries' is not valid JSON");
		}
		Iterator<Entry<String, JsonNode>> inputQueries = request.fields();
		ObjectNode response = Json.newObject();
		while (inputQueries.hasNext()) {
			Entry<String, JsonNode> inputQuery = inputQueries.next();
			Logger.debug("q: " + inputQuery);
			if (!inputQuery.getValue().hasNonNull("query")) {
				return badRequest("Missing 'query' field in query '" + inputQuery.getKey() + "'");
			}
			SearchResponse searchResponse = executeQuery(inputQuery, buildQueryString(inputQuery));
			List<JsonNode> results = mapToResults(mainQuery(inputQuery), searchResponse.getHits());
			ObjectNode resultsForInputQuery = Json.newObject();
			resultsForInputQuery.set("result", Json.toJson(results));
			Logger.debug("r: " + resultsForInputQuery);
			response.set(inputQuery.getKey(), resultsForInputQuery);
		}
		return ok(response);
	}

	/**
	 * Converts search hits to reconciliation candidates with the fields `id`,
	 * `name`, `score`, `match` and `type`.
	 *
	 * @param mainQuery
	 *            The plain query text; a hit is flagged as `match` if its
	 *            preferred name equals this text, ignoring case
	 * @param searchHits
	 *            The hits returned for the query
	 * @return One result object per hit, in hit order
	 */
	private List<JsonNode> mapToResults(String mainQuery, SearchHits searchHits) {
		return Arrays.stream(searchHits.getHits()).map(hit -> {
			Map<String, Object> map = hit.getSource();
			ObjectNode resultForHit = Json.newObject();
			resultForHit.put("id", hit.getId());
			Object nameObject = map.get("preferredName");
			String name = nameObject == null ? "" : nameObject.toString();
			resultForHit.put("name", name);
			resultForHit.put("score", hit.getScore());
			resultForHit.put("match", mainQuery.equalsIgnoreCase(name));
			resultForHit.set("type", TYPES);
			return resultForHit;
		}).collect(Collectors.toList());
	}

	/**
	 * Runs the query string against the index.
	 *
	 * @param entry
	 *            The input query; its optional `limit` field is passed on as
	 *            the result size
	 * @param queryString
	 *            The full query string to search for
	 * @return The search response from the index
	 */
	private SearchResponse executeQuery(Entry<String, JsonNode> entry, String queryString) {
		JsonNode limitNode = entry.getValue().get("limit");
		// NOTE(review): -1 is passed when no limit is given; presumably the
		// index treats it as "use default size" -- confirm in IndexComponent
		int limit = limitNode == null ? -1 : limitNode.asInt();
		return index.query(queryString, "", 0, limit);
	}

	/**
	 * Builds the query string from the main query text plus the `v` value of
	 * every entry in the optional `properties` array, separated by spaces.
	 * Properties without a `v` field are skipped.
	 *
	 * @param entry
	 *            The input query
	 * @return The query string to send to the index
	 */
	private String buildQueryString(Entry<String, JsonNode> entry) {
		StringBuilder queryString = new StringBuilder(mainQuery(entry));
		JsonNode props = entry.getValue().get("properties");
		if (props != null) {
			for (JsonNode p : props) {
				JsonNode value = p.get("v");
				if (value != null) {
					queryString.append(' ').append(value.asText());
				}
			}
		}
		return queryString.toString();
	}

	/**
	 * @param entry
	 *            The input query, expected to have a `query` field (checked by
	 *            the caller)
	 * @return The text of the `query` field
	 */
	private String mainQuery(Entry<String, JsonNode> entry) {
		return entry.getValue().get("query").asText();
	}

}
32 changes: 25 additions & 7 deletions app/views/api.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
@main("", "lobid-gnd - API") {
<h1>lobid-gnd</h1>

<h2>Search: <code>@Html(controllers.routes.HomeController.search("text").toString)</code></h2>
<h2>Suche: <code>@Html(controllers.routes.HomeController.search("text").toString)</code></h2>

<dl>
@for((key,value) <- searchSamples) {
Expand All @@ -38,7 +38,7 @@ <h2>Search: <code>@Html(controllers.routes.HomeController.search("text").toStrin
}
</dl>

<h2>GET by ID: <code>@Html(controllers.routes.HomeController.authorityDotFormat("&lt;id&gt;", "json").toString)</code></h2>
<h2>Direktzugriff: <code>@Html(controllers.routes.HomeController.authorityDotFormat("&lt;id&gt;", "json").toString)</code></h2>

<dl>
@for((key,value) <- getSamples) {
Expand All @@ -50,7 +50,7 @@ <h2>GET by ID: <code>@Html(controllers.routes.HomeController.authorityDotFormat(
<h2 id='content_types'>Inhaltstypen <small><a href='#content_types'><span class='glyphicon glyphicon-link'></span></a></small></h2>

<p>Standardmäßig liefert dieser Dienst strukturierte API-Antworten (als JSON):</p>
<p><code>curl http://test.[email protected]("4074335-4")</code></p>
<p><code>curl http://[email protected]("4074335-4")</code></p>
<p>Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), JSON lines (application/x-jsonlines) oder HTML (text/html):</p>
<p><code>curl --header "Accept: application/json" http://[email protected]("london")</code></p>
<p><code>curl --header "Accept: application/x-jsonlines" "http://[email protected](routes.HomeController.search("type:Country").toString)" > countries.jsonl</code></p>
Expand Down Expand Up @@ -82,13 +82,31 @@ <h2 id='jsonld'>JSON-LD <small><a href='#jsonld'><span class='glyphicon glyphico
<p>JSON-API-Anfragen liefern <a href="http://json-ld.org/">JSON-LD</a>. Um damit zu experimentieren können sie das JSON-LD oder URLs im <a href="http://json-ld.org/playground/">JSON-LD Playground</a> einfügen.</p>
<dt>JSON-LD Context</dt> <dd><a href='@routes.HomeController.context'>@java.net.URLDecoder.decode(routes.HomeController.context.toString)</a></dd>
<dt>RDF-Konvertierung</dt><dd>Mit einem JSON-LD-Prozessor können sie API-Antworten in andere RDF-Serialisierungen umwandeln, z.B. in N-Quads mit <a href="https://github.com/digitalbazaar/jsonld-cli">jsonld-cli</a>:</dd>
<p><code>jsonld format --nquads http://test.[email protected]("4074335-4")</code></p>
<p><code>jsonld format --nquads http://[email protected]("4074335-4")</code></p>
<p>Einzeltreffer können per <code>Accept</code> Header auch direkt als RDF/XML, Turtle oder N-Triples angefordert werden:</p>
<p><code>curl --header "Accept: application/rdf+xml" http://test.[email protected]("4074335-4")</code></p>
<p><code>curl --header "Accept: text/turtle" http://test.[email protected]("4074335-4")</code></p>
<p><code>curl --header "Accept: application/n-triples" http://test.[email protected]("4074335-4")</code></p>
<p><code>curl --header "Accept: application/rdf+xml" http://[email protected]("4074335-4")</code></p>
<p><code>curl --header "Accept: text/turtle" http://[email protected]("4074335-4")</code></p>
<p><code>curl --header "Accept: application/n-triples" http://[email protected]("4074335-4")</code></p>
<p>Dabei werden wie <a href='#content_types'>im Abschnitt zu Inhaltstypen beschrieben</a> neben dem Format-Parameter auch URLs mit Formatendungen unterstützt:
<p><a href='@routes.HomeController.authorityDotFormat("4074335-4", format="rdf")'>@routes.HomeController.authorityDotFormat("4074335-4", format="rdf")</a></p>
<p><a href='@routes.HomeController.authorityDotFormat("4074335-4", format="ttl")'>@routes.HomeController.authorityDotFormat("4074335-4", format="ttl")</a></p>
<p><a href='@routes.HomeController.authorityDotFormat("4074335-4", format="nt")'>@routes.HomeController.authorityDotFormat("4074335-4", format="nt")</a></p>

<h2 id="openrefine">OpenRefine <small><a href="#openrefine"><span class="glyphicon glyphicon-link"></span></a></small></h2>

<p>Dieser Dienst bietet eine Implementierung der OpenRefine <a href="https://github.com/OpenRefine/OpenRefine/wiki/Reconciliation">Reconciliation</a> <a href="https://github.com/OpenRefine/OpenRefine/wiki/Reconciliation-Service-API">API</a> an.</p>
@desc("Metadaten-Anfrage (\"callback\": optionaler JSONP-Callback, in den die Antwort verpackt wird)", routes.Reconcile.meta("jsonp"))
@desc("Reconciliation-Anfrage (erwartet Formulardaten per HTTP POST, verwenden Sie diese URL in OpenRefine)", routes.Reconcile.reconcile())
<p>Intern sendet OpenRefine folgende Art von Anfrage:</p>
<pre>curl --data 'queries={"q1":{"query":"Twain, Mark"}}' http://lobid.org@routes.Reconcile.reconcile()</pre>
<p><b>Kurzanleitung</b></p>
<p>In OpenRefine -> <i>Create Project</i> -> <i>Clipboard</i>, dort einfügen:</p>
<pre>name;beruf;ort
J. Weizenbaum;Informatiker;Berlin
Twain, Mark;Schriftsteller;
Kumar, Lalit;;
Jemand;;</pre>
<p><i>Next</i> -> <i>Create Project</i> -> Spalte 'name' -> <i>Reconcile</i> -> <i>Start Reconciling</i></p>
<p><i>Add Standard Service...</i> -> <i>http://lobid.org@routes.Reconcile.reconcile()</i> -> <i>Add Service</i> -> <i>Start Reconciling</i></p>
<p>Eine detaillierte Anleitung zur OpenRefine Reconciliation mit lobid-organisations finden Sie <a href="http://hbz.github.io/slides/swib-15/#/28">hier</a>.</p>
}
5 changes: 5 additions & 0 deletions conf/routes
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
# Handle trailing slashes
GET /*path/ controllers.HomeController.redirectSlash(path: String)

#OpenRefine reconciliation endpoint
GET /gnd/reconcile controllers.Reconcile.meta(callback ?= "")

POST /gnd/reconcile controllers.Reconcile.reconcile()

GET /gnd controllers.HomeController.index

GET /gnd/api controllers.HomeController.api
Expand Down
69 changes: 69 additions & 0 deletions test/controllers/ReconcileTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/* Copyright 2014-2018, hbz. Licensed under the Eclipse Public License 1.0 */

package controllers;

import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static play.test.Helpers.GET;
import static play.test.Helpers.POST;
import static play.test.Helpers.contentAsString;
import static play.test.Helpers.fakeApplication;
import static play.test.Helpers.fakeRequest;
import static play.test.Helpers.route;
import static play.test.Helpers.running;

import org.junit.Test;

import com.google.common.collect.ImmutableMap;

import modules.IndexTest;
import play.Application;
import play.Logger;
import play.libs.Json;
import play.mvc.Result;

@SuppressWarnings("javadoc")
public class ReconcileTest extends IndexTest {

@Test
public void reconcileMetadataRequestNoCallback() {
Application application = fakeApplication();
running(application, () -> {
Result result = route(application, fakeRequest(GET, "/gnd/reconcile"));
assertNotNull(result);
assertThat(result.contentType().get(), is(equalTo("application/json")));
assertNotNull(Json.parse(contentAsString(result)));
});
}

@Test
public void reconcileMetadataRequestWithCallback() {
Application application = fakeApplication();
running(application, () -> {
Result result = route(application, fakeRequest(GET, "/gnd/reconcile?callback=jsonp"));
assertNotNull(result);
assertThat(result.contentType().get(), is(equalTo("application/json")));
assertThat(contentAsString(result), startsWith("/**/jsonp("));
});
}

@Test
// curl --data 'queries={"q99":{"query":"*"}}' localhost:9000/gnd/reconcile
public void reconcileRequest() {
Application application = fakeApplication();
running(application, () -> {
Result result = route(application, fakeRequest(POST, "/gnd/reconcile")
.bodyForm(ImmutableMap.of("queries", "{\"q99\":{\"query\":\"Twain, Mark\"}}")));
String content = contentAsString(result);
Logger.debug(Json.prettyPrint(Json.parse(content)));
assertThat(content, containsString("q99"));
assertThat(content, containsString("\"match\":false"));
assertThat(content, containsString("\"match\":true"));
});
}

}

0 comments on commit 57c6125

Please sign in to comment.