+ * This class is exposed as a Spring Component via + * {@link AnnotationBrowserSidebarAutoConfiguration#annotationBrowserSidebarFactory}. + *
+ */ public class DiamSidebarFactory extends AnnotationSidebarFactory_ImplBase + implements ClientSideUserPreferencesProvider { + private WatchedResourceFile@@ -60,9 +56,14 @@ public String getCommand() public DefaultAjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) { try { - AnnotationPageBase page = getPage(); - CAS cas = page.getEditorCas(); - actionSpan(aTarget, aRequest.getRequestParameters(), cas); + var page = getPage(); + var cas = page.getEditorCas(); + var state = getAnnotatorState(); + var range = getRangeFromRequest(state, aRequest.getRequestParameters(), cas); + + state.getSelection().selectSpan(cas, range.getBegin(), range.getEnd()); + page.getAnnotationActionHandler().actionCreateOrUpdate(aTarget, cas); + return new DefaultAjaxResponse(getAction(aRequest)); } catch (Exception e) { @@ -70,43 +71,17 @@ public DefaultAjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) } } - private void actionSpan(AjaxRequestTarget aTarget, IRequestParameters aRequestParameters, - CAS aCas) - throws IOException, AnnotationException - { - AnnotationPageBase page = (AnnotationPageBase) aTarget.getPage(); - AnnotatorState state = getAnnotatorState(); - - // This is the span the user has marked in the browser in order to create a new slot-filler - // annotation OR the span of an existing annotation which the user has selected. - var userSelectedSpan = getOffsetsFromRequest(state, aRequestParameters, aCas); - - if (state.isSlotArmed()) { - // When filling a slot, the current selection is *NOT* changed. The - // Span annotation which owns the slot that is being filled remains - // selected! 
- page.getAnnotationActionHandler().actionFillSlot(aTarget, aCas, - userSelectedSpan.getBegin(), userSelectedSpan.getEnd(), VID.NONE_ID); - return; - } - - Selection selection = state.getSelection(); - selection.selectSpan(aCas, userSelectedSpan.getBegin(), userSelectedSpan.getEnd()); - page.getAnnotationActionHandler().actionCreateOrUpdate(aTarget, aCas); - } - /** * Extract offset information from the current request. These are either offsets of an existing * selected annotations or offsets contained in the request for the creation of a new * annotation. */ - static Range getOffsetsFromRequest(AnnotatorState aState, IRequestParameters request, CAS aCas) + static Range getRangeFromRequest(AnnotatorState aState, IRequestParameters request, CAS aCas) throws IOException { - String offsets = request.getParameterValue(PARAM_OFFSETS).toString(); + var offsets = request.getParameterValue(PARAM_OFFSETS).toString(); - CompactRangeList offsetLists = JSONUtil.getObjectMapper().readValue(offsets, - CompactRangeList.class); + var offsetLists = JSONUtil.getObjectMapper().readValue(offsets, CompactRangeList.class); int annotationBegin = aState.getWindowBeginOffset() + offsetLists.get(0).getBegin(); int annotationEnd = aState.getWindowBeginOffset() diff --git a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithExistingAnnotationHandler.java b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithExistingAnnotationHandler.java index 1c9f9af380e..ff7aed02ee6 100644 --- a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithExistingAnnotationHandler.java +++ b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithExistingAnnotationHandler.java @@ -17,12 +17,10 @@ */ package de.tudarmstadt.ukp.inception.diam.editor.actions; -import org.apache.uima.cas.CAS; import 
org.apache.wicket.ajax.AjaxRequestTarget; import org.apache.wicket.request.Request; import org.springframework.core.annotation.Order; -import de.tudarmstadt.ukp.clarin.webanno.api.annotation.page.AnnotationPageBase; import de.tudarmstadt.ukp.inception.diam.editor.config.DiamAutoConfig; import de.tudarmstadt.ukp.inception.diam.model.ajax.DefaultAjaxResponse; @@ -48,13 +46,12 @@ public String getCommand() public DefaultAjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) { try { - AnnotationPageBase page = getPage(); - CAS cas = page.getEditorCas(); - - // When filling a slot, the current selection is *NOT* changed. The - // Span annotation which owns the slot that is being filled remains - // selected! - page.getAnnotationActionHandler().actionFillSlot(aTarget, cas, 0, 0, getVid(aRequest)); + var page = getPage(); + var cas = page.getEditorCas(); + var slotFillerId = getVid(aRequest); + // When filling a slot, the current selection is *NOT* changed. The Span annotation + // which owns the slot that is being filled remains selected! 
+ page.getAnnotationActionHandler().actionFillSlot(aTarget, cas, slotFillerId); return new DefaultAjaxResponse(getAction(aRequest)); } @@ -66,6 +63,7 @@ public DefaultAjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) @Override public boolean accepts(Request aRequest) { - return super.accepts(aRequest) && getAnnotatorState().isSlotArmed(); + return super.accepts(aRequest) && getAnnotatorState().isSlotArmed() + && getVid(aRequest).isSet(); } } diff --git a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithNewAnnotationHandler.java b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithNewAnnotationHandler.java index aefe6d378f5..89164f24f02 100644 --- a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithNewAnnotationHandler.java +++ b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/FillSlotWithNewAnnotationHandler.java @@ -31,8 +31,6 @@ import de.tudarmstadt.ukp.inception.diam.model.ajax.DefaultAjaxResponse; import de.tudarmstadt.ukp.inception.diam.model.compact.CompactRange; import de.tudarmstadt.ukp.inception.diam.model.compact.CompactRangeList; -import de.tudarmstadt.ukp.inception.rendering.editorstate.AnnotatorState; -import de.tudarmstadt.ukp.inception.rendering.vmodel.VID; import de.tudarmstadt.ukp.inception.schema.adapter.AnnotationException; /** @@ -77,17 +75,14 @@ private void actionSpan(AjaxRequestTarget aTarget, IRequestParameters aRequestPa CAS aCas) throws IOException, AnnotationException { - AnnotationPageBase page = (AnnotationPageBase) aTarget.getPage(); - // This is the span the user has marked in the browser in order to create a new slot-filler // annotation OR the span of an existing annotation which the user has selected. 
- CompactRange userSelectedSpan = getOffsetsFromRequest(aTarget, aRequestParameters, aCas); + var range = getRangeFromRequest(aTarget, aRequestParameters, aCas); - // When filling a slot, the current selection is *NOT* changed. The - // Span annotation which owns the slot that is being filled remains - // selected! - page.getAnnotationActionHandler().actionFillSlot(aTarget, aCas, userSelectedSpan.getBegin(), - userSelectedSpan.getEnd(), VID.NONE_ID); + // When filling a slot, the current selection is *NOT* changed. The Span annotation which + // owns the slot that is being filled remains selected! + getPage().getAnnotationActionHandler().actionFillSlot(aTarget, aCas, range.getBegin(), + range.getEnd()); } /** @@ -95,18 +90,17 @@ private void actionSpan(AjaxRequestTarget aTarget, IRequestParameters aRequestPa * selected annotations or offsets contained in the request for the creation of a new * annotation. */ - private CompactRange getOffsetsFromRequest(AjaxRequestTarget aTarget, - IRequestParameters request, CAS aCas) + private CompactRange getRangeFromRequest(AjaxRequestTarget aTarget, IRequestParameters request, + CAS aCas) throws IOException { // Create new span annotation - in this case we get the offset information from the // request - String offsets = request.getParameterValue(PARAM_OFFSETS).toString(); + var offsets = request.getParameterValue(PARAM_OFFSETS).toString(); - CompactRangeList offsetLists = JSONUtil.getObjectMapper().readValue(offsets, - CompactRangeList.class); + var offsetLists = JSONUtil.getObjectMapper().readValue(offsets, CompactRangeList.class); - AnnotatorState state = getAnnotatorState(); + var state = getAnnotatorState(); int annotationBegin = state.getWindowBeginOffset() + offsetLists.get(0).getBegin(); int annotationEnd = state.getWindowBeginOffset() + offsetLists.get(offsetLists.size() - 1).getEnd(); diff --git a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LazyDetailsHandler.java 
b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LazyDetailsHandler.java index d3faa9c1879..b22f7530900 100644 --- a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LazyDetailsHandler.java +++ b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LazyDetailsHandler.java @@ -67,9 +67,9 @@ public AjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) final VID paramId = getVid(aRequest); AnnotatorState state = page.getModelObject(); - var result = lazyDetailsLookupService.lookupLazyDetails( - aRequest.getRequestParameters(), paramId, casProvider, state.getDocument(), - state.getUser(), state.getWindowBeginOffset(), state.getWindowEndOffset()); + var result = lazyDetailsLookupService.lookupLazyDetails(aRequest.getRequestParameters(), + paramId, casProvider, state.getDocument(), state.getUser(), + state.getWindowBeginOffset(), state.getWindowEndOffset()); attachResponse(aTarget, aRequest, toInterpretableJsonString(result)); return result; } diff --git a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadAnnotationsHandler.java b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadAnnotationsHandler.java index 8e89a293f94..0534af7211c 100644 --- a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadAnnotationsHandler.java +++ b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadAnnotationsHandler.java @@ -25,6 +25,7 @@ import org.apache.wicket.request.Request; import org.springframework.core.annotation.Order; +import de.tudarmstadt.ukp.clarin.webanno.security.UserDao; import de.tudarmstadt.ukp.inception.diam.editor.config.DiamAutoConfig; import de.tudarmstadt.ukp.inception.diam.model.ajax.AjaxResponse; import de.tudarmstadt.ukp.inception.diam.model.ajax.DefaultAjaxResponse; @@ -55,12 
+56,15 @@ public class LoadAnnotationsHandler private final RenderingPipeline renderingPipeline; private final VDocumentSerializerExtensionPoint vDocumentSerializerExtensionPoint; + private final UserDao userService; public LoadAnnotationsHandler(RenderingPipeline aRenderingPipeline, - VDocumentSerializerExtensionPoint aVDocumentSerializerExtensionPoint) + VDocumentSerializerExtensionPoint aVDocumentSerializerExtensionPoint, + UserDao aUserService) { renderingPipeline = aRenderingPipeline; vDocumentSerializerExtensionPoint = aVDocumentSerializerExtensionPoint; + userService = aUserService; } @Override @@ -100,6 +104,7 @@ private RenderRequest prepareRenderRequest(Request aRequest) throws IOException RenderRequest request = RenderRequest.builder() // .withState(state) // + .withSessionOwner(userService.getCurrentUser()) // .withCas(page.getEditorCas()) // .withWindow(begin, end) // .withText(includeText) // diff --git a/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadPreferences.java b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadPreferences.java new file mode 100644 index 00000000000..da10ea025ff --- /dev/null +++ b/inception/inception-diam/src/main/java/de/tudarmstadt/ukp/inception/diam/editor/actions/LoadPreferences.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.diam.editor.actions; + +import java.util.Map; + +import org.apache.wicket.ajax.AjaxRequestTarget; +import org.apache.wicket.request.Request; +import org.springframework.core.annotation.Order; + +import de.tudarmstadt.ukp.clarin.webanno.security.UserDao; +import de.tudarmstadt.ukp.clarin.webanno.support.JSONUtil; +import de.tudarmstadt.ukp.inception.diam.editor.config.DiamAutoConfig; +import de.tudarmstadt.ukp.inception.diam.model.ajax.AjaxResponse; +import de.tudarmstadt.ukp.inception.diam.model.ajax.DefaultAjaxResponse; +import de.tudarmstadt.ukp.inception.preferences.ClientSidePreferencesKey; +import de.tudarmstadt.ukp.inception.preferences.PreferencesService; + +/** + *
+ * This class is exposed as a Spring Component via {@link DiamAutoConfig#loadPreferences}. + *
+ */ +@Order(EditorAjaxRequestHandler.PRIO_RENDER_HANDLER) +public class LoadPreferences + extends EditorAjaxRequestHandlerBase +{ + public static final String COMMAND = "loadPreferences"; + + public static final String PARAM_KEY = "key"; + + private final PreferencesService preferencesService; + private final UserDao userService; + + public LoadPreferences(UserDao aUserService, PreferencesService aPreferencesService) + { + userService = aUserService; + preferencesService = aPreferencesService; + } + + @Override + public String getCommand() + { + return COMMAND; + } + + @Override + public AjaxResponse handle(AjaxRequestTarget aTarget, Request aRequest) + { + try { + var key = new ClientSidePreferencesKey
@@ -308,7 +309,7 @@ public CAS importCasFromFile(File aFile, SourceDocument aDocument, String aForma
// Prepare a CAS with the project type system
CAS cas = WebAnnoCasUtil.createCas(tsd);
- format.read(WebAnnoCasUtil.getRealCas(cas), aFile);
+ format.read(aDocument.getProject(), WebAnnoCasUtil.getRealCas(cas), aFile);
// Create sentence / token annotations if they are missing - sentences first because
// tokens are then generated inside the sentences
@@ -394,23 +395,56 @@ private void splitSenencesIfNecssaryAndCheckQuota(CAS cas, FormatSupport aFormat
public static void splitSentences(CAS aCas)
{
- BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
- bi.setText(aCas.getDocumentText());
- int last = bi.first();
- int cur = bi.next();
- while (cur != BreakIterator.DONE) {
- int[] span = new int[] { last, cur };
- trim(aCas.getDocumentText(), span);
- if (!isEmpty(span[0], span[1])) {
- aCas.addFsToIndexes(createSentence(aCas, span[0], span[1]));
+ splitSentences(aCas, null); // no zones: delegate to the zone-aware overload, which then treats the whole text as one segment
+ }
+
+ public static void splitSentences(CAS aCas, Iterable<? extends AnnotationFS> aZones) // adds sentence annotations to aCas, segmenting each zone-delimited stretch separately; aZones may be null
+ {
+ if (aCas.getDocumentText() == null) { // no document text: nothing to segment
+ return;
+ }
+
+ int[] sortedZoneBoundaries = null;
+
+ if (aZones != null) {
+ var zoneBoundaries = new IntArrayList();
+ for (var zone : aZones) { // both ends of every zone become cut points
+ zoneBoundaries.add(zone.getBegin());
+ zoneBoundaries.add(zone.getEnd());
+ }
+
+ sortedZoneBoundaries = zoneBoundaries.intStream().distinct().sorted().toArray(); // de-duplicated, ascending cut points
+ }
+
+ if (sortedZoneBoundaries == null || sortedZoneBoundaries.length < 2) { // no usable zones: fall back to the whole document as a single segment
+ sortedZoneBoundaries = new int[] { 0, aCas.getDocumentText().length() };
+ }
+
+ for (int i = 1; i < sortedZoneBoundaries.length; i++) { // NOTE(review): text before the first and after the last cut point is never segmented when zones do not span the whole document - confirm intended
+ var begin = sortedZoneBoundaries[i - 1];
+ var end = sortedZoneBoundaries[i];
+ BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
+ bi.setText(aCas.getDocumentText().substring(begin, end)); // iterator positions are relative to begin
+ int last = bi.first();
+ int cur = bi.next();
+ while (cur != BreakIterator.DONE) {
+ int[] span = new int[] { last + begin, cur + begin }; // shift back to document offsets
+ trim(aCas.getDocumentText(), span); // adjusts span in place; presumably strips surrounding whitespace - confirm in trim()
+ if (!isEmpty(span[0], span[1])) { // skip spans that isEmpty() reports as empty after trimming
+ aCas.addFsToIndexes(createSentence(aCas, span[0], span[1]));
+ }
+ last = cur;
+ cur = bi.next();
}
- last = cur;
- cur = bi.next();
}
}
public static void tokenize(CAS aCas)
{
+ if (aCas.getDocumentText() == null) {
+ return;
+ }
+
BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
for (AnnotationFS s : selectSentences(aCas)) {
bi.setText(s.getCoveredText());
diff --git a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/SegmentationTest.java b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/SegmentationTest.java
index 2cd4993ef52..88af663c2d4 100644
--- a/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/SegmentationTest.java
+++ b/inception/inception-export/src/test/java/de/tudarmstadt/ukp/inception/export/SegmentationTest.java
@@ -17,15 +17,17 @@
*/
package de.tudarmstadt.ukp.inception.export;
-import static java.util.Arrays.asList;
import static org.apache.uima.fit.util.CasUtil.toText;
import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.assertj.core.api.Assertions.assertThat;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.junit.jupiter.api.Test;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Div;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Heading;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
@@ -38,7 +40,21 @@ public void testSplitSentences() throws Exception
DocumentImportExportServiceImpl.splitSentences(jcas.getCas());
- assertEquals(asList("I am one.", "I am two."), toText(select(jcas, Sentence.class)));
+ assertThat(toText(select(jcas, Sentence.class))) //
+ .containsExactly("I am one.", "I am two.");
+ }
+
+ @Test
+ public void testSplitSentencesWithZones() throws Exception // checks that sentence splitting does not merge text across zone annotations
+ {
+ JCas jcas = JCasFactory.createText("Heading I am two.", "en"); // no sentence-final punctuation after "Heading"
+ new Heading(jcas, 0, 7).addToIndexes(); // zone covering "Heading"
+ new Paragraph(jcas, 8, 17).addToIndexes(); // zone covering "I am two."
+
+ DocumentImportExportServiceImpl.splitSentences(jcas.getCas(), jcas.select(Div.class)); // presumably Div selects both the Heading and the Paragraph as zones - confirm type hierarchy
+
+ assertThat(toText(select(jcas, Sentence.class))) //
+ .containsExactly("Heading", "I am two.");
+ }
@Test
@@ -46,13 +62,14 @@ public void testTokenize() throws Exception
{
JCas jcas = JCasFactory.createText("i am one.i am two.", "en");
new Sentence(jcas, 0, 9).addToIndexes();
- ;
new Sentence(jcas, 9, 18).addToIndexes();
DocumentImportExportServiceImpl.tokenize(jcas.getCas());
- assertEquals(asList("i am one.", "i am two."), toText(select(jcas, Sentence.class)));
- assertEquals(asList("i", "am", "one", ".", "i", "am", "two", "."),
- toText(select(jcas, Token.class)));
+ assertThat(toText(select(jcas, Sentence.class))) //
+ .containsExactly("i am one.", "i am two.");
+
+ assertThat(toText(select(jcas, Token.class))) //
+ .containsExactly("i", "am", "one", ".", "i", "am", "two", ".");
}
}
diff --git a/inception/inception-external-editor/pom.xml b/inception/inception-external-editor/pom.xml
index c226288a2e5..e462d53fef9 100644
--- a/inception/inception-external-editor/pom.xml
+++ b/inception/inception-external-editor/pom.xml
@@ -20,7 +20,7 @@
+ * This class is exposed as a Spring Component via + * {@link PubMedDocumentRepositoryAutoConfiguration#pubMedCentralProviderFactory}. + *
+ */ +@Order(100) +public class PubMedCentralProviderFactory + implements BeanNameAware, ExternalSearchProviderFactory* This class is exposed as a Spring Component via - * {@link HtmlAnnotationEditorSupportAutoConfiguration#htmlAnnotationEditorFactory()}. + * {@link AnnotatorJsAnnotationEditorSupportAutoConfiguration#annotatorJsHtmlAnnotationEditorFactory()}. *
*/ public class AnnotatorJsHtmlAnnotationEditorFactory @@ -43,7 +44,10 @@ public class AnnotatorJsHtmlAnnotationEditorFactory @Override public String getDisplayName() { - return "HTML (AnnotatorJS)"; + // return Strings.getString(Messages_.ANNOTATORJS_EDITOR_NAME); + // FIXME: For some reason the annotation processor does not seem to work in Eclipse full + // builds. Needs to be debugged.... + return Strings.getString("annotatorjs-editor.name"); } @Override @@ -62,7 +66,8 @@ public int accepts(Project aProject, String aFormat) public AnnotationEditorBase create(String aId, IModel- * This class is exposed as a Spring Component via - * {@link HtmlAnnotationEditorSupportAutoConfiguration#htmlDocumentIFrameViewFactory}. - *
- * - * @deprecated Use {@link XHtmlXmlDocumentIFrameViewFactory} instead - */ -@Deprecated -public class HtmlDocumentIFrameViewFactory - implements DocumentViewFactory -{ - public static final String ID = "iframe:cas+html"; - - @Override - public String getId() - { - return ID; - } - - @Override - public boolean accepts(AnnotationDocument aContext) - { - return HtmlFormatSupport.ID.equals(aContext.getDocument().getFormat()); - } - - @Override - public Component createView(String aId, IModel"; - nodes.add(startNode); - - Node endNode = new Node(); - endNode.position = div.getEnd(); - endNode.type = "
"; - nodes.add(endNode); - } - if (div.getType().getName().equals(Heading.class.getName())) { - Node startNode = new Node(); - startNode.position = div.getBegin(); - startNode.type = "+ * This class is exposed as a Spring Component via {@link BioCAutoConfiguration#bioCFormatSupport}. + *
+ */ +public class BioCFormatSupport + implements FormatSupport +{ + public static final String ID = "bioc"; + public static final String NAME = "BioC XML (experimental)"; + + @Override + public String getId() + { + return ID; + } + + @Override + public String getName() + { + return NAME; + } + + @Override + public boolean isReadable() + { + return true; + } + + @Override + public CollectionReaderDescription getReaderDescription(Project aProject, + TypeSystemDescription aTSD) + throws ResourceInitializationException + { + return createReaderDescription(BioCReader.class, aTSD); + } +} diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCReader.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCReader.java new file mode 100644 index 00000000000..866f31731b3 --- /dev/null +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCReader.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.io.bioc; + +import static de.tudarmstadt.ukp.inception.io.bioc.BioCComponent.addCollectionMetadataField; + +import java.io.IOException; +import java.util.Optional; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Unmarshaller; +import javax.xml.stream.XMLStreamException; + +import org.apache.uima.UimaContext; +import org.apache.uima.collection.CollectionException; +import org.apache.uima.fit.factory.JCasBuilder; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; + +import de.tudarmstadt.ukp.inception.io.bioc.model.BioCDocument; +import de.tudarmstadt.ukp.inception.io.bioc.model.BioCToCas; + +public class BioCReader + extends BioCReaderImplBase +{ + private JAXBContext context; + private Unmarshaller unmarshaller; + private Optional.xml
. If the suffix is not
+ * needed, provide an empty string as value.
+ */
+ public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION;
+ @ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".xml")
+ private String filenameSuffix;
+
+ /**
+ * Character encoding of the output data.
+ */
+ public static final String PARAM_TARGET_ENCODING = ComponentParameters.PARAM_TARGET_ENCODING;
+ @ConfigurationParameter(name = PARAM_TARGET_ENCODING, mandatory = true, //
+ defaultValue = ComponentParameters.DEFAULT_ENCODING)
+ private String targetEncoding;
+
+ private JAXBContext context;
+
+ @Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException // sets up the JAXB context once, when the component is initialized
+ {
+ super.initialize(aContext);
+
+ try {
+ context = JAXBContext.newInstance(BioCCollection.class); // kept in a field and used by process() to create marshallers
+ }
+ catch (JAXBException e) {
+ throw new ResourceInitializationException(e); // report JAXB setup failure as an initialization error, keeping the cause
+ }
+ }
+
+ @Override
+ public void process(JCas aJCas) throws AnalysisEngineProcessException // marshals one BioCCollection built from the given CAS to the stream from getOutputStream(aJCas, filenameSuffix)
+ {
+ try (var docOS = getOutputStream(aJCas, filenameSuffix)) { // try-with-resources closes the stream on every path
+ Marshaller marshaller = context.createMarshaller(); // a new marshaller per call, created from the shared JAXB context
+ marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
+ // Set to fragment mode to omit XML declaration
+ marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true); // NOTE(review): targetEncoding is not passed to the marshaller here (no Marshaller.JAXB_ENCODING) - confirm intended
+
+ var bioCCollection = new BioCCollection();
+
+ // Base-information - may be overwritten by the metadata fields below
+ var dmd = DocumentMetaData.get(aJCas);
+ bioCCollection.setSource(dmd.getCollectionId());
+
+ // Use BioC metadata fields if available
+ getCollectionMetadataField(aJCas.getCas(), E_SOURCE)
+ .ifPresent($ -> bioCCollection.setSource($.getValue())); // replaces the collection id set above when present
+ getCollectionMetadataField(aJCas.getCas(), E_KEY)
+ .ifPresent($ -> bioCCollection.setKey($.getValue()));
+ getCollectionMetadataField(aJCas.getCas(), E_DATE)
+ .ifPresent($ -> bioCCollection.setDate($.getValue()));
+
+ new CasToBioC().convert(aJCas, bioCCollection); // presumably fills the collection from the CAS content - confirm in CasToBioC
+
+ marshaller.marshal(bioCCollection, docOS);
+ }
+ catch (Exception e) { // any failure is rethrown as a UIMA processing exception with the cause preserved
+ throw new AnalysisEngineProcessException(e);
+ }
+ }
+}
diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentFormatSupport.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentFormatSupport.java
new file mode 100644
index 00000000000..ec9bf4536e1
--- /dev/null
+++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentFormatSupport.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.inception.io.bioc;
+
+import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
+import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReaderDescription;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+
+import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport;
+import de.tudarmstadt.ukp.clarin.webanno.model.Project;
+import de.tudarmstadt.ukp.inception.io.bioc.config.BioCAutoConfiguration;
+
+/**
+ * Support for BioC format.
+ * + * This class is exposed as a Spring Component via + * {@link BioCAutoConfiguration#bioCXmlDocumentFormatSupport}. + *
+ */ +public class BioCXmlDocumentFormatSupport + implements FormatSupport +{ + public static final String ID = "bioc-xml"; + public static final String NAME = "BioC XML Document (experimental)"; + + @Override + public String getId() + { + return ID; + } + + @Override + public String getName() + { + return NAME; + } + + @Override + public boolean isReadable() + { + return true; + } + + @Override + public boolean isWritable() + { + return true; + } + + @Override + public CollectionReaderDescription getReaderDescription(Project aProject, + TypeSystemDescription aTSD) + throws ResourceInitializationException + { + return createReaderDescription(BioCXmlDocumentReader.class, aTSD); + } + + @Override + public AnalysisEngineDescription getWriterDescription(Project aProject, + TypeSystemDescription aTSD, CAS aCAS) + throws ResourceInitializationException + { + return createEngineDescription(BioCXmlDocumentWriter.class, aTSD); + } +} diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentReader.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentReader.java new file mode 100644 index 00000000000..1219a61bc72 --- /dev/null +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentReader.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.bioc; + +import static de.tudarmstadt.ukp.inception.io.bioc.BioCComponent.addCollectionMetadataField; +import static de.tudarmstadt.ukp.inception.io.bioc.xml.DocumentWrappingXmlInputReader.wrapInDocument; + +import java.io.IOException; + +import javax.xml.stream.XMLStreamException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.sax.SAXResult; +import javax.xml.transform.stax.StAXSource; + +import org.apache.uima.UimaContext; +import org.apache.uima.collection.CollectionException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.dkpro.core.api.xml.type.XmlDocument; +import org.dkpro.core.api.xml.type.XmlElement; +import org.xml.sax.SAXException; + +import de.tudarmstadt.ukp.inception.io.bioc.xml.BioC2XmlCas; +import de.tudarmstadt.ukp.inception.io.xml.dkprocore.CasXmlHandler; +import de.tudarmstadt.ukp.inception.io.xml.dkprocore.CasXmlHandler.ElementListener; +import de.tudarmstadt.ukp.inception.support.xml.XmlParserUtils; + +public class BioCXmlDocumentReader + extends BioCReaderImplBase +{ + private Transformer transformer; + private boolean documentAvailable = false; + + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException + { + super.initialize(aContext); + + try { + transformer = XmlParserUtils.newTransformerFactory().newTransformer(); + } + catch (TransformerConfigurationException e) { + throw new 
ResourceInitializationException(e); + } + + try { + documentAvailable = seekNextBioCDocument(); + } + catch (XMLStreamException | CollectionException | IOException e) { + throw new ResourceInitializationException(e); + } + } + + @Override + public boolean hasNext() throws IOException, CollectionException + { + return documentAvailable; + } + + @Override + public void getNext(JCas aJCas) throws IOException, CollectionException + { + initCas(aJCas, currentResource()); + + addCollectionMetadataField(aJCas, E_KEY, getCollectionKey()); + addCollectionMetadataField(aJCas, E_SOURCE, getCollectionSource()); + addCollectionMetadataField(aJCas, E_DATE, getCollectionDate()); + + CasXmlHandler handler = new CasXmlHandler(aJCas); + handler.addListener(newElementFilter(handler)); + + try { + transformer.transform(new StAXSource(wrapInDocument(getXmlEventReader())), + new SAXResult(handler)); + } + catch (TransformerException | XMLStreamException e) { + throw new IOException(e); + } + + new BioC2XmlCas().transferAnnotations(aJCas); + + try { + documentAvailable = seekNextBioCDocument(); + } + catch (XMLStreamException e) { + throw new IOException(e); + } + } + + private ElementListener newElementFilter(CasXmlHandler handler) + { + return new ElementListener() + { + @Override + public void startDocument(XmlDocument aDocument) throws SAXException + { + handler.startElement(null, null, E_COLLECTION, null); + handler.captureText(false); + } + + @Override + public void endDocument(XmlDocument aDocument) throws SAXException + { + handler.endElement(null, null, E_COLLECTION); + } + + @Override + public void startElement(XmlElement aElement) + { + var parent = aElement.getParent(); + if (parent != null + && (E_PASSAGE.equals(parent.getQName()) + || E_SENTENCE.equals(parent.getQName())) + && E_TEXT.equals(aElement.getQName())) { + handler.captureText(true); + } + else { + handler.captureText(false); + } + } + }; + } + + private boolean seekNextBioCDocument() + throws XMLStreamException, 
CollectionException, IOException + { + if (!isFileOpen()) { + openNextFile(); + + readCollectionMetdata(); + } + + if (isFileOpen()) { + return seekNextBioCDocumentInFile(); + } + + return false; + } + +} diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentWriter.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentWriter.java new file mode 100644 index 00000000000..3bc07e09fd4 --- /dev/null +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/BioCXmlDocumentWriter.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.io.bioc; + +import static javax.xml.transform.OutputKeys.INDENT; +import static javax.xml.transform.OutputKeys.METHOD; +import static javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringReader; +import java.io.StringWriter; + +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.jcas.JCas; +import org.dkpro.core.api.io.JCasFileWriter_ImplBase; +import org.xml.sax.SAXException; + +import de.tudarmstadt.ukp.inception.io.bioc.xml.Cas2BioCSaxEvents; +import de.tudarmstadt.ukp.inception.support.xml.XmlParserUtils; + +public class BioCXmlDocumentWriter + extends JCasFileWriter_ImplBase +{ + /** + * Indent output . 
+ */ + public static final String PARAM_INDENT = "indent"; + @ConfigurationParameter(name = PARAM_INDENT, mandatory = true, defaultValue = "false") + private boolean indent; + + @Override + public void process(JCas aJCas) throws AnalysisEngineProcessException + { + if (indent) { + String xmlString; + try (var stringWriter = new StringWriter()) { + var tf = XmlParserUtils.newTransformerFactory(); + var th = tf.newTransformerHandler(); + th.setResult(new StreamResult(stringWriter)); + var serializer = new Cas2BioCSaxEvents(th); + serializer.process(aJCas); + xmlString = stringWriter.toString(); + } + catch (IOException | SAXException | TransformerConfigurationException e) { + throw new AnalysisEngineProcessException(e); + } + + try (OutputStream docOS = getOutputStream(aJCas, ".xml")) { + var tf = XmlParserUtils.newTransformerFactory(); + tf.setAttribute("indent-number", 2); + var transformer = tf.newTransformer(); + transformer.setOutputProperty(OMIT_XML_DECLARATION, "yes"); + transformer.setOutputProperty(INDENT, "yes"); + transformer.transform(new StreamSource(new StringReader(xmlString)), + new StreamResult(docOS)); + } + catch (IOException | TransformerException e) { + throw new AnalysisEngineProcessException(e); + } + } + else { + try (OutputStream docOS = getOutputStream(aJCas, ".xml")) { + var tf = XmlParserUtils.newTransformerFactory(); + var th = tf.newTransformerHandler(); + th.getTransformer().setOutputProperty(OMIT_XML_DECLARATION, "yes"); + th.getTransformer().setOutputProperty(METHOD, "xml"); + th.getTransformer().setOutputProperty(INDENT, "no"); + th.setResult(new StreamResult(docOS)); + + var serializer = new Cas2BioCSaxEvents(th); + serializer.process(aJCas); + } + catch (IOException | SAXException | TransformerConfigurationException e) { + throw new AnalysisEngineProcessException(e); + } + } + } +} diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/config/BioCAutoConfiguration.java 
b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/config/BioCAutoConfiguration.java new file mode 100644 index 00000000000..e23233b8cb2 --- /dev/null +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/config/BioCAutoConfiguration.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.tudarmstadt.ukp.inception.io.bioc.config; + +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import de.tudarmstadt.ukp.inception.io.bioc.BioCFormatSupport; +import de.tudarmstadt.ukp.inception.io.bioc.BioCXmlDocumentFormatSupport; + +@Configuration +public class BioCAutoConfiguration +{ + @Bean + @ConditionalOnProperty(prefix = "format.bioc", name = "enabled", havingValue = "true", matchIfMissing = false) + public BioCFormatSupport bioCFormatSupport() + { + return new BioCFormatSupport(); + } + + @Bean + @ConditionalOnProperty(prefix = "format.bioc-xml", name = "enabled", havingValue = "true", matchIfMissing = false) + public BioCXmlDocumentFormatSupport bioCXmlDocumentFormatSupport() + { + return new BioCXmlDocumentFormatSupport(); + } +} diff --git a/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/BioCAnnotation.java b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/BioCAnnotation.java new file mode 100644 index 00000000000..0c40f26458b --- /dev/null +++ b/inception/inception-io-bioc/src/main/java/de/tudarmstadt/ukp/inception/io/bioc/model/BioCAnnotation.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package de.tudarmstadt.ukp.inception.io.bioc.model; + +import java.util.List; + +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; + +public class BioCAnnotation + extends BioCObject +{ + private String id; + private String text; + private List.ttl
. The file format will be
- * chosen depending on the file suffice.
+ * chosen depending on the file suffix.
*
* @see RDFLanguages
*/
- public static final String PARAM_FILENAME_EXTENSION =
- ComponentParameters.PARAM_FILENAME_EXTENSION;
+ public static final String PARAM_FILENAME_EXTENSION = ComponentParameters.PARAM_FILENAME_EXTENSION;
@ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".ttl")
private String filenameSuffix;
@Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
+ public void process(JCas aJCas) throws AnalysisEngineProcessException
{
OntModel model = ModelFactory.createOntologyModel();
model.setNsPrefix(NIF.PREFIX_NIF, NIF.NS_NIF);
model.setNsPrefix(ITS.PREFIX_ITS, ITS.NS_ITS);
-
+
DKPro2Nif.convert(aJCas, model);
-
+
try (OutputStream docOS = getOutputStream(aJCas, filenameSuffix)) {
RDFDataMgr.write(docOS, model.getBaseModel(),
RDFLanguages.fileExtToLang(filenameSuffix));
diff --git a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/DKPro2Nif.java b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/DKPro2Nif.java
index ba9b16236db..2ebcf8bcb6b 100644
--- a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/DKPro2Nif.java
+++ b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/DKPro2Nif.java
@@ -51,7 +51,7 @@ public static void convert(JCas aJCas, OntModel aTarget)
final Resource tParagraph = m.createResource(NIF.TYPE_PARAGRAPH);
final Resource tEntityOccurrence = m.createResource(NIF.TYPE_ENTITY_OCCURRENCE);
final Resource tOffsetBasedString = m.createResource(NIF.TYPE_OFFSET_BASED_STRING);
-
+
final Property pReferenceContext = m.createProperty(NIF.PROP_REFERENCE_CONTEXT);
final Property pIsString = m.createProperty(NIF.PROP_IS_STRING);
final Property pAnchorOf = m.createProperty(NIF.PROP_ANCHOR_OF);
@@ -73,7 +73,7 @@ public static void convert(JCas aJCas, OntModel aTarget)
DocumentMetaData dmd = DocumentMetaData.get(aJCas);
String docuri = dmd.getDocumentUri() != null ? dmd.getDocumentUri()
: "urn:" + dmd.getDocumentId();
-
+
// Convert document -> context node
Individual context;
{
@@ -81,14 +81,12 @@ public static void convert(JCas aJCas, OntModel aTarget)
aJCas.getDocumentText().length());
context = m.createIndividual(uri, tContext);
context.addRDFType(tOffsetBasedString);
- context.addLiteral(pIsString,
- m.createTypedLiteral(aJCas.getDocumentText(), XSDstring));
- context.addLiteral(pBeginIndex,
- m.createTypedLiteral(0, XSDnonNegativeInteger));
+ context.addLiteral(pIsString, m.createTypedLiteral(aJCas.getDocumentText(), XSDstring));
+ context.addLiteral(pBeginIndex, m.createTypedLiteral(0, XSDnonNegativeInteger));
context.addLiteral(pEndIndex,
m.createTypedLiteral(aJCas.getDocumentText().length(), XSDnonNegativeInteger));
}
-
+
// Convert headings/titles
for (Heading uimaHeading : select(aJCas, Heading.class)) {
String headingUri = String.format("%s#offset_%d_%d", docuri, uimaHeading.getBegin(),
@@ -97,7 +95,7 @@ public static void convert(JCas aJCas, OntModel aTarget)
nifTitle.addRDFType(tOffsetBasedString);
nifTitle.addProperty(pReferenceContext, context);
nifTitle.addLiteral(pAnchorOf, uimaHeading.getCoveredText());
- nifTitle.addLiteral(pBeginIndex,
+ nifTitle.addLiteral(pBeginIndex,
m.createTypedLiteral(uimaHeading.getBegin(), XSDnonNegativeInteger));
nifTitle.addLiteral(pEndIndex,
m.createTypedLiteral(uimaHeading.getEnd(), XSDnonNegativeInteger));
@@ -111,12 +109,12 @@ public static void convert(JCas aJCas, OntModel aTarget)
nifParagraph.addRDFType(tOffsetBasedString);
nifParagraph.addProperty(pReferenceContext, context);
nifParagraph.addLiteral(pAnchorOf, uimaParagraph.getCoveredText());
- nifParagraph.addLiteral(pBeginIndex,
+ nifParagraph.addLiteral(pBeginIndex,
m.createTypedLiteral(uimaParagraph.getBegin(), XSDnonNegativeInteger));
nifParagraph.addLiteral(pEndIndex,
m.createTypedLiteral(uimaParagraph.getEnd(), XSDnonNegativeInteger));
}
-
+
// Convert sentences
Individual previousNifSentence = null;
for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
@@ -126,18 +124,18 @@ public static void convert(JCas aJCas, OntModel aTarget)
nifSentence.addRDFType(tOffsetBasedString);
nifSentence.addProperty(pReferenceContext, context);
nifSentence.addLiteral(pAnchorOf, uimaSentence.getCoveredText());
- nifSentence.addLiteral(pBeginIndex,
+ nifSentence.addLiteral(pBeginIndex,
m.createTypedLiteral(uimaSentence.getBegin(), XSDnonNegativeInteger));
nifSentence.addLiteral(pEndIndex,
m.createTypedLiteral(uimaSentence.getEnd(), XSDnonNegativeInteger));
-
+
// Link word sequence
if (previousNifSentence != null) {
previousNifSentence.addProperty(pNextSentence, nifSentence);
nifSentence.addProperty(pPreviousSentence, previousNifSentence);
}
previousNifSentence = nifSentence;
-
+
// Convert tokens
Individual previousNifWord = null;
for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
@@ -147,39 +145,39 @@ public static void convert(JCas aJCas, OntModel aTarget)
nifWord.addRDFType(tOffsetBasedString);
nifWord.addProperty(pReferenceContext, context);
nifWord.addLiteral(pAnchorOf, uimaToken.getText());
- nifWord.addLiteral(pBeginIndex,
+ nifWord.addLiteral(pBeginIndex,
m.createTypedLiteral(uimaToken.getBegin(), XSDnonNegativeInteger));
nifWord.addLiteral(pEndIndex,
m.createTypedLiteral(uimaToken.getEnd(), XSDnonNegativeInteger));
-
+
// Link sentence <-> word
nifWord.addProperty(pSentence, nifSentence);
nifSentence.addProperty(pWord, nifWord);
-
+
// Link word sequence
if (previousNifWord != null) {
previousNifWord.addProperty(pNextWord, nifWord);
nifWord.addProperty(pPreviousWord, previousNifWord);
}
previousNifWord = nifWord;
-
+
// Convert stem
if (uimaToken.getStemValue() != null) {
nifWord.addProperty(pStem, uimaToken.getStemValue());
}
-
+
// Convert lemma
if (uimaToken.getLemmaValue() != null) {
nifWord.addProperty(pLemma, uimaToken.getLemmaValue());
}
-
+
// Convert posTag (this is discouraged, the better alternative should be oliaLink)
if (uimaToken.getPosValue() != null) {
nifWord.addProperty(pPosTag, uimaToken.getPosValue());
}
}
}
-
+
// Convert named entities
//
// Actually, the named entity in NIF is different from the one in DKPro Core. NIF uses
@@ -188,36 +186,36 @@ public static void convert(JCas aJCas, OntModel aTarget)
// uses, we'd need a named entity linker, not just a recognizer.
//
// We create NEs using the NIF 2.1 class "EntityOccurence".
- //
+ //
// So here, we check if the DKPro Core NE value/identifier looks like a URI and if yes, then
// we store it into the NIF taIdentRef property - otherwise we ignore it because NIF does
// not have the concept of a NE category.
for (NamedEntity uimaNamedEntity : select(aJCas, NamedEntity.class)) {
String neClass = uimaNamedEntity.getValue();
String neIdentifier = uimaNamedEntity.getIdentifier();
-
+
boolean neClassIsUri = neClass != null && IRIs.check(neClass);
boolean neIdentifierIsUri = neIdentifier != null && IRIs.check(neIdentifier);
-
+
if (!neClassIsUri && !neIdentifierIsUri) {
continue;
}
-
+
String neUri = String.format("%s#offset_%d_%d", docuri, uimaNamedEntity.getBegin(),
uimaNamedEntity.getEnd());
Individual nifNamedEntity = m.createIndividual(neUri, tEntityOccurrence);
nifNamedEntity.addRDFType(tOffsetBasedString);
nifNamedEntity.addProperty(pReferenceContext, context);
nifNamedEntity.addLiteral(pAnchorOf, uimaNamedEntity.getCoveredText());
- nifNamedEntity.addLiteral(pBeginIndex,
+ nifNamedEntity.addLiteral(pBeginIndex,
m.createTypedLiteral(uimaNamedEntity.getBegin(), XSDnonNegativeInteger));
nifNamedEntity.addLiteral(pEndIndex,
m.createTypedLiteral(uimaNamedEntity.getEnd(), XSDnonNegativeInteger));
-
+
if (neClassIsUri) {
nifNamedEntity.addProperty(pTaClassRef, m.createResource(neClass));
}
-
+
if (neIdentifierIsUri) {
nifNamedEntity.addProperty(pTaIdentRef, m.createResource(neIdentifier));
}
diff --git a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/ITS.java b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/ITS.java
index 693dbdbe1fa..1c9243fb2f3 100644
--- a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/ITS.java
+++ b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/ITS.java
@@ -25,10 +25,10 @@
public class ITS
{
public static final String PREFIX_ITS = "itsrdf";
-
+
public static final String NS_ITS = "http://www.w3.org/2005/11/its/rdf#";
public static final String PROP_TA_IDENT_REF = NS_ITS + "taIdentRef";
-
+
public static final String PROP_TA_CLASS_REF = NS_ITS + "taClassRef";
}
diff --git a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/NIF.java b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/NIF.java
index f9c760bd8f7..b48446a48c3 100644
--- a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/NIF.java
+++ b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/NIF.java
@@ -30,7 +30,7 @@
public class NIF
{
public static final String PREFIX_NIF = "nif";
-
+
public static final String NS_NIF = "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#";
/**
@@ -41,7 +41,7 @@ public class NIF
* first character of a text).
*/
public static final String PROP_BEGIN_INDEX = NS_NIF + "beginIndex";
-
+
/**
* The end index of a character range as defined in
* RFC 5147 Section 2.2.1 and
@@ -50,7 +50,7 @@ public class NIF
* first character of a text).
*/
public static final String PROP_END_INDEX = NS_NIF + "endIndex";
-
+
/**
* Links a URI of a string to its reference context of type nif:Context. The reference context
* determines the calculation of begin and end index
@@ -58,7 +58,7 @@ public class NIF
* Each String that is not an instance of nif:Context MUST have exactly one reference context.
*/
public static final String PROP_REFERENCE_CONTEXT = NS_NIF + "referenceContext";
-
+
/**
* The reference text as rdf:Literal for this nif:Context resource.
*
@@ -71,33 +71,33 @@ public class NIF
* character position and indices.
*/
public static final String PROP_IS_STRING = NS_NIF + "isString";
-
+
/**
* The string, which the URI is representing as an RDF Literal. Some use cases require this
* property, as it is necessary for certain sparql queries.
*/
public static final String PROP_ANCHOR_OF = NS_NIF + "anchorOf";
-
+
/**
* This property links sentences to their words.
*/
public static final String PROP_WORD = NS_NIF + "word";
-
+
/**
* See nif:nextSentence
*/
public static final String PROP_NEXT_WORD = NS_NIF + "nextWord";
-
+
/**
* see nif:nextSentence
*/
public static final String PROP_PREVIOUS_WORD = NS_NIF + "previousWord";
-
+
/**
* This property links words and other structures to their sentence.
*/
public static final String PROP_SENTENCE = NS_NIF + "sentence";
-
+
/**
* This property (and nif:previousSentence, nif:nextWord, nif:previousWord and their transitive
* extension) can be used to make resources of nif:Sentence and nif:Word traversable, it can not
@@ -107,22 +107,22 @@ public class NIF
* skos:broader and skos:broaderTransitive.
*/
public static final String PROP_NEXT_SENTENCE = NS_NIF + "nextSentence";
-
+
/**
* see nif:nextSentence
*/
public static final String PROP_PREVIOUS_SENTENCE = NS_NIF + "previousSentence";
-
+
/**
* The lemma(s) of the nif:String.
*/
public static final String PROP_LEMMA = NS_NIF + "lemma";
-
+
/**
* The stem(s) of the nif:String.
*/
public static final String PROP_STEM = NS_NIF + "stem";
-
+
/**
* To include the pos tag as it comes out of the NLP tool as RDF Literal. This property is
* discouraged to use alone, please use oliaLink and oliaCategory. We included it, because some
@@ -133,7 +133,7 @@ public class NIF
*/
@Deprecated
public static final String PROP_POS_TAG = NS_NIF + "posTag";
-
+
/**
* The confidence of an annotation as decimal between 0 and 1.
*/
@@ -150,7 +150,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String PROP_TA_MS_CLASS_REF = NS_NIF + "taMsClassRef";
-
+
/**
* A title within a text.
*
@@ -159,7 +159,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_TITLE = NS_NIF + "Title";
-
+
/**
* A paragraph.
*
@@ -168,7 +168,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_PARAGRAPH = NS_NIF + "Paragraph";
-
+
/**
* The Word class represents strings that are tokens or words. A string is a Word, if it is a
* word. We don't nitpic about whether it is a a pronoun, a name, a punctuation mark or an
@@ -210,14 +210,16 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_WORD = NS_NIF + "Word";
-
+
/**
* A sentence.
*
- * @see NIF 2.0 Core Ontology
+ * @see NIF
+ * 2.0 Core Ontology
*/
public static final String TYPE_SENTENCE = NS_NIF + "Sentence";
-
+
/**
* The string that serves as a context for its substrings. The Unicode String given in the
* nif:isString property must be used to calculate the begin and endIndex for all nif:Strings
@@ -229,7 +231,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_CONTEXT = NS_NIF + "Context";
-
+
/**
* Individuals of this class are a string, i.e. Unicode characters, who have been given a URI
* and are used in the subject of an RDF statement.
@@ -246,7 +248,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_STRING = NS_NIF + "String";
-
+
/**
* A nif:Phrase can be a nif:String, that is a chunk of several words or a word itself (e.g. a
* NounPhrase as a Named Entity). The term is underspecified and can be compatible with many
@@ -258,7 +260,7 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_PHRASE = NS_NIF + "Phrase";
-
+
/**
* cf. Linked-Data Aware URI Schemes for Referencing Text Fragments by Sebastian Hellmann, Jens
* Lehmann und Sören Auer in EKAW 2012 http://jens-lehmann.org/files/2012/ekaw_nif.pdf
@@ -270,15 +272,14 @@ public class NIF
* 2.0 Core Ontology
*/
public static final String TYPE_OFFSET_BASED_STRING = NS_NIF + "OffsetBasedString";
-
+
/**
* Text span annotation denoting that a word or phrase has been detected as occurrence of a
* named entity. (Use this without further annotation property assertions if you just want to
* express the detection of the occurrence when neither the mentioned entity nor its category
* was identified.)
*
- * @see NIF
+ * @see NIF
* 2.1 Core Ontology
*/
public static final String TYPE_ENTITY_OCCURRENCE = NS_NIF + "EntityOccurrence";
diff --git a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/Nif2DKPro.java b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/Nif2DKPro.java
index cd20ed34e41..6bec99760ef 100644
--- a/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/Nif2DKPro.java
+++ b/inception/inception-io-nif/src/main/java/org/dkpro/core/io/nif/internal/Nif2DKPro.java
@@ -51,7 +51,7 @@ public void setPosMappingProvider(MappingProvider aPosMappingProvider)
{
posMappingProvider = aPosMappingProvider;
}
-
+
public void convert(Statement aContext, JCas aJCas)
{
Model m = aContext.getModel();
@@ -60,7 +60,7 @@ public void convert(Statement aContext, JCas aJCas)
final Resource tWord = m.createResource(NIF.TYPE_WORD);
final Resource tTitle = m.createResource(NIF.TYPE_TITLE);
final Resource tParagraph = m.createResource(NIF.TYPE_PARAGRAPH);
-
+
final Property pReferenceContext = m.createProperty(NIF.PROP_REFERENCE_CONTEXT);
final Property pIsString = m.createProperty(NIF.PROP_IS_STRING);
final Property pBeginIndex = m.createProperty(NIF.PROP_BEGIN_INDEX);
@@ -73,44 +73,39 @@ public void convert(Statement aContext, JCas aJCas)
final Property pTaClassRef = m.createProperty(ITS.PROP_TA_CLASS_REF);
// Convert context node -> document text
- String text = m
- .getProperty(aContext.getSubject(), pIsString)
- .getString();
+ String text = m.getProperty(aContext.getSubject(), pIsString).getString();
aJCas.setDocumentText(text);
// Convert headings/titles
- Iterator+
- -
+