-
Notifications
You must be signed in to change notification settings - Fork 156
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* 0.14.x: #1587 - Disable fact linking support #1587 - Disable fact linking support No issue. Depend on WebAnno 4.0.0 beta 9 for next release. #1587 - Disable fact linking support #1582 - Support IMS CWB format #1582 - Support IMS CWB format #1581 - Update and modularize format documentation % Conflicts: % pom.xml
- Loading branch information
Showing
12 changed files
with
441 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
...on-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/formats/ImsCwbFormatSupport.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* | ||
* Copyright 2019 | ||
* Ubiquitous Knowledge Processing (UKP) Lab | ||
* Technische Universität Darmstadt | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package de.tudarmstadt.ukp.inception.formats; | ||
|
||
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; | ||
|
||
import org.apache.uima.collection.CollectionReaderDescription; | ||
import org.apache.uima.resource.ResourceInitializationException; | ||
import org.dkpro.core.io.imscwb.ImsCwbReader; | ||
import org.springframework.stereotype.Component; | ||
|
||
import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport; | ||
|
||
@Component | ||
public class ImsCwbFormatSupport | ||
implements FormatSupport | ||
{ | ||
public static final String ID = "imscwb"; | ||
public static final String NAME = "Corpus Workbench Format (aka VRT)"; | ||
|
||
@Override | ||
public String getId() | ||
{ | ||
return ID; | ||
} | ||
|
||
@Override | ||
public String getName() | ||
{ | ||
return NAME; | ||
} | ||
|
||
@Override | ||
public boolean isReadable() | ||
{ | ||
return true; | ||
} | ||
|
||
// @Override | ||
// public boolean isWritable() | ||
// { | ||
// return true; | ||
// } | ||
|
||
@Override | ||
public CollectionReaderDescription getReaderDescription() throws ResourceInitializationException | ||
{ | ||
return createReaderDescription(ImsCwbReader.class); | ||
} | ||
|
||
// @Override | ||
// public AnalysisEngineDescription getWriterDescription(Project aProject, CAS aCAS) | ||
// throws ResourceInitializationException | ||
// { | ||
// return createEngineDescription(ImsCwbWriter.class); | ||
// } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats-imscwb.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Copyright 2019 | ||
// Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology | ||
// Technische Universität Darmstadt | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
[[sect_formats_imscwb]] | ||
= IMS CWB (aka VRT) | ||
|
||
The "verticalized XML" format used by the link:http://cwb.sourceforge.net[IMS Open Corpus Workbench], | ||
a linguistic search engine. It uses a tab-separated format with limited markup (e.g. for sentences, | ||
documents, but not recursive structures like parse-trees). In principle, it is a generic format - | ||
i.e. there can be arbitrary columns, pseudo-XML elements and attributes. However, support is limited | ||
to a specific set of columns that must appear exactly in a specific order: *token text*, | ||
*part-of-speech tag*, *lemma*. Also only specific pseudo-XML elements and attributes are supported: | ||
`text` (including an `id` attribute), `s`. | ||
|
||
[cols="2,1,1,1,3"] | ||
|==== | ||
| Format | Read | Write | Custom Layers | Description | ||
|
||
| IMS CWB VRT | ||
| yes | ||
| no | ||
| no | ||
| | ||
|==== | ||
|
||
.Example | ||
[source,text] | ||
---- | ||
<text id="http://www.epguides.de/nikita.htm"> | ||
<s> | ||
Nikita NE Nikita | ||
( $( ( | ||
La FM La | ||
Femme NN Femme | ||
Nikita NE Nikita | ||
) $( ) | ||
Dieser PDS dies | ||
Episodenführer NN Episodenführer | ||
wurde VAFIN werden | ||
von APPR von | ||
September NN September | ||
1998 CARD 1998 | ||
bis APPR bis | ||
Mai NN Mai | ||
1999 CARD 1999 | ||
von APPR von | ||
Konstantin NE Konstantin | ||
C.W. NE C.W. | ||
Volkmann NE Volkmann | ||
geschrieben VVPP schreiben | ||
und KON und | ||
im APPRART im | ||
Mai NN Mai | ||
2000 CARD 2000 | ||
von APPR von | ||
Stefan NE Stefan | ||
Börzel NN Börzel | ||
übernommen VVPP übernehmen | ||
. $. . | ||
</s> | ||
</text> | ||
---- | ||
|
32 changes: 32 additions & 0 deletions
32
...tion-doc/src/main/resources/META-INF/asciidoc/user-guide/formats-inlinexml.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright 2019 | ||
// Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology | ||
// Technische Universität Darmstadt | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
[[sect_formats_inlinexml]] | ||
= Inline XML | ||
|
||
Tries its best to export the annotations into an inline XML representation. Overlapping annotations are not supported in this format and are silently discarded during export. | ||
|
||
[cols="2,1,1,1,3"] | ||
|==== | ||
| Format | Read | Write | Custom Layers | Description | ||
|
||
| link:https://uima.apache.org/d/uimaj-current/apidocs/org/apache/uima/util/CasToInlineXml.html[UIMA Inline XML] | ||
| no | ||
| yes | ||
| yes | ||
| | ||
|==== | ||
|
52 changes: 52 additions & 0 deletions
52
inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats-lif.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright 2019 | ||
// Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology | ||
// Technische Universität Darmstadt | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
[[sect_formats_lif]] | ||
= LAPPS Interchange Format | ||
|
||
The link:https://wiki.lappsgrid.org/interchange/[LAPPS Interchange Format] (LIF) is a JSON-based format which is used by the link:http://www.lappsgrid.org[Language Applications Grid]. While the format is in principle generic, the support for it is based on the link:http://vocab.lappsgrid.org[LAPPS Web Service Exchange Vocabulary]. | ||
|
||
[cols="2,1,1,1,3"] | ||
|==== | ||
| Format | Read | Write | Custom Layers | Description | ||
|
||
| link:https://wiki.lappsgrid.org/interchange/[LIF] | ||
| yes | ||
| yes | ||
| no | ||
| | ||
|==== | ||
|
||
.Example | ||
[source,text] | ||
---- | ||
{ | ||
"id": "v2", | ||
"metadata": { | ||
"contains": { | ||
"Token": { | ||
"producer": "org.anc.lapps.stanford.SATokenizer:1.4.0", | ||
"type": "tokenization:stanford" }, | ||
"Token#pos": { | ||
"producer": "org.anc.lapps.stanford.SATagger:1.4.0", | ||
"posTagSet": "penn", | ||
"type": "postagging:stanford" }}}, | ||
"annotations": [ | ||
{ "@type": "Token", "id": "tok0", "start": 0, "end": 4, "features": { "pos": "NNP" } }, | ||
{ "@type": "Token", "id": "tok1", "start": 5, "end": 10, "features": { "pos": "VBZ" } }, | ||
{ "@type": "Token", "id": "tok2", "start": 10, "end": 11, "features": { "pos": "." } } ] | ||
} | ||
---- |
70 changes: 70 additions & 0 deletions
70
inception-doc/src/main/resources/META-INF/asciidoc/user-guide/formats-nif.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// Copyright 2019 | ||
// Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology | ||
// Technische Universität Darmstadt | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
[[sect_formats_nif]] | ||
= NLP Interchange Format | ||
|
||
The link:https://persistence.uni-leipzig.org/nlp2rdf/[NLP Interchange Format] (NIF) provides a way of representing NLP information using semantic web technology, specifically RDF and OWL. A few additions to the format were defined in the apparently unofficial link:https://nif.readthedocs.io/en/latest/[NIF 2.1] specification. | ||
|
||
[cols="2,1,1,1,3"] | ||
|==== | ||
| Format | Read | Write | Custom Layers | Description | ||
|
||
| link:https://persistence.uni-leipzig.org/nlp2rdf/[NIF] | ||
| yes | ||
| yes | ||
| no | ||
| | ||
|==== | ||
|
||
.Example | ||
[source,text] | ||
---- | ||
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . | ||
@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> . | ||
@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> . | ||
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . | ||
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . | ||
<http://example.org/document0#char=0,86> | ||
a nif:RFC5147String , nif:String , nif:Context ; | ||
nif:beginIndex "0"^^xsd:nonNegativeInteger ; | ||
nif:endIndex "86"^^xsd:nonNegativeInteger ; | ||
nif:isString "Japan (Japanese: 日本 Nippon or Nihon) is a stratovolcanic archipelago of 6,852 islands."^^xsd:string ; | ||
nif:topic <http://example.org/document0#annotation0> . | ||
<http://example.org/document0#char=0,5> | ||
a nif:RFC5147String , nif:String ; | ||
nif:anchorOf "Japan"^^xsd:string ; | ||
nif:beginIndex "0"^^xsd:nonNegativeInteger ; | ||
nif:endIndex "5"^^xsd:nonNegativeInteger ; | ||
nif:referenceContext <http://example.org/document0#char=0,86> ; | ||
itsrdf:taClassRef <http://example.org/Country> , <http://example.org/StratovolcanicArchipelago> ; | ||
itsrdf:taIdentRef <http://example.org/Japan> . | ||
<http://example.org/document0#char=42,68> | ||
a nif:RFC5147String , nif:String ; | ||
nif:anchorOf "stratovolcanic archipelago"^^xsd:string ; | ||
nif:beginIndex "42"^^xsd:nonNegativeInteger ; | ||
nif:endIndex "68"^^xsd:nonNegativeInteger ; | ||
nif:referenceContext <http://example.org/document0#char=0,86> ; | ||
itsrdf:taClassRef <http://example.org/Archipelago> , rdfs:Class ; | ||
itsrdf:taIdentRef <http://example.org/StratovolcanicArchipelago> . | ||
<http://example.org/document0#annotation0> | ||
a nif:Annotation ; | ||
itsrdf:taIdentRef <http://example.org/Geography> . | ||
---- |
Oops, something went wrong.