Skip to content

Commit

Permalink
#4498 - Support exporting in brat format
Browse files Browse the repository at this point in the history
- Enable reading for the brat custom format
- Try improving the matching of brat data to the actual type system defined in the project when reading
- Use short type and attribute names when writing
  • Loading branch information
reckart committed Mar 25, 2024
1 parent a7209a1 commit 0b4f8e5
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 87 deletions.
18 changes: 18 additions & 0 deletions inception/inception-io-brat/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,18 @@
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-api-formats</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-api-annotation</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-model</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.inception.app</groupId>
<artifactId>inception-schema-api</artifactId>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
Expand Down Expand Up @@ -104,5 +112,15 @@
<artifactId>inception-support</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-ner-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-metadata-asl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
*/
package de.tudarmstadt.ukp.inception.io.brat;

import static de.tudarmstadt.ukp.inception.annotation.layer.relation.RelationLayerSupport.FEAT_REL_SOURCE;
import static de.tudarmstadt.ukp.inception.annotation.layer.relation.RelationLayerSupport.FEAT_REL_TARGET;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;

Expand All @@ -28,9 +30,11 @@

import de.tudarmstadt.ukp.clarin.webanno.api.format.FormatSupport;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.inception.annotation.layer.relation.RelationLayerSupport;
import de.tudarmstadt.ukp.inception.io.brat.config.BratAutoConfiguration;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.BratReader;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.BratWriter;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;

/**
* Support for brat format.
Expand All @@ -45,6 +49,13 @@ public class BratCustomFormatSupport
public static final String ID = "bratCustom";
public static final String NAME = "brat custom (experimental)";

private final AnnotationSchemaService schemaService;

/**
 * Creates the brat custom format support.
 *
 * @param aSchemaService
 *            the annotation schema service kept by this instance; it is queried for the
 *            project's annotation layers when the writer description is built
 */
public BratCustomFormatSupport(AnnotationSchemaService aSchemaService)
{
    this.schemaService = aSchemaService;
}

@Override
public String getId()
{
Expand All @@ -66,15 +77,16 @@ public boolean isReadable()
@Override
public boolean isWritable()
{
return false;
return true;
}

@Override
public CollectionReaderDescription getReaderDescription(Project aProject,
TypeSystemDescription aTSD)
throws ResourceInitializationException
{
return createReaderDescription(BratReader.class, aTSD, //
return createReaderDescription( //
BratReader.class, aTSD, //
BratReader.PARAM_LENIENT, true);
}

Expand All @@ -83,6 +95,17 @@ public AnalysisEngineDescription getWriterDescription(Project aProject,
TypeSystemDescription aTSD, CAS aCAS)
throws ResourceInitializationException
{
return createEngineDescription(BratWriter.class, aTSD);
var layers = schemaService.listAnnotationLayer(aProject);

var relationLayerMappings = layers.stream() //
.filter(layer -> RelationLayerSupport.TYPE.equals(layer.getType()))
.map(layer -> layer.getName() + ":" + FEAT_REL_SOURCE + ":" + FEAT_REL_TARGET)
.toList();

return createEngineDescription( //
BratWriter.class, aTSD, //
BratWriter.PARAM_SHORT_TYPE_NAMES, true, //
BratWriter.PARAM_SHORT_ATTRIBUTE_NAMES, true, //
BratWriter.PARAM_RELATION_TYPES, relationLayerMappings);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import de.tudarmstadt.ukp.inception.io.brat.BratBasicFormatSupport;
import de.tudarmstadt.ukp.inception.io.brat.BratCustomFormatSupport;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;

@Configuration
public class BratAutoConfiguration
Expand All @@ -38,8 +39,8 @@ public BratBasicFormatSupport bratBasicFormatSupport()
@Bean
@ConditionalOnProperty(prefix = "format.brat-custom", name = "enabled", //
havingValue = "true", matchIfMissing = false)
public BratCustomFormatSupport bratCustomFormatSupport()
public BratCustomFormatSupport bratCustomFormatSupport(AnnotationSchemaService aSchemaService)
{
return new BratCustomFormatSupport();
return new BratCustomFormatSupport(aSchemaService);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,8 @@
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;

import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.mapping.CommentMapping;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.mapping.Mapping;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.mapping.RelationMapping;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.mapping.SpanMapping;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.mapping.TypeMapping;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.BratAnnotation;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.BratAnnotationDocument;
Expand All @@ -72,7 +70,6 @@
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.BratNoteAnnotation;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.BratRelationAnnotation;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.BratTextAnnotation;
import de.tudarmstadt.ukp.inception.io.brat.dkprocore.internal.model.Offsets;

/**
* Reader for the brat format.
Expand Down Expand Up @@ -118,14 +115,19 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
{
super.initialize(aContext);

var mapper = new ObjectMapper();
mapper.setDefaultSetterInfo(JsonSetter.Value.forContentNulls(Nulls.AS_EMPTY));
mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
try {
mapping = mapper.readValue(mappingJson, Mapping.class);
if (mappingJson != null) {
var mapper = new ObjectMapper();
mapper.setDefaultSetterInfo(JsonSetter.Value.forContentNulls(Nulls.AS_EMPTY));
mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
try {
mapping = mapper.readValue(mappingJson, Mapping.class);
}
catch (IOException e) {
throw new ResourceInitializationException(e);
}
}
catch (IOException e) {
throw new ResourceInitializationException(e);
else {
mapping = new Mapping();
}

warnings = new LinkedHashSet<String>();
Expand Down Expand Up @@ -203,15 +205,15 @@ private void readAnnotations(JCas aJCas, InputStream aIs) throws IOException
doc = BratAnnotationDocument.read(r);
}

CAS cas = aJCas.getCas();
TypeSystem ts = aJCas.getTypeSystem();
var cas = aJCas.getCas();
var ts = aJCas.getTypeSystem();

List<BratRelationAnnotation> relations = new ArrayList<>();
List<BratEventAnnotation> events = new ArrayList<>();
List<BratNoteAnnotation> notes = new ArrayList<>();
for (BratAnnotation anno : doc.getAnnotations()) {
var relations = new ArrayList<BratRelationAnnotation>();
var events = new ArrayList<BratEventAnnotation>();
var notes = new ArrayList<BratNoteAnnotation>();
for (var anno : doc.getAnnotations()) {
if (anno instanceof BratTextAnnotation) {
Type type = mapping.getTextTypeMapppings().getUimaType(ts, anno);
var type = mapping.getTextTypeMapppings().getUimaType(ts, anno);
create(cas, type, (BratTextAnnotation) anno);
}
else if (anno instanceof BratRelationAnnotation) {
Expand All @@ -221,7 +223,7 @@ else if (anno instanceof BratNoteAnnotation) {
notes.add((BratNoteAnnotation) anno);
}
else if (anno instanceof BratEventAnnotation) {
Type type = mapping.getTextTypeMapppings().getUimaType(ts, anno);
var type = mapping.getTextTypeMapppings().getUimaType(ts, anno);
create(cas, type, (BratEventAnnotation) anno);
events.add((BratEventAnnotation) anno);
}
Expand All @@ -232,32 +234,32 @@ else if (anno instanceof BratEventAnnotation) {
}

// Go through the relations now
for (BratRelationAnnotation rel : relations) {
Type type = mapping.getRelationTypeMapppings().getUimaType(ts, rel);
for (var rel : relations) {
var type = mapping.getRelationTypeMapppings().getUimaType(ts, rel);
create(cas, type, rel);
}

// Go through the events again and handle the slots
for (BratEventAnnotation e : events) {
Type type = mapping.getTextTypeMapppings().getUimaType(ts, e);
fillSlots(cas, type, doc, e);
for (var event : events) {
var type = mapping.getTextTypeMapppings().getUimaType(ts, event);
fillSlots(cas, type, doc, event);
}

// Finally go through the notes and map them to features (if configured to do so)
for (BratNoteAnnotation n : notes) {
FeatureStructure anno = idMap.get(n.getTarget());
for (var n : notes) {
var anno = idMap.get(n.getTarget());

Type type = anno.getType();
Collection<CommentMapping> mappings = mapping.getCommentMapping(type.getName());
var type = anno.getType();
var mappings = mapping.getCommentMapping(type.getName());

if (mappings.isEmpty()) {
warnings.add("No comment mappings defined for note type [" + n.getType()
+ "] on annotation type [" + type.getName() + "]");
continue;
}

List<BratAttribute> attrs = new ArrayList<>();
for (CommentMapping m : mappings) {
var attrs = new ArrayList<BratAttribute>();
for (var m : mappings) {
if (m.matches(n.getNote())) {
attrs.add(new BratAttribute(-1, m.getFeature(), n.getTarget(), m.apply()));
}
Expand All @@ -275,11 +277,11 @@ private void readText(JCas aJCas, InputStream aIS) throws IOException

private void create(CAS aCAS, Type aType, BratTextAnnotation aAnno)
{
SpanMapping param = mapping.getSpanMapping(aType.getName());
TypeMapping tmap = mapping.getTextTypeMapppings().getMappingByBratType(aAnno.getType());
var param = mapping.getSpanMapping(aType.getName());
var tmap = mapping.getTextTypeMapppings().getMappingByBratType(aAnno.getType());

for (Offsets offset : aAnno.getOffsets()) {
AnnotationFS anno = aCAS.createAnnotation(aType, offset.getBegin(), offset.getEnd());
for (var offset : aAnno.getOffsets()) {
var anno = aCAS.createAnnotation(aType, offset.getBegin(), offset.getEnd());

if (tmap != null) {
fillDefaultAttributes(anno, tmap.getDefaultFeatureValues());
Expand All @@ -302,11 +304,11 @@ private void create(CAS aCAS, Type aType, BratTextAnnotation aAnno)

private void create(CAS aCAS, Type aType, BratEventAnnotation aAnno)
{
SpanMapping param = mapping.getSpanMapping(aType.getName());
TypeMapping tmap = mapping.getTextTypeMapppings().getMappingByBratType(aAnno.getType());
var param = mapping.getSpanMapping(aType.getName());
var tmap = mapping.getTextTypeMapppings().getMappingByBratType(aAnno.getType());

for (Offsets offset : aAnno.getTriggerAnnotation().getOffsets()) {
AnnotationFS anno = aCAS.createAnnotation(aType, offset.getBegin(), offset.getEnd());
for (var offset : aAnno.getTriggerAnnotation().getOffsets()) {
var anno = aCAS.createAnnotation(aType, offset.getBegin(), offset.getEnd());

if (tmap != null) {
fillDefaultAttributes(anno, tmap.getDefaultFeatureValues());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public class BratWriter
* </code>.
*/
public static final String PARAM_RELATION_TYPES = "relationTypes";
@ConfigurationParameter(name = PARAM_RELATION_TYPES, mandatory = true)
@ConfigurationParameter(name = PARAM_RELATION_TYPES, defaultValue = {})
/*
* , defaultValue = { Dependency._TypeName + ":" + Dependency._FeatName_Governor + ":" +
* Dependency._FeatName_Dependent }
Expand Down Expand Up @@ -175,6 +175,13 @@ public class BratWriter
"#bc80bd", "#ccebc5", "#ffed6f" })
private String[] palette;

/**
* Whether to render types by their short name or by their qualified name.
* <p>
* The parameter is declared mandatory with a default value of {@code false}. The configured
* value is forwarded to the converter through {@code setShortTypeNames} when the writer is
* initialized.
* <p>
* NOTE(review): presumably {@code true} selects the short name and {@code false} the qualified
* name - confirm against the converter implementation.
*/
public static final String PARAM_SHORT_TYPE_NAMES = "shortTypeNames";
@ConfigurationParameter(name = PARAM_SHORT_TYPE_NAMES, mandatory = true, defaultValue = "false")
private boolean shortTypeNames;

/**
* Whether to render attributes by their short name or by their qualified name.
*/
Expand All @@ -201,6 +208,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
converter.setWriteNullAttributes(writeNullAttributes);
converter.setWriteRelationAttributes(writeRelationAttributes);
converter.setShortAttributeNames(shortAttributeNames);
converter.setShortTypeNames(shortTypeNames);
converter.setPalette(palette);
converter.setExcludeTypes(excludeTypes);
converter.setSpanTypes(spanTypes);
Expand Down
Loading

0 comments on commit 0b4f8e5

Please sign in to comment.