diff --git a/src/main/java/org/apache/uima/json/jsoncas2/model/FeatureStructures.java b/src/main/java/org/apache/uima/json/jsoncas2/model/FeatureStructures.java index 109050d..32479df 100644 --- a/src/main/java/org/apache/uima/json/jsoncas2/model/FeatureStructures.java +++ b/src/main/java/org/apache/uima/json/jsoncas2/model/FeatureStructures.java @@ -18,26 +18,40 @@ */ package org.apache.uima.json.jsoncas2.model; +import static java.util.Comparator.comparing; import static java.util.stream.Collectors.toList; import java.util.Collection; -import java.util.Comparator; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import org.apache.uima.cas.FeatureStructure; +import com.fasterxml.jackson.databind.DatabindContext; public class FeatureStructures implements Iterable { + public static final String ALL_FEATURE_STRUCTURES = "UIMA.AllFeatureStructures"; + private final List featureStructures; + private final Set typeNames; + public FeatureStructures(Collection aFeatureStructures) { + typeNames = new HashSet<>(); featureStructures = aFeatureStructures.stream() // - .sorted(Comparator.comparing(fs -> { - return fs.getType().getName(); - })) // + .map(fs -> { + typeNames.add(fs.getType().getName()); + return fs; + }) // + .sorted(comparing(fs -> fs.getType().getName())) // .collect(toList()); } + public boolean existsAnnotationOfType(String aTypeName) { + return typeNames.contains(aTypeName); + } + @Override public Iterator iterator() { return featureStructures.iterator(); @@ -46,4 +60,12 @@ public Iterator iterator() { public boolean isEmpty() { return featureStructures.isEmpty(); } + + public static void set(DatabindContext aProvider, FeatureStructures aAllFs) { + aProvider.setAttribute(ALL_FEATURE_STRUCTURES, aAllFs); + } + + public static FeatureStructures get(DatabindContext aProvider) { + return (FeatureStructures) aProvider.getAttribute(ALL_FEATURE_STRUCTURES); + } } diff --git 
a/src/main/java/org/apache/uima/json/jsoncas2/ser/CasSerializer.java b/src/main/java/org/apache/uima/json/jsoncas2/ser/CasSerializer.java index a5ebb10..6159864 100644 --- a/src/main/java/org/apache/uima/json/jsoncas2/ser/CasSerializer.java +++ b/src/main/java/org/apache/uima/json/jsoncas2/ser/CasSerializer.java @@ -68,9 +68,12 @@ public void serialize(CAS aCas, JsonGenerator aJg, SerializerProvider aProvider) serializeHeader(aCas, aJg, aProvider); + FeatureStructures allFSes = findAllFeatureStructures(aCas); + FeatureStructures.set(aProvider, allFSes); + serializeTypes(aCas, aJg, aProvider); - serializeFeatureStructures(aCas, aJg, aProvider); + serializeFeatureStructures(allFSes, aJg, aProvider); serializeViews(aCas, aJg, aProvider); @@ -94,13 +97,12 @@ private void serializeTypes(CAS aCas, JsonGenerator aJg, SerializerProvider aPro } } - private void serializeFeatureStructures(CAS aCas, JsonGenerator aJg, SerializerProvider aProvider) + private void serializeFeatureStructures(FeatureStructures aAllFSes, JsonGenerator aJg, SerializerProvider aProvider) throws IOException { - FeatureStructures allFSes = findAllFeatureStructures(aCas); - FeatureStructureToViewIndex.set(aProvider, new FeatureStructureToViewIndex(allFSes)); - if (!allFSes.isEmpty()) { + FeatureStructureToViewIndex.set(aProvider, new FeatureStructureToViewIndex(aAllFSes)); + if (!aAllFSes.isEmpty()) { aJg.writeFieldName(FEATURE_STRUCTURES_FIELD); - aProvider.defaultSerializeValue(allFSes, aJg); + aProvider.defaultSerializeValue(aAllFSes, aJg); } } diff --git a/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSerializer.java b/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSerializer.java index 75553d7..0b29172 100644 --- a/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSerializer.java +++ b/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSerializer.java @@ -20,6 +20,7 @@ import static java.util.stream.Collectors.toList; import static 
org.apache.uima.json.jsoncas2.JsonCas2Names.ELEMENT_TYPE_FIELD; +import static org.apache.uima.json.jsoncas2.JsonCas2Names.NAME_FIELD; import static org.apache.uima.json.jsoncas2.JsonCas2Names.SUPER_TYPE_FIELD; import java.io.IOException; @@ -28,8 +29,6 @@ import org.apache.uima.cas.Feature; import org.apache.uima.cas.Type; import org.apache.uima.cas.impl.TypeImpl; -import org.apache.uima.json.jsoncas2.JsonCas2Names; - import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.SerializerProvider; import com.fasterxml.jackson.databind.ser.std.StdSerializer; @@ -46,7 +45,7 @@ public void serialize(Type aType, JsonGenerator aJg, SerializerProvider aProvide throws IOException { aJg.writeStartObject(aType); - aJg.writeStringField(JsonCas2Names.NAME_FIELD, aType.getName()); + aJg.writeStringField(NAME_FIELD, aType.getName()); Type parent = ((TypeImpl) aType).getSuperType(); if (parent != null) { @@ -57,13 +56,12 @@ public void serialize(Type aType, JsonGenerator aJg, SerializerProvider aProvide aJg.writeStringField(ELEMENT_TYPE_FIELD, aType.getComponentType().getName()); } - List newFeatures = aType.getFeatures().stream().filter(f -> f.getDomain() == aType) + List localFeatures = aType.getFeatures().stream() // + .filter(f -> f.getDomain() == aType) // .collect(toList()); - if (!newFeatures.isEmpty()) { - for (Feature feature : newFeatures) { - aJg.writeFieldName(feature.getShortName()); - aProvider.defaultSerializeValue(feature, aJg); - } + for (Feature feature : localFeatures) { + aJg.writeFieldName(feature.getShortName()); + aProvider.defaultSerializeValue(feature, aJg); } aJg.writeEndObject(); diff --git a/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSystemSerializer.java b/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSystemSerializer.java index 7be06ca..03a0235 100644 --- a/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSystemSerializer.java +++ b/src/main/java/org/apache/uima/json/jsoncas2/ser/TypeSystemSerializer.java 
@@ -24,14 +24,19 @@ import static java.util.stream.Collectors.toList; import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.StreamSupport; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Feature; import org.apache.uima.cas.Type; import org.apache.uima.cas.TypeSystem; +import org.apache.uima.json.jsoncas2.mode.TypeSystemMode; +import org.apache.uima.json.jsoncas2.model.FeatureStructures; import org.apache.uima.json.jsoncas2.ref.ReferenceCache; import com.fasterxml.jackson.core.JsonGenerator; @@ -41,7 +46,7 @@ public class TypeSystemSerializer extends StdSerializer { private static final long serialVersionUID = -4369127219437592227L; - private final Set BUILT_IN_TYPES = unmodifiableSet(new HashSet<>(asList( + private static final Set BUILT_IN_TYPES = unmodifiableSet(new HashSet<>(asList( CAS.TYPE_NAME_ANNOTATION, CAS.TYPE_NAME_ANNOTATION_BASE, CAS.TYPE_NAME_ARRAY_BASE, CAS.TYPE_NAME_BOOLEAN, CAS.TYPE_NAME_BOOLEAN_ARRAY, CAS.TYPE_NAME_BYTE, CAS.TYPE_NAME_BYTE_ARRAY, CAS.TYPE_NAME_DOCUMENT_ANNOTATION, CAS.TYPE_NAME_DOUBLE, @@ -65,11 +70,9 @@ public void serialize(TypeSystem aTypeSystem, JsonGenerator jg, SerializerProvid throws IOException { ReferenceCache refCache = ReferenceCache.get(aProvider); - jg.writeStartObject(aTypeSystem); + List types = findTypesToSerialize(aTypeSystem, aProvider); - List types = StreamSupport.stream(aTypeSystem.spliterator(), false) - .sorted(comparing(Type::getName)) - .filter(type -> !BUILT_IN_TYPES.contains(type.getName())).collect(toList()); + jg.writeStartObject(aTypeSystem); for (Type type : types) { jg.writeFieldName(refCache.typeRef(type)); @@ -79,4 +82,65 @@ public void serialize(TypeSystem aTypeSystem, JsonGenerator jg, SerializerProvid jg.writeEndObject(); } + + private List findTypesToSerialize(TypeSystem aTypeSystem, SerializerProvider aProvider) { + Iterable typesSource = aTypeSystem; + + if 
(TypeSystemMode.get(aProvider) == TypeSystemMode.MINIMAL) { + Deque queue = collectTypesUsedByFeatureStructures(aTypeSystem, aProvider); + typesSource = collectTypesUsedByTypes(aTypeSystem, typesSource, queue); + } + + return StreamSupport.stream(typesSource.spliterator(), false) // + .filter(type -> !type.isArray()) // + .filter(type -> !BUILT_IN_TYPES.contains(type.getName())) // + .sorted(comparing(Type::getName)) // + .collect(toList()); + } + + private Set collectTypesUsedByTypes(TypeSystem aTypeSystem, Iterable typesSource, Deque queue) { + Set typeSet = new HashSet<>(); + while (!queue.isEmpty()) { + Type t = queue.poll(); + + if (typeSet.contains(t)) { + continue; + } + + for (Feature f : t.getFeatures()) { + Type parent = aTypeSystem.getParent(t); + while (parent != null) { + if (!typeSet.contains(parent)) { + queue.add(parent); + } + parent = aTypeSystem.getParent(parent); + } + + Type range = f.getRange(); + if (!typeSet.contains(range)) { + queue.add(range); + } + + Type componentType = range.getComponentType(); + if (componentType != null && !typeSet.contains(componentType)) { + queue.add(componentType); + } + } + + typeSet.add(t); + } + return typeSet; + } + + private Deque collectTypesUsedByFeatureStructures(TypeSystem aTypeSystem, + SerializerProvider aProvider) { + FeatureStructures allFs = FeatureStructures.get(aProvider); + Deque queue = new ArrayDeque<>(); + aTypeSystem.forEach(type -> { + if (allFs.existsAnnotationOfType(type.getName())) { + queue.add(type); + } + }); + return queue; + } } diff --git a/src/test/java/org/apache/uima/json/jsoncas2/ser/CasSerializerTest.java b/src/test/java/org/apache/uima/json/jsoncas2/ser/CasSerializerTest.java new file mode 100644 index 0000000..975c894 --- /dev/null +++ b/src/test/java/org/apache/uima/json/jsoncas2/ser/CasSerializerTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.uima.json.jsoncas2.ser; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.uima.UIMAFramework.getResourceSpecifierFactory; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.contentOf; + +import java.io.File; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.json.jsoncas2.JsonCas2Serializer; +import org.apache.uima.json.jsoncas2.mode.TypeSystemMode; +import org.apache.uima.resource.metadata.TypeDescription; +import org.apache.uima.resource.metadata.TypeSystemDescription; +import org.apache.uima.util.CasCreationUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class CasSerializerTest { + private final static String USED_ANNOTATION_TYPE = "custom.UsedAnnotationType"; + + private final static String ANNOTATION_TYPE_USED_FROM_RANGE = "custom.AnnotationTypeUsedFromRange"; + + private final static String ANNOTATION_TYPE_USED_FROM_COMPONENT = "custom.AnnotationTypeUsedFromComponent"; + + private final static String ANNOTATION_TYPE_USED_FROM_PARENT = "custom.AnnotationTypeUsedFromParent"; + + private final static String 
UNUSED_ANNOTATION_TYPE = "custom.UnusedAnnotationType"; + + private JsonCas2Serializer sut; + + @BeforeEach + void setup() { + sut = new JsonCas2Serializer(); + } + + @Test + void thatUnusedTypesAreNotSerializedInMinimalTypeSystemMode(@TempDir + File aTemp) throws Exception { + CAS cas = CasCreationUtils.createCas(makeTypeSystem(), null, null); + createFeatureStructure(USED_ANNOTATION_TYPE, cas); + + File out = new File(aTemp, "out.json"); + sut.setTypeSystemMode(TypeSystemMode.MINIMAL); + sut.serialize(cas, out); + + assertThat(contentOf(out, UTF_8)).isEqualTo( + contentOf(getClass().getResource("/CasSerializerTest/minimalTypeSystem.json"), UTF_8)); + } + + @Test + void thatAllTypesAreSerializedInFullTypeSystemMode(@TempDir + File aTemp) throws Exception { + CAS cas = CasCreationUtils.createCas(makeTypeSystem(), null, null); + createFeatureStructure(USED_ANNOTATION_TYPE, cas); + + File out = new File(aTemp, "out.json"); + sut.setTypeSystemMode(TypeSystemMode.FULL); + sut.serialize(cas, out); + + assertThat(contentOf(out, UTF_8)).isEqualTo( + contentOf(getClass().getResource("/CasSerializerTest/fullTypeSystem.json"), UTF_8)); + } + + @Test + void thatNoTypesAreSerializedInNoTypeSystemMode(@TempDir + File aTemp) throws Exception { + CAS cas = CasCreationUtils.createCas(makeTypeSystem(), null, null); + createFeatureStructure(USED_ANNOTATION_TYPE, cas); + + File out = new File(aTemp, "out.json"); + sut.setTypeSystemMode(TypeSystemMode.NONE); + sut.serialize(cas, out); + + assertThat(contentOf(out, UTF_8)).isEqualTo( + contentOf(getClass().getResource("/CasSerializerTest/noTypeSystem.json"), UTF_8)); + } + + private TypeSystemDescription makeTypeSystem() { + TypeSystemDescription tsd = getResourceSpecifierFactory().createTypeSystemDescription(); + tsd.addType(ANNOTATION_TYPE_USED_FROM_PARENT, null, CAS.TYPE_NAME_ANNOTATION); + tsd.addType(ANNOTATION_TYPE_USED_FROM_RANGE, null, CAS.TYPE_NAME_ANNOTATION); + tsd.addType(ANNOTATION_TYPE_USED_FROM_COMPONENT, null, 
CAS.TYPE_NAME_ANNOTATION); + tsd.addType(UNUSED_ANNOTATION_TYPE, null, CAS.TYPE_NAME_ANNOTATION); + + TypeDescription td = tsd.addType(USED_ANNOTATION_TYPE, null, ANNOTATION_TYPE_USED_FROM_PARENT); + td.addFeature("feat1", null, ANNOTATION_TYPE_USED_FROM_RANGE); + td.addFeature("feat2", null, CAS.TYPE_NAME_FS_ARRAY, ANNOTATION_TYPE_USED_FROM_COMPONENT, + false); + return tsd; + } + + private FeatureStructure createFeatureStructure(String usedAnnotationType, CAS cas) { + FeatureStructure fs = cas.createAnnotation(cas.getTypeSystem().getType(usedAnnotationType), 0, + 0); + cas.addFsToIndexes(fs); + return fs; + } +} diff --git a/src/test/java/org/apache/uima/json/jsoncas2/ser/FeatureDeSerializerTest.java b/src/test/java/org/apache/uima/json/jsoncas2/ser/FeatureDeSerializerTest.java index e144757..946ccd9 100644 --- a/src/test/java/org/apache/uima/json/jsoncas2/ser/FeatureDeSerializerTest.java +++ b/src/test/java/org/apache/uima/json/jsoncas2/ser/FeatureDeSerializerTest.java @@ -40,16 +40,11 @@ import org.apache.uima.util.TypeSystemUtil; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; public class FeatureDeSerializerTest { - private Logger log = LoggerFactory.getLogger(getClass()); - private static final String TYPE = "Type"; private static final String FEATURE = "feature"; diff --git a/src/test/java/org/apache/uima/json/jsoncas2/ser/TypeDeSerializerTest.java b/src/test/java/org/apache/uima/json/jsoncas2/ser/TypeDeSerializerTest.java index 7e97998..9671bcc 100644 --- a/src/test/java/org/apache/uima/json/jsoncas2/ser/TypeDeSerializerTest.java +++ b/src/test/java/org/apache/uima/json/jsoncas2/ser/TypeDeSerializerTest.java @@ -31,16 +31,11 @@ import org.apache.uima.resource.metadata.TypeSystemDescription; import 
org.apache.uima.util.CasCreationUtils; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; public class TypeDeSerializerTest { - private Logger log = LoggerFactory.getLogger(getClass()); - @Test public void thatTypeDeSerializationWorks() throws Exception { ObjectMapper mapper = getMapper(); diff --git a/src/test/resources/CasSerializerTest/fullTypeSystem.json b/src/test/resources/CasSerializerTest/fullTypeSystem.json new file mode 100644 index 0000000..1d40c87 --- /dev/null +++ b/src/test/resources/CasSerializerTest/fullTypeSystem.json @@ -0,0 +1,50 @@ +{ + "%TYPES" : { + "custom.AnnotationTypeUsedFromComponent" : { + "%NAME" : "custom.AnnotationTypeUsedFromComponent", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.AnnotationTypeUsedFromParent" : { + "%NAME" : "custom.AnnotationTypeUsedFromParent", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.AnnotationTypeUsedFromRange" : { + "%NAME" : "custom.AnnotationTypeUsedFromRange", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.UnusedAnnotationType" : { + "%NAME" : "custom.UnusedAnnotationType", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.UsedAnnotationType" : { + "%NAME" : "custom.UsedAnnotationType", + "%SUPER_TYPE" : "custom.AnnotationTypeUsedFromParent", + "feat1" : { + "%NAME" : "feat1", + "%RANGE" : "custom.AnnotationTypeUsedFromRange" + }, + "feat2" : { + "%NAME" : "feat2", + "%RANGE" : "custom.AnnotationTypeUsedFromComponent[]" + } + } + }, + "%FEATURE_STRUCTURES" : [ { + "%ID" : 1, + "%TYPE" : "custom.UsedAnnotationType", + "@sofa" : 2, + "begin" : 0, + "end" : 0 + }, { + "%ID" : 2, + "%TYPE" : "uima.cas.Sofa", + "sofaNum" : 1, + "sofaID" : "_InitialView" + } ], + "%VIEWS" : { + "_InitialView" : { + "%SOFA" : 2, + "%MEMBERS" : [ 1 ] + } + } +} \ No newline at end of file diff 
--git a/src/test/resources/CasSerializerTest/minimalTypeSystem.json b/src/test/resources/CasSerializerTest/minimalTypeSystem.json new file mode 100644 index 0000000..245672d --- /dev/null +++ b/src/test/resources/CasSerializerTest/minimalTypeSystem.json @@ -0,0 +1,46 @@ +{ + "%TYPES" : { + "custom.AnnotationTypeUsedFromComponent" : { + "%NAME" : "custom.AnnotationTypeUsedFromComponent", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.AnnotationTypeUsedFromParent" : { + "%NAME" : "custom.AnnotationTypeUsedFromParent", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.AnnotationTypeUsedFromRange" : { + "%NAME" : "custom.AnnotationTypeUsedFromRange", + "%SUPER_TYPE" : "uima.tcas.Annotation" + }, + "custom.UsedAnnotationType" : { + "%NAME" : "custom.UsedAnnotationType", + "%SUPER_TYPE" : "custom.AnnotationTypeUsedFromParent", + "feat1" : { + "%NAME" : "feat1", + "%RANGE" : "custom.AnnotationTypeUsedFromRange" + }, + "feat2" : { + "%NAME" : "feat2", + "%RANGE" : "custom.AnnotationTypeUsedFromComponent[]" + } + } + }, + "%FEATURE_STRUCTURES" : [ { + "%ID" : 1, + "%TYPE" : "custom.UsedAnnotationType", + "@sofa" : 2, + "begin" : 0, + "end" : 0 + }, { + "%ID" : 2, + "%TYPE" : "uima.cas.Sofa", + "sofaNum" : 1, + "sofaID" : "_InitialView" + } ], + "%VIEWS" : { + "_InitialView" : { + "%SOFA" : 2, + "%MEMBERS" : [ 1 ] + } + } +} \ No newline at end of file diff --git a/src/test/resources/CasSerializerTest/noTypeSystem.json b/src/test/resources/CasSerializerTest/noTypeSystem.json new file mode 100644 index 0000000..2f97e2f --- /dev/null +++ b/src/test/resources/CasSerializerTest/noTypeSystem.json @@ -0,0 +1,20 @@ +{ + "%FEATURE_STRUCTURES" : [ { + "%ID" : 1, + "%TYPE" : "custom.UsedAnnotationType", + "@sofa" : 2, + "begin" : 0, + "end" : 0 + }, { + "%ID" : 2, + "%TYPE" : "uima.cas.Sofa", + "sofaNum" : 1, + "sofaID" : "_InitialView" + } ], + "%VIEWS" : { + "_InitialView" : { + "%SOFA" : 2, + "%MEMBERS" : [ 1 ] + } + } +} \ No newline at end of file