From c272986fb4b560cbd017773284ce1486a927400f Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Thu, 4 Apr 2024 23:19:56 +0200 Subject: [PATCH] #4525 - Support recommendations for boolean features - Add support for boolean features to the doccat recommender - Fix issue editing the traits of the doccat recommender --- ...enNlpDoccatMetadataRecommenderFactory.java | 6 ++-- .../doccat/OpenNlpDoccatRecommender.java | 36 +++++++++---------- .../OpenNlpDoccatRecommenderFactory.java | 5 +-- .../OpenNlpDoccatRecommenderTraitsEditor.java | 10 ++++-- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatMetadataRecommenderFactory.java b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatMetadataRecommenderFactory.java index ca92dfa1b60..45da8976bd9 100644 --- a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatMetadataRecommenderFactory.java +++ b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatMetadataRecommenderFactory.java @@ -21,6 +21,8 @@ */ package de.tudarmstadt.ukp.inception.recommendation.imls.opennlp.doccat; +import static java.util.Arrays.asList; + import org.apache.uima.cas.CAS; import org.apache.wicket.model.IModel; @@ -66,8 +68,8 @@ public boolean accepts(AnnotationLayer aLayer, AnnotationFeature aFeature) var compatibleSpanLayer = DocumentMetadataLayerSupport.TYPE.equals(aLayer.getType()); - var compatibleFeature = CAS.TYPE_NAME_STRING.equals(aFeature.getType()) - || aFeature.isVirtualFeature(); + var compatibleFeature = asList(CAS.TYPE_NAME_STRING, CAS.TYPE_NAME_BOOLEAN) + .contains(aFeature.getType()) || aFeature.isVirtualFeature(); return compatibleSpanLayer && compatibleFeature; } diff --git a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommender.java b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommender.java index f8ee25f0058..61c40fd7f40 100644 --- a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommender.java +++ b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommender.java @@ -28,16 +28,12 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; -import java.util.Collection; import java.util.List; -import java.util.Map.Entry; import java.util.Objects; import org.apache.commons.lang3.math.NumberUtils; import org.apache.uima.cas.CAS; -import org.apache.uima.cas.Feature; import org.apache.uima.cas.FeatureStructure; -import org.apache.uima.cas.Type; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.jcas.cas.AnnotationBase; import org.slf4j.Logger; @@ -180,7 +176,7 @@ public Range predict(PredictionContext aContext, CAS aCas, int aBegin, int aEnd) else { annotation = aCas.createFS(predictedType); } - annotation.setStringValue(predictedFeature, label); + annotation.setFeatureValueFromString(predictedFeature, label); annotation.setDoubleValue(scoreFeature, NumberUtils.max(outcome)); annotation.setBooleanValue(isPredictionFeature, true); aCas.addFsToIndexes(annotation); @@ -223,7 +219,7 @@ public EvaluationResult evaluate(List aCasses, DataSplitter aDataSplitter) var trainRatio = (overallTrainingSize > 0) ? trainingSetSize / overallTrainingSize : 0.0; if (trainingSetSize < MIN_TRAINING_SET_SIZE || testSetSize < MIN_TEST_SET_SIZE) { - String msg = String.format( + var msg = String.format( "Not enough evaluation data: training set size [%d] (min. %d), test set size [%d] (min. %d) of total [%d] (min. %d)", trainingSetSize, MIN_TRAINING_SET_SIZE, testSetSize, MIN_TEST_SET_SIZE, data.size(), (MIN_TRAINING_SET_SIZE + MIN_TEST_SET_SIZE)); @@ -269,20 +265,20 @@ protected List extractSamples(List aCasses) { var samples = new ArrayList(); casses: for (CAS cas : aCasses) { - Type sampleUnitType = getType(cas, getSampleUnit()); - Type tokenType = getType(cas, Token.class); + var sampleUnitType = getType(cas, getSampleUnit()); + var tokenType = getType(cas, Token.class); var sampleUnits = indexCovered(cas, sampleUnitType, tokenType); - for (Entry> e : sampleUnits.entrySet()) { - AnnotationFS sampleUnit = e.getKey(); - Collection tokens = e.getValue(); - String[] tokenTexts = tokens.stream().map(AnnotationFS::getCoveredText) + for (var e : sampleUnits.entrySet()) { + var sampleUnit = e.getKey(); + var tokens = e.getValue(); + var tokenTexts = tokens.stream().map(AnnotationFS::getCoveredText) .toArray(String[]::new); - Type annotationType = getType(cas, layerName); - Feature feature = annotationType.getFeatureByBaseName(featureName); + var annotationType = getType(cas, layerName); + var feature = annotationType.getFeatureByBaseName(featureName); - for (AnnotationFS annotation : selectCovered(annotationType, sampleUnit)) { + for (var annotation : selectCovered(annotationType, sampleUnit)) { if (samples.size() >= traits.getTrainingSetSizeLimit()) { break casses; } @@ -291,9 +287,9 @@ protected List extractSamples(List aCasses) continue; } - String label = annotation.getFeatureValueAsString(feature); - DocumentSample nameSample = new DocumentSample( - label != null ? label : NO_CATEGORY, tokenTexts); + var label = annotation.getFeatureValueAsString(feature); + var nameSample = new DocumentSample(label != null ? label : NO_CATEGORY, + tokenTexts); if (nameSample.getCategory() != null) { samples.add(nameSample); } @@ -307,8 +303,8 @@ protected List extractSamples(List aCasses) private DoccatModel train(List aSamples, TrainingParameters aParameters) throws RecommendationException { - try (DocumentSampleStream stream = new DocumentSampleStream(aSamples)) { - DoccatFactory factory = new DoccatFactory(); + try (var stream = new DocumentSampleStream(aSamples)) { + var factory = new DoccatFactory(); return DocumentCategorizerME.train("unknown", stream, aParameters, factory); } catch (IOException e) { diff --git a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderFactory.java b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderFactory.java index 166b4b4923c..34e85b690e7 100644 --- a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderFactory.java +++ b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderFactory.java @@ -22,6 +22,7 @@ package de.tudarmstadt.ukp.inception.recommendation.imls.opennlp.doccat; import static de.tudarmstadt.ukp.clarin.webanno.model.AnchoringMode.SENTENCES; +import static java.util.Arrays.asList; import org.apache.uima.cas.CAS; import org.apache.wicket.model.IModel; @@ -70,8 +71,8 @@ public boolean accepts(AnnotationLayer aLayer, AnnotationFeature aFeature) && !aLayer.isCrossSentence() // && SpanLayerSupport.TYPE.equals(aLayer.getType()); - var compatibleFeature = CAS.TYPE_NAME_STRING.equals(aFeature.getType()) - || aFeature.isVirtualFeature(); + var compatibleFeature = asList(CAS.TYPE_NAME_STRING, CAS.TYPE_NAME_BOOLEAN) + .contains(aFeature.getType()) || aFeature.isVirtualFeature(); return compatibleSpanLayer && compatibleFeature; } diff --git a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderTraitsEditor.java b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderTraitsEditor.java index 13b5133e755..e2f1b77335e 100644 --- a/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderTraitsEditor.java +++ b/inception/inception-imls-opennlp/src/main/java/de/tudarmstadt/ukp/inception/recommendation/imls/opennlp/doccat/OpenNlpDoccatRecommenderTraitsEditor.java @@ -23,6 +23,7 @@ import org.apache.wicket.model.IModel; import org.apache.wicket.spring.injection.annot.SpringBean; +import de.tudarmstadt.ukp.inception.recommendation.api.RecommenderFactoryRegistry; import de.tudarmstadt.ukp.inception.recommendation.api.model.Recommender; import de.tudarmstadt.ukp.inception.recommendation.api.recommender.DefaultTrainableRecommenderTraitsEditor; import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommendationEngineFactory; @@ -34,7 +35,7 @@ public class OpenNlpDoccatRecommenderTraitsEditor private static final String MID_FORM = "form"; - private @SpringBean RecommendationEngineFactory toolFactory; + private @SpringBean RecommenderFactoryRegistry recommenderFactoryRegistry; private final OpenNlpDoccatRecommenderTraits traits; @@ -42,10 +43,13 @@ public OpenNlpDoccatRecommenderTraitsEditor(String aId, IModel aRec { super(aId, aRecommender); + var toolFactory = (RecommendationEngineFactory) recommenderFactoryRegistry + .getFactory(aRecommender.getObject().getTool()); + traits = toolFactory.readTraits(aRecommender.getObject()); - Form form = new Form( - MID_FORM, new CompoundPropertyModel<>(traits)) + var form = new Form(MID_FORM, + new CompoundPropertyModel<>(traits)) { private static final long serialVersionUID = -3109239605742291123L;