Skip to content

Commit

Permalink
Merge branch 'main' into refactoring/Upgrade-to-Spring-Boot-3
Browse files Browse the repository at this point in the history
* main:
  #4400 - Display recommender name in log instead of RecommenderContext
  #4397 - Better error message when PMC document cannot be found
  #4395 - BioC export cannot be enabled
  #2696 - Document-level recommendations
  #2696 - Document-level recommendations
  #2696 - Document-level recommendations
  #2696 - Document-level recommendations
  No issue: Plugin complains on Jenkins that git is not found but it is there - try bumping plugin version
  • Loading branch information
reckart committed Dec 27, 2023
2 parents b83e285 + dcfc9b9 commit 3340eb7
Show file tree
Hide file tree
Showing 33 changed files with 399 additions and 419 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import static java.util.Arrays.asList;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.select;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.contentOf;
import static org.mockito.ArgumentMatchers.any;
Expand All @@ -32,6 +34,7 @@

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.JCasFactory;
import org.dkpro.core.io.tcf.TcfReader;
Expand Down Expand Up @@ -59,6 +62,7 @@
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.inception.annotation.feature.bool.BooleanFeatureSupport;
import de.tudarmstadt.ukp.inception.annotation.feature.link.LinkFeatureSupport;
Expand All @@ -76,7 +80,6 @@
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.inception.schema.service.FeatureSupportRegistryImpl;
import de.tudarmstadt.ukp.inception.support.json.JSONUtil;
import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil;

@ExtendWith(MockitoExtension.class)
public class BratSerializerImplTest
Expand Down Expand Up @@ -180,7 +183,7 @@ public void thatSentenceOrientedStrategyRenderCorrectly() throws Exception
state.setAllAnnotationLayers(schemaService.listAnnotationLayer(project));
state.setPagingStrategy(new SentenceOrientedPagingStrategy());
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand Down Expand Up @@ -219,7 +222,7 @@ public void thatLineOrientedStrategyRenderCorrectly() throws Exception
AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
state.setPagingStrategy(new LineOrientedPagingStrategy());
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand Down Expand Up @@ -258,7 +261,7 @@ public void thatTokenWrappingStrategyRenderCorrectly() throws Exception
AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
state.setPagingStrategy(new TokenWrappingPagingStrategy(80));
state.getPreferences().setWindowSize(10);
state.setFirstVisibleUnit(WebAnnoCasUtil.getFirstSentence(cas));
state.setFirstVisibleUnit(getFirstSentence(cas));
state.setProject(project);
state.setDocument(sourceDocument, asList(sourceDocument));

Expand All @@ -281,4 +284,22 @@ public void thatTokenWrappingStrategyRenderCorrectly() throws Exception
assertThat(contentOf(new File("src/test/resources/longlines.json"), UTF_8))
.isEqualToNormalizingNewlines(contentOf(new File(jsonFilePath), UTF_8));
}

/**
 * Get the first sentence annotation from the given CAS. It is used as a reference point for
 * moving forward/backward through the sentence positions.
 *
 * @param aCas
 *            the CAS, assumed to contain sentence annotations.
 * @return the first sentence annotation, or {@code null} if the CAS contains no sentence
 *         annotations.
 */
private static AnnotationFS getFirstSentence(CAS aCas)
{
    // select(...) iterates in index order, so the first element encountered is the first
    // sentence in the document.
    for (AnnotationFS sentence : select(aCas, getType(aCas, Sentence.class))) {
        return sentence;
    }
    return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_BEST_MATCH_TERM_NC;
import static de.tudarmstadt.ukp.inception.conceptlinking.model.CandidateEntity.KEY_QUERY_NC;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.selectSentenceCovering;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.selectTokensCovered;
import static java.lang.System.currentTimeMillis;
import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;
import static java.util.Comparator.comparingInt;
import static java.util.stream.Collectors.toCollection;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.select;

import java.io.File;
import java.net.URISyntaxException;
Expand All @@ -47,6 +47,7 @@
import org.apache.commons.lang3.Validate;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -58,6 +59,8 @@
import org.springframework.core.annotation.AnnotationAwareOrderComparator;

import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingProperties;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingPropertiesImpl;
import de.tudarmstadt.ukp.inception.conceptlinking.config.EntityLinkingServiceAutoConfiguration;
Expand Down Expand Up @@ -513,4 +516,31 @@ public List<KBHandle> searchItems(KnowledgeBase aKB, String aQuery)
{
return disambiguate(aKB, null, ConceptFeatureValueType.ANY_OBJECT, aQuery, null, 0, null);
}

/**
 * Get the sentence covering the given begin offset.
 *
 * @param aCas
 *            the CAS.
 * @param aBegin
 *            the begin offset.
 * @return the sentence whose span contains {@code aBegin}, or {@code null} if no sentence
 *         covers that offset.
 */
private static AnnotationFS selectSentenceCovering(CAS aCas, int aBegin)
{
    for (AnnotationFS sentence : select(aCas, getType(aCas, Sentence.class))) {
        // A sentence covers the offset if it starts at or before it and ends after it
        // (end offsets are exclusive).
        if (sentence.getBegin() <= aBegin && sentence.getEnd() > aBegin) {
            return sentence;
        }
    }
    return null;
}

/**
 * Get all token annotations covered by the span of the given annotation.
 *
 * @param aCover
 *            the covering annotation.
 * @return the tokens located within the covering annotation's span.
 */
private static Collection<AnnotationFS> selectTokensCovered(AnnotationFS aCover)
{
    var cas = aCover.getCAS();
    var tokenType = getType(cas, Token.class);
    return CasUtil.selectCovered(cas, tokenType, aCover);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@
import static org.apache.uima.cas.impl.Serialization.deserializeCASComplete;
import static org.apache.uima.cas.impl.Serialization.serializeCASComplete;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.resource.ResourceInitializationException;

Expand Down Expand Up @@ -93,8 +96,15 @@ public static CASImpl makeDummyCas()

public static Map<String, Long> countFeatureStructures(CASImpl casImpl)
{
return WebAnnoCasUtil.findAllFeatureStructures(casImpl).stream() //
return findAllFeatureStructures(casImpl).stream() //
.map(fs -> fs.getType().getName()) //
.collect(groupingBy(identity(), counting()));
}

/**
 * Collects all feature structures reachable in the given CAS.
 *
 * @param aCas
 *            the CAS to scan.
 * @return the set of all reachable feature structures, in encounter order.
 */
public static Set<FeatureStructure> findAllFeatureStructures(CAS aCas)
{
    var result = new LinkedHashSet<FeatureStructure>();
    ((CASImpl) aCas).walkReachablePlusFSsSorted(result::add, null, null, null);
    return result;
}
}
4 changes: 4 additions & 0 deletions inception/inception-external-search-pubmed/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-xml</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
*/
package de.tudarmstadt.ukp.inception.externalsearch.pubmed.pmcoa;

import java.io.IOException;
import java.util.Map;

import org.springframework.http.HttpMethod;
import org.springframework.http.converter.xml.MappingJackson2XmlHttpMessageConverter;
import org.springframework.web.client.HttpClientErrorException.NotFound;
import org.springframework.web.client.RestTemplate;

import de.tudarmstadt.ukp.inception.externalsearch.pubmed.traits.PubMedProviderTraits;
Expand All @@ -40,14 +42,25 @@ public PmcOaClient()
restTemplate.getMessageConverters().add(new MappingJackson2XmlHttpMessageConverter());
}

public byte[] bioc(PubMedProviderTraits aTraits, String aID)
public byte[] bioc(PubMedProviderTraits aTraits, String aID) throws IOException
{
var variables = Map.of( //
PARAM_ID, aID);
try {
var variables = Map.of( //
PARAM_ID, aID);

var response = restTemplate.exchange(BIOC_URL, HttpMethod.GET, null, byte[].class,
variables);
var response = restTemplate.exchange(BIOC_URL, HttpMethod.GET, null, byte[].class,
variables);

return response.getBody();
return response.getBody();
}
catch (NotFound e) {
throw new IOException("BioC version of document [" + aID + "] not found at ["
+ BIOC_URL.replace("{id}", aID)
+ "]. The Open Access files and BioC versions are not updated as "
+ "quickly as the PMC website itself is updated. It may take a couple of days until "
+ "a particular file is available as BioC. Another reason could be that the document you "
+ "are looking for is not included in the Open Access set. Try adding "
+ "`\"open access\"[filter]` without \"`\" to your search to filter by Open Access files.");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@

import java.io.Serializable;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;

//The @JsonSerialize annotation avoid the "InvalidDefinitionException: No serializer found"
//exception without having to set SerializationFeature.FAIL_ON_EMPTY_BEANS
@JsonSerialize
@JsonIgnoreProperties(ignoreUnknown = true)
public class PubMedProviderTraits
implements Serializable
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.response.ResponseAsLabelExtractor;
import de.tudarmstadt.ukp.inception.recommendation.imls.ollama.response.ResponseExtractor;
import de.tudarmstadt.ukp.inception.rendering.model.Range;
import de.tudarmstadt.ukp.inception.support.logging.LogMessage;

public class OllamaRecommender
extends NonTrainableRecommenderEngineImplBase
Expand Down Expand Up @@ -103,8 +104,9 @@ public Range predict(PredictionContext aContext, CAS aCas, int aBegin, int aEnd)
responseExtractor.extract(this, aCas, promptContext, response);
}
catch (IOException e) {
aContext.error("Ollama [%s] failed to respond: %s", traits.getModel(),
ExceptionUtils.getRootCauseMessage(e));
aContext.log(LogMessage.warn(getRecommender().getName(),
"Ollama [%s] failed to respond: %s", traits.getModel(),
ExceptionUtils.getRootCauseMessage(e)));
LOG.error("Ollama [{}] failed to respond: {}", traits.getModel(),
ExceptionUtils.getRootCauseMessage(e));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext.Key;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.TrainingCapability;
import de.tudarmstadt.ukp.inception.rendering.model.Range;
import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
import opennlp.tools.doccat.DoccatFactory;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
Expand Down Expand Up @@ -108,13 +109,14 @@ public void train(RecommenderContext aContext, List<CAS> aCasses) throws Recomme
var docSamples = extractSamples(aCasses);

if (docSamples.size() < 2) {
aContext.warn("Not enough training data: [%d] items", docSamples.size());
aContext.log(LogMessage.warn(getRecommender().getName(),
"Not enough training data: [%d] items", docSamples.size()));
return;
}

if (docSamples.stream().map(DocumentSample::getCategory).distinct().count() <= 1) {
aContext.warn("Training data requires at least two different labels",
docSamples.size());
aContext.log(LogMessage.warn(getRecommender().getName(),
"Training data requires at least two different labels", docSamples.size()));
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext.Key;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.TrainingCapability;
import de.tudarmstadt.ukp.inception.rendering.model.Range;
import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import opennlp.tools.ml.BeamSearch;
Expand Down Expand Up @@ -95,7 +96,8 @@ public void train(RecommenderContext aContext, List<CAS> aCasses) throws Recomme
var nameSamples = extractNameSamples(aCasses);

if (nameSamples.size() < 2) {
aContext.warn("Not enough training data: [%d] items", nameSamples.size());
aContext.log(LogMessage.warn(getRecommender().getName(),
"Not enough training data: [%d] items", nameSamples.size()));
return;
}

Expand All @@ -104,10 +106,10 @@ public void train(RecommenderContext aContext, List<CAS> aCasses) throws Recomme
// OpenNLP
int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

TrainingParameters params = traits.getParameters();
var params = traits.getParameters();
params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));

TokenNameFinderModel model = train(nameSamples, params);
var model = train(nameSamples, params);

aContext.put(KEY_MODEL, model);
}
Expand Down Expand Up @@ -304,7 +306,7 @@ private String determineLabel(Span aName, int aTokenIdx)
return label;
}

private List<NameSample> extractNameSamples(List<CAS> aCasses)
private List<NameSample> extractNameSamples(Iterable<CAS> aCasses)
{
var nameSamples = new ArrayList<NameSample>();

Expand Down Expand Up @@ -402,8 +404,8 @@ private TokenNameFinderModel train(List<NameSample> aNameSamples,
TrainingParameters aParameters)
throws RecommendationException
{
try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
try (var stream = new NameSampleStream(aNameSamples)) {
var finderFactory = new TokenNameFinderFactory();
return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
}
catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.RecommenderContext.Key;
import de.tudarmstadt.ukp.inception.recommendation.api.recommender.TrainingCapability;
import de.tudarmstadt.ukp.inception.rendering.model.Range;
import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
Expand Down Expand Up @@ -97,7 +98,8 @@ public void train(RecommenderContext aContext, List<CAS> aCasses) throws Recomme
var posSamples = extractPosSamples(aCasses);

if (posSamples.size() < 2) {
aContext.warn("Not enough training data: [%d] items", posSamples.size());
aContext.log(LogMessage.warn(getRecommender().getName(),
"Not enough training data: [%d] items", posSamples.size()));
return;
}

Expand Down
Loading

0 comments on commit 3340eb7

Please sign in to comment.