Skip to content

Commit

Permalink
Merge branch 'main' into refactoring/4558-Better-verification-for-fea…
Browse files Browse the repository at this point in the history
…ture-names

* main:
  #4560 - Describe how to use ELK to process Snomed before import
  #4513 - Support full text search in knowledge bases running on Blazegraph
  #4550 - Clean up code
  • Loading branch information
reckart committed Feb 26, 2024
2 parents 283ac92 + 3b19c9f commit 4bc22f3
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,14 @@

import static org.apache.uima.fit.factory.TypeSystemDescriptionFactory.createTypeSystemDescription;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.FSUtil.setFeature;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.resource.metadata.TypeSystemDescription;
Expand Down Expand Up @@ -58,8 +54,7 @@ public static void clearCasMetadata(CAS aCas) throws IllegalStateException
return;
}

List<AnnotationFS> cmds = new ArrayList<>(
CasUtil.select(aCas, getType(aCas, CASMetadata.class)));
var cmds = aCas.select(CASMetadata.class).toList();
if (cmds.size() > 1) {
throw new IllegalStateException("CAS contains more than one CASMetadata instance");
}
Expand All @@ -82,41 +77,42 @@ public static void addOrUpdateCasMetadata(CAS aCas, long aTimeStamp, SourceDocum
return;
}

Type casMetadataType = getType(aCas, CASMetadata.class);
var casMetadataType = getType(aCas, CASMetadata.class);
FeatureStructure cmd;
List<AnnotationFS> cmds = new ArrayList<>(CasUtil.select(aCas, casMetadataType));
var cmds = aCas.select(CASMetadata.class).toList();
if (cmds.size() > 1) {
throw new IOException("CAS contains more than one CASMetadata instance!");
}
else if (cmds.size() == 1) {

if (cmds.size() == 1) {
cmd = cmds.get(0);
}
else {
cmd = aCas.createAnnotation(casMetadataType, 0, 0);
}

if (cmd.getType().getFeatureByBaseName("username") != null) {
FSUtil.setFeature(cmd, "username", aUsername);
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_username) != null) {
setFeature(cmd, CASMetadata._FeatName_username, aUsername);
}

if (cmd.getType().getFeatureByBaseName("sourceDocumentId") != null) {
FSUtil.setFeature(cmd, "sourceDocumentId", aDocument.getId());
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_sourceDocumentId) != null) {
setFeature(cmd, CASMetadata._FeatName_sourceDocumentId, aDocument.getId());
}

if (cmd.getType().getFeatureByBaseName("sourceDocumentName") != null) {
FSUtil.setFeature(cmd, "sourceDocumentName", aDocument.getName());
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_sourceDocumentName) != null) {
setFeature(cmd, CASMetadata._FeatName_sourceDocumentName, aDocument.getName());
}

if (cmd.getType().getFeatureByBaseName("projectId") != null) {
FSUtil.setFeature(cmd, "projectId", aDocument.getProject().getId());
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_projectId) != null) {
setFeature(cmd, CASMetadata._FeatName_projectId, aDocument.getProject().getId());
}

if (cmd.getType().getFeatureByBaseName("projectName") != null) {
FSUtil.setFeature(cmd, "projectName", aDocument.getProject().getName());
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_projectName) != null) {
setFeature(cmd, CASMetadata._FeatName_projectName, aDocument.getProject().getName());
}

if (cmd.getType().getFeatureByBaseName("lastChangedOnDisk") != null) {
FSUtil.setFeature(cmd, "lastChangedOnDisk", aTimeStamp);
if (cmd.getType().getFeatureByBaseName(CASMetadata._FeatName_lastChangedOnDisk) != null) {
setFeature(cmd, CASMetadata._FeatName_lastChangedOnDisk, aTimeStamp);
LOG.trace("CAS [{}] for [{}]@[{}]({}): set lastChangedOnDisk: {}", aCas.hashCode(),
aUsername, aDocument.getName(), aDocument.getId(), aTimeStamp);
}
Expand All @@ -131,17 +127,18 @@ public static Optional<FeatureStructure> getCasMetadataFS(CAS aCas)

public static long getLastChanged(CAS aCas)
{
Type casMetadataType = getType(aCas, CASMetadata.class);
Feature feature = casMetadataType.getFeatureByBaseName("lastChangedOnDisk");
var casMetadataType = getType(aCas, CASMetadata.class);
var feature = casMetadataType.getFeatureByBaseName(CASMetadata._FeatName_lastChangedOnDisk);
return aCas.select(casMetadataType).map(cmd -> cmd.getLongValue(feature)).findFirst()
.orElse(-1l);
}

public static Optional<String> getUsername(CAS aCas)
{
try {
FeatureStructure fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(FSUtil.getFeature(fs, "username", String.class));
var fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(
FSUtil.getFeature(fs, CASMetadata._FeatName_username, String.class));
}
catch (IllegalArgumentException e) {
return Optional.empty();
Expand All @@ -151,8 +148,9 @@ public static Optional<String> getUsername(CAS aCas)
public static Optional<Long> getSourceDocumentId(CAS aCas)
{
try {
FeatureStructure fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(FSUtil.getFeature(fs, "sourceDocumentId", Long.class));
var fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(
FSUtil.getFeature(fs, CASMetadata._FeatName_sourceDocumentId, Long.class));
}
catch (IllegalArgumentException e) {
return Optional.empty();
Expand All @@ -162,8 +160,9 @@ public static Optional<Long> getSourceDocumentId(CAS aCas)
public static Optional<String> getSourceDocumentName(CAS aCas)
{
try {
FeatureStructure fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(FSUtil.getFeature(fs, "sourceDocumentName", String.class));
var fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(
FSUtil.getFeature(fs, CASMetadata._FeatName_sourceDocumentName, String.class));
}
catch (IllegalArgumentException e) {
return Optional.empty();
Expand All @@ -173,8 +172,9 @@ public static Optional<String> getSourceDocumentName(CAS aCas)
public static Optional<Long> getProjectId(CAS aCas)
{
try {
FeatureStructure fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(FSUtil.getFeature(fs, "projectId", Long.class));
var fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional
.ofNullable(FSUtil.getFeature(fs, CASMetadata._FeatName_projectId, Long.class));
}
catch (IllegalArgumentException e) {
return Optional.empty();
Expand All @@ -184,8 +184,9 @@ public static Optional<Long> getProjectId(CAS aCas)
public static Optional<String> getProjectName(CAS aCas)
{
try {
FeatureStructure fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(FSUtil.getFeature(fs, "projectName", String.class));
var fs = CasUtil.selectSingle(aCas, getType(aCas, CASMetadata.class));
return Optional.ofNullable(
FSUtil.getFeature(fs, CASMetadata._FeatName_projectName, String.class));
}
catch (IllegalArgumentException e) {
return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.CURATION_USER;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.exists;
import static de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil.getRealCas;
import static java.util.Arrays.asList;
import static java.util.Collections.unmodifiableList;
import static java.util.Collections.unmodifiableMap;
import static java.util.Comparator.comparing;
Expand Down Expand Up @@ -426,7 +427,7 @@ public File exportCasToFile(CAS aCas, SourceDocument aDocument, String aFileName
Map<Pair<Project, String>, Object> aBulkOperationContext)
throws IOException, UIMAException
{
Project project = aDocument.getProject();
var project = aDocument.getProject();
try (var logCtx = withProjectLogger(project)) {
var bulkOperationContext = aBulkOperationContext;
if (bulkOperationContext == null) {
Expand Down Expand Up @@ -483,10 +484,9 @@ public TypeSystemDescription getExportSpecificTypes()
public TypeSystemDescription getTypeSystemForExport(Project aProject)
throws ResourceInitializationException
{
var tsds = new ArrayList<TypeSystemDescription>();
tsds.add(schemaTypeSystem);
tsds.add(annotationService.getFullProjectTypeSystem(aProject, false));
return mergeTypeSystems(tsds);
return mergeTypeSystems(asList( //
schemaTypeSystem, //
annotationService.getFullProjectTypeSystem(aProject, false)));
}

/**
Expand Down Expand Up @@ -514,7 +514,7 @@ public void prepareCasForExport(CAS aSourceCas, CAS aTargetCas, SourceDocument a
TypeSystemDescription aFullProjectTypeSystem)
throws ResourceInitializationException, UIMAException, IOException
{
TypeSystemDescription tsd = aFullProjectTypeSystem;
var tsd = aFullProjectTypeSystem;
if (tsd == null) {
tsd = getTypeSystemForExport(aSourceDocument.getProject());
}
Expand All @@ -525,10 +525,9 @@ public void prepareCasForExport(CAS aSourceCas, CAS aTargetCas, SourceDocument a
private List<AnnotationFeature> listSupportedFeatures(Project aProject,
Map<Pair<Project, String>, Object> aBulkOperationContext)
{
Pair<Project, String> exportFeaturesKey = Pair.of(aProject, "exportFeatures");
var exportFeaturesKey = Pair.of(aProject, "exportFeatures");
@SuppressWarnings("unchecked")
List<AnnotationFeature> features = (List<AnnotationFeature>) aBulkOperationContext
.get(exportFeaturesKey);
var features = (List<AnnotationFeature>) aBulkOperationContext.get(exportFeaturesKey);
if (features == null) {
features = annotationService.listSupportedFeatures(aProject).stream() //
.filter(AnnotationFeature::isEnabled) //
Expand Down Expand Up @@ -561,7 +560,7 @@ private void addLayerAndFeatureDefinitionAnnotations(CAS aCas, Project aProject,
var featuresGroupedByLayer = allFeatures.stream() //
.collect(groupingBy(AnnotationFeature::getLayer));

var layers = featuresGroupedByLayer.keySet().stream()
var layers = featuresGroupedByLayer.keySet().stream() //
.sorted(comparing(AnnotationLayer::getName)) //
.toList();

Expand All @@ -571,8 +570,9 @@ private void addLayerAndFeatureDefinitionAnnotations(CAS aCas, Project aProject,
setFeature(layerDefFs, FEATURE_BASE_NAME_UI_NAME, layer.getUiName());
aCas.addFsToIndexes(layerDefFs);

var features = featuresGroupedByLayer.get(layer).stream()
.sorted(comparing(AnnotationFeature::getName)).toList();
var features = featuresGroupedByLayer.get(layer).stream() //
.sorted(comparing(AnnotationFeature::getName)) //
.toList();

for (var feature : features) {
final var featureDefFs = aCas.createFS(featureDefType);
Expand All @@ -597,7 +597,6 @@ private void addTagsetDefinitionAnnotations(CAS aCas, Project aProject,

var aLayer = feature.getLayer().getName();
var aTagSetName = tagSet.getName();

var tagsetType = getType(aCas, TagsetDescription.class);
var layerFeature = tagsetType.getFeatureByBaseName(FEATURE_BASE_NAME_LAYER);
var nameFeature = tagsetType.getFeatureByBaseName(FEATURE_BASE_NAME_NAME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,12 @@ public KnowledgeBaseProfile(@JsonProperty("name") String aName,
mapping = aMapping;
rootConcepts = aRootConcepts;
info = aInfo;
reification = aReification;
defaultLanguage = aDefaultLanguage;

if (aReification != null) {
reification = aReification;
}

if (aDefaultDataset != null) {
defaultDataset = aDefaultDataset;
}
Expand Down Expand Up @@ -186,6 +189,10 @@ public void setInfo(KnowledgeBaseInfo aInfo)

/**
 * Returns the reification strategy configured for this profile.
 *
 * @return the configured reification, or {@link Reification#NONE} if no reification has been
 *         set (i.e. the field is {@code null})
 */
public Reification getReification()
{
    // Never hand out null: callers get NONE when the profile does not specify a reification
    return reification != null ? reification : Reification.NONE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,16 @@ snomed-ct:
- Download SNOMED CT RF2 data files, e.g. from the [National Library of Medicine](https://www.nlm.nih.gov/healthit/snomedct/international.html)
- The RF2 package contains a data file in _OWL Functional Syntax_ with the ending `.owl` - rename the file to end in `.ofn`
- Import the renamed file into the knowledge base created from this profile (this may take a while)
At this point, you will be able to link against SNOMED concepts, but the tree browser will not be able to find
certain concepts. To fix this, an additional step has to be performed:
- Obtain a copy of the [ELK Reasoner standalone executable](https://github.com/liveontologies/elk-reasoner/releases)
- Use ELK to classify the SNOMED _OWL Functional Syntax_ data: <br>
`java -jar elk-standalone.jar -i snomed-file.ofn -c -o snomed-classified-file.ofn`
- Import the classified file into the same knowledge base that already contains the other SNOMED data
After all imports are complete, enable the _read only_ checkbox - this enables an internal cache and improves performance.
host-institution-name: SNOMED International
website-url: https://www.snomed.org
license-url: https://www.snomed.org/get-snomed

0 comments on commit 4bc22f3

Please sign in to comment.