diff --git a/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java b/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java
index 357f71cd5dd..d7b824f640d 100644
--- a/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java
+++ b/inception/inception-app-webapp/src/main/java/de/tudarmstadt/ukp/inception/INCEpTION.java
@@ -92,7 +92,6 @@ private static void init(SpringApplicationBuilder aBuilder)
if (Boolean.getBoolean("inception.dev")) {
System.setProperty("wicket.core.settings.debug.enabled", "true");
System.setProperty("wicket.core.settings.general.configuration-type", "development");
- System.setProperty("debug.sendServerSideTimings", "true");
System.setProperty("webanno.debug.enforce_cas_thread_lock", "true");
aBuilder.profiles(DeploymentModeService.PROFILE_DEVELOPMENT_MODE);
}
diff --git a/inception/inception-diag/pom.xml b/inception/inception-diag/pom.xml
index 2c37c7c9c17..b30376fab1c 100644
--- a/inception/inception-diag/pom.xml
+++ b/inception/inception-diag/pom.xml
@@ -34,6 +34,10 @@
org.apache.commons
commons-lang3
+
+ org.apache.commons
+ commons-text
+
org.apache.uima
diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithCharactersCheck.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithCharactersCheck.java
new file mode 100644
index 00000000000..23373cf8dc8
--- /dev/null
+++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/checks/AllAnnotationsStartAndEndWithCharactersCheck.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.diag.checks;
+
+import static java.lang.String.join;
+import static org.apache.commons.lang3.StringUtils.abbreviateMiddle;
+import static org.apache.commons.text.StringEscapeUtils.escapeJava;
+import static org.apache.uima.fit.util.CasUtil.getType;
+import static org.apache.uima.fit.util.CasUtil.select;
+import static org.springframework.util.CollectionUtils.isEmpty;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+
+import de.tudarmstadt.ukp.clarin.webanno.model.Project;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.TrimUtils;
+import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+public class AllAnnotationsStartAndEndWithCharactersCheck
+ implements Check
+{
+ private final AnnotationSchemaService annotationService;
+
+ public AllAnnotationsStartAndEndWithCharactersCheck(AnnotationSchemaService aAnnotationService)
+ {
+ annotationService = aAnnotationService;
+ }
+
+ @Override
+ public boolean check(Project aProject, CAS aCas, List aMessages)
+ {
+ if (annotationService == null) {
+ return true;
+ }
+
+ var allAnnoLayers = annotationService.listAnnotationLayer(aProject);
+ if (isEmpty(allAnnoLayers)) {
+ return true;
+ }
+
+ boolean ok = true;
+ for (var layer : allAnnoLayers) {
+ Type type;
+ try {
+ type = getType(aCas, layer.getName());
+ }
+ catch (IllegalArgumentException e) {
+ // If the type does not exist, the CAS has not been upgraded. In this case, we
+ // can skip checking the layer because there will be no annotations anyway.
+ continue;
+ }
+
+ if (!aCas.getTypeSystem().subsumes(aCas.getAnnotationType(), type)) {
+ // Skip non-annotation types
+ continue;
+ }
+
+ var docText = aCas.getDocumentText();
+ for (var ann : select(aCas, type)) {
+ var offsets = new int[] { ann.getBegin(), ann.getEnd() };
+ TrimUtils.trim(docText, offsets);
+
+ boolean startsWithWhitespace = offsets[0] != ann.getBegin();
+ boolean endsWithWhitespace = offsets[1] != ann.getEnd();
+ if (!startsWithWhitespace && !endsWithWhitespace) {
+ continue;
+ }
+
+ var locations = new ArrayList();
+ if (startsWithWhitespace) {
+ locations.add("starts");
+ }
+ if (endsWithWhitespace) {
+ locations.add("ends");
+ }
+
+ aMessages.add(LogMessage.error(this, "[%s] [%s]@[%d-%d] %s with whitespace",
+ ann.getType().getName(),
+ escapeJava(abbreviateMiddle(ann.getCoveredText(), "…", 20)), ann.getBegin(),
+ ann.getEnd(), join(" and ", locations)));
+
+ ok = false;
+ }
+ }
+
+ return ok;
+ }
+}
diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java
index 645313efbf8..48dd146a66c 100644
--- a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java
+++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/config/CasDoctorAutoConfiguration.java
@@ -30,6 +30,7 @@
import de.tudarmstadt.ukp.clarin.webanno.diag.ChecksRegistryImpl;
import de.tudarmstadt.ukp.clarin.webanno.diag.RepairsRegistry;
import de.tudarmstadt.ukp.clarin.webanno.diag.RepairsRegistryImpl;
+import de.tudarmstadt.ukp.clarin.webanno.diag.checks.AllAnnotationsStartAndEndWithCharactersCheck;
import de.tudarmstadt.ukp.clarin.webanno.diag.checks.AllAnnotationsStartAndEndWithinSentencesCheck;
import de.tudarmstadt.ukp.clarin.webanno.diag.checks.AllFeatureStructuresIndexedCheck;
import de.tudarmstadt.ukp.clarin.webanno.diag.checks.CASMetadataTypeIsPresentCheck;
@@ -55,6 +56,7 @@
import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.RemoveZeroSizeTokensAndSentencesRepair;
import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.Repair;
import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.SwitchBeginAndEndOnNegativeSizedAnnotationsRepair;
+import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.TrimAnnotationsRepair;
import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.UpgradeCasRepair;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
@@ -236,4 +238,17 @@ public UnreachableAnnotationsCheck unreachableAnnotationsCheck()
{
return new UnreachableAnnotationsCheck();
}
+
+ @Bean
+ public AllAnnotationsStartAndEndWithCharactersCheck allAnnotationsStartAndEndWithCharactersCheck(
+ AnnotationSchemaService aAnnotationService)
+ {
+ return new AllAnnotationsStartAndEndWithCharactersCheck(aAnnotationService);
+ }
+
+ @Bean
+ public TrimAnnotationsRepair trimAnnotationsRepair(AnnotationSchemaService aAnnotationService)
+ {
+ return new TrimAnnotationsRepair(aAnnotationService);
+ }
}
diff --git a/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/TrimAnnotationsRepair.java b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/TrimAnnotationsRepair.java
new file mode 100644
index 00000000000..20cd1e23eff
--- /dev/null
+++ b/inception/inception-diag/src/main/java/de/tudarmstadt/ukp/clarin/webanno/diag/repairs/TrimAnnotationsRepair.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.diag.repairs;
+
+import static java.lang.String.join;
+import static org.apache.commons.lang3.StringUtils.abbreviateMiddle;
+import static org.apache.commons.text.StringEscapeUtils.escapeJava;
+import static org.apache.uima.fit.util.CasUtil.getType;
+import static org.apache.uima.fit.util.CasUtil.select;
+import static org.springframework.util.CollectionUtils.isEmpty;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import de.tudarmstadt.ukp.clarin.webanno.model.Project;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.TrimUtils;
+import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+public class TrimAnnotationsRepair
+ implements Repair
+{
+ private final AnnotationSchemaService annotationService;
+
+ public TrimAnnotationsRepair(AnnotationSchemaService aAnnotationService)
+ {
+ annotationService = aAnnotationService;
+ }
+
+ @Override
+ public void repair(Project aProject, CAS aCas, List aMessages)
+ {
+ var allAnnoLayers = annotationService.listAnnotationLayer(aProject);
+ if (isEmpty(allAnnoLayers)) {
+ return;
+ }
+
+ for (var layer : allAnnoLayers) {
+ Type type;
+ try {
+ type = getType(aCas, layer.getName());
+ }
+ catch (IllegalArgumentException e) {
+ // If the type does not exist, the CAS has not been upgraded. In this case, we
+ // can skip checking the layer because there will be no annotations anyway.
+ continue;
+ }
+
+ if (!aCas.getTypeSystem().subsumes(aCas.getAnnotationType(), type)) {
+ // Skip non-annotation types
+ continue;
+ }
+
+ var docText = aCas.getDocumentText();
+ for (var ann : select(aCas, type)) {
+ var oldBegin = ann.getBegin();
+ var oldEnd = ann.getEnd();
+
+ TrimUtils.trim(docText, (Annotation) ann);
+
+ boolean beginChanged = oldBegin != ann.getBegin();
+ boolean endChanged = oldEnd != ann.getEnd();
+ if (!beginChanged && !endChanged) {
+ continue;
+ }
+
+ var locations = new ArrayList();
+ if (beginChanged) {
+ locations.add("start");
+ }
+ if (endChanged) {
+ locations.add("end");
+ }
+
+ aMessages.add(LogMessage.info(this, "Trimmed whitespace of [%s] [%s]@[%d-%d] at %s",
+ ann.getType().getName(),
+ escapeJava(abbreviateMiddle(ann.getCoveredText(), "…", 20)), ann.getBegin(),
+ ann.getEnd(), join(" and ", locations)));
+ }
+ }
+ }
+}
diff --git a/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc b/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc
index e67d37c257f..9eccd51be05 100644
--- a/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc
+++ b/inception/inception-diag/src/main/resources/META-INF/asciidoc/user-guide/casdoctor.adoc
@@ -221,6 +221,16 @@ Checks if there are any unreachable feature structures. Such feature structures
they are not regularly accessible. Such feature structures may be created as a result of bugs.
Removing them is harmless and reduces memory and disk space usage.
+[[check_AllAnnotationsStartAndEndWithCharactersCheck]]
+=== All annotations start and end with characters
+[horizontal]
+ID:: `check_AllAnnotationsStartAndEndWithCharactersCheck`
+Related repairs:: <>
+
+Checks if all annotations start and end with a character (i.e. not a whitespace). Annotations that start or end with a
+whitespace character can cause problems during rendering. Trimming whitespace at the begin and end is typically as
+harmless procedure.
+
[[sect_repairs]]
== Repairs
@@ -369,3 +379,12 @@ ID:: `CoverAllTextInSentencesRepair`
This repair checks if there is any text not covered by sentences. If there is, it creates a new
sentence annotation on this text starting at the end of the last sentence before it (or the start
of the document text) and the begin of the next sentence (or the end of the document text).
+
+[[repair_TrimAnnotationsRepair]]
+=== Trim annotations
+
+[horizontal]
+ID:: `TrimAnnotationsRepair`
+
+This repair adjusts annotation boundaries such that they do not include any whitespace at the beginning or end of the
+annotation.
diff --git a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java
index c5b4fcc8b06..9b84712c41e 100644
--- a/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java
+++ b/inception/inception-scheduling/src/main/java/de/tudarmstadt/ukp/inception/scheduling/Task.java
@@ -201,7 +201,7 @@ public void runSync()
}
}
- public abstract void execute();
+ public abstract void execute() throws Exception;
@Override
public String toString()
diff --git a/inception/inception-ui-core/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/core/WicketApplicationBase.java b/inception/inception-ui-core/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/core/WicketApplicationBase.java
index 4d17ed2c12b..21431d78102 100644
--- a/inception/inception-ui-core/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/core/WicketApplicationBase.java
+++ b/inception/inception-ui-core/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/core/WicketApplicationBase.java
@@ -160,8 +160,7 @@ protected void init()
private void installTimingListener()
{
var settings = SettingsUtil.getSettings();
- if (!DEVELOPMENT.equals(getConfigurationType())
- && !"true".equalsIgnoreCase(settings.getProperty("debug.sendServerSideTimings"))) {
+ if (!"true".equalsIgnoreCase(settings.getProperty("debug.sendServerSideTimings"))) {
return;
}
diff --git a/inception/inception-ui-project/pom.xml b/inception/inception-ui-project/pom.xml
index 70ccf959f8d..90a3c01730d 100644
--- a/inception/inception-ui-project/pom.xml
+++ b/inception/inception-ui-project/pom.xml
@@ -115,6 +115,10 @@
de.tudarmstadt.ukp.inception.app
inception-support-bootstrap
+
+ de.tudarmstadt.ukp.inception.app
+ inception-scheduling
+
diff --git a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CasDoctorTask_ImplBase.java b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CasDoctorTask_ImplBase.java
new file mode 100644
index 00000000000..cef93c3f139
--- /dev/null
+++ b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CasDoctorTask_ImplBase.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.ui.project.casdoctor;
+
+import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_NON_INITIALIZING_ACCESS;
+import static de.tudarmstadt.ukp.inception.scheduling.TaskScope.PROJECT;
+import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.INITIAL_CAS_PSEUDO_USER;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.UIMAException;
+import org.apache.uima.cas.CAS;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasStorageService;
+import de.tudarmstadt.ukp.clarin.webanno.api.export.DocumentImportExportService;
+import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
+import de.tudarmstadt.ukp.inception.documents.api.DocumentStorageService;
+import de.tudarmstadt.ukp.inception.scheduling.ProjectTask;
+import de.tudarmstadt.ukp.inception.scheduling.Task;
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+public abstract class CasDoctorTask_ImplBase
+ extends Task
+ implements ProjectTask
+{
+ private @Autowired CasStorageService casStorageService;
+ private @Autowired DocumentStorageService documentStorageService;
+ private @Autowired DocumentImportExportService importExportService;
+
+ private final List messageSets = new ArrayList<>();
+
+ public CasDoctorTask_ImplBase(Builder extends Builder>> aBuilder)
+ {
+ super(aBuilder.withCancellable(true).withScope(PROJECT));
+ }
+
+ public List getMessageSets()
+ {
+ return messageSets;
+ }
+
+ protected CAS createOrReadInitialCasWithoutSavingOrChecks(SourceDocument aDocument,
+ LogMessageSet aMessageSet)
+ throws IOException, UIMAException
+ {
+ if (casStorageService.existsCas(aDocument, INITIAL_CAS_PSEUDO_USER)) {
+ return casStorageService.readCas(aDocument, INITIAL_CAS_PSEUDO_USER,
+ UNMANAGED_NON_INITIALIZING_ACCESS);
+ }
+
+ var cas = importExportService.importCasFromFileNoChecks(
+ documentStorageService.getSourceDocumentFile(aDocument), aDocument);
+ aMessageSet.add(
+ LogMessage.info(getClass(), "Created initial CAS for [%s]", aDocument.getName()));
+ return cas;
+ }
+
+ protected void noticeIfThereAreNoMessages(LogMessageSet aSet)
+ {
+ if (aSet.getMessages().isEmpty()) {
+ aSet.add(LogMessage.info(getClass(), "Nothing to report."));
+ }
+ }
+
+ protected static class Builder>
+ extends Task.Builder
+ {
+ protected Builder()
+ {
+ }
+ }
+}
diff --git a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CheckTask.java b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CheckTask.java
new file mode 100644
index 00000000000..858f3988a3e
--- /dev/null
+++ b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/CheckTask.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.ui.project.casdoctor;
+
+import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_NON_INITIALIZING_ACCESS;
+import static de.tudarmstadt.ukp.inception.scheduling.TaskScope.PROJECT;
+import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.CURATION_USER;
+import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.INITIAL_CAS_PSEUDO_USER;
+import static java.util.Arrays.asList;
+
+import java.io.FileNotFoundException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.Validate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasStorageService;
+import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor;
+import de.tudarmstadt.ukp.clarin.webanno.diag.ChecksRegistry;
+import de.tudarmstadt.ukp.clarin.webanno.diag.RepairsRegistry;
+import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
+import de.tudarmstadt.ukp.inception.scheduling.TaskState;
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+public class CheckTask
+ extends CasDoctorTask_ImplBase
+{
+ private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final String TYPE = "CheckTask";
+
+ private @Autowired RepairsRegistry repairsRegistry;
+ private @Autowired ChecksRegistry checksRegistry;
+ private @Autowired DocumentService documentService;
+ private @Autowired CasStorageService casStorageService;
+
+ private final List checks;
+
+ private int objectCount = 0;
+
+ public CheckTask(Builder extends Builder>> aBuilder)
+ {
+ // Can currently not be cancelled because we're running it sync and the AJAX cancel request
+ // won't get through
+ super(aBuilder.withType(TYPE).withCancellable(false).withScope(PROJECT));
+
+ checks = aBuilder.checks;
+ }
+
+ @Override
+ public String getTitle()
+ {
+ return "Running CAS Doctor checks...";
+ }
+
+ @Override
+ public void execute()
+ {
+ var casDoctor = new CasDoctor(checksRegistry, repairsRegistry);
+ casDoctor.setActiveChecks(checks.toArray(String[]::new));
+
+ var project = getProject();
+
+ var sourceDocuments = documentService.listSourceDocuments(project);
+
+ var monitor = getMonitor();
+ var progress = 0;
+ var maxProgress = sourceDocuments.size();
+
+ for (var sd : sourceDocuments) {
+ progress++;
+
+ monitor.setProgressWithMessage(progress, maxProgress,
+ LogMessage.info(this, "Processing [%s]...", sd.getName()));
+ if (monitor.isCancelled()) {
+ monitor.setState(TaskState.CANCELLED);
+ }
+
+ // Check INITIAL CAS
+ {
+ var messageSet = new LogMessageSet(sd.getName() + " [INITIAL]");
+
+ try {
+ objectCount++;
+ casStorageService.forceActionOnCas(sd, INITIAL_CAS_PSEUDO_USER,
+ (doc, user) -> createOrReadInitialCasWithoutSavingOrChecks(doc,
+ messageSet),
+ (cas) -> casDoctor.analyze(project, cas, messageSet.getMessages()), //
+ false);
+ }
+ catch (Exception e) {
+ messageSet.add(
+ LogMessage.error(getClass(), "Error checking initial CAS for [%s]: %s",
+ sd.getName(), e.getMessage()));
+ LOG.error("Error checking initial CAS for [{}]", sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+
+ // Check CURATION_USER CAS
+ {
+ var messageSet = new LogMessageSet(sd.getName() + " [" + CURATION_USER + "]");
+ try {
+ objectCount++;
+ casStorageService.forceActionOnCas(sd, CURATION_USER,
+ (doc, user) -> casStorageService.readCas(doc, user,
+ UNMANAGED_NON_INITIALIZING_ACCESS),
+ (cas) -> casDoctor.analyze(project, cas, messageSet.getMessages()), //
+ false);
+ }
+ catch (FileNotFoundException e) {
+ // If there is no CAS for the curation user, then curation has not started yet.
+ // This is not a problem, so we can ignore it.
+ messageSet.add(
+ LogMessage.info(getClass(), "Curation seems to have not yet started."));
+ }
+ catch (Exception e) {
+ messageSet.add(LogMessage.error(getClass(),
+ "Error checking annotations for [%s] for [%s]: %s", CURATION_USER,
+ sd.getName(), e.getMessage()));
+ LOG.error("Error checking annotations for [{}] for [{}]", CURATION_USER,
+ sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+
+ // Check regular annotator CASes
+ for (var ad : documentService.listAnnotationDocuments(sd)) {
+ var messageSet = new LogMessageSet(sd.getName() + " [" + ad.getUser() + "]");
+ try {
+ if (documentService.existsCas(ad)) {
+ objectCount++;
+ casStorageService.forceActionOnCas(ad.getDocument(), ad.getUser(),
+ (doc, user) -> casStorageService.readCas(doc, user,
+ UNMANAGED_NON_INITIALIZING_ACCESS),
+ (cas) -> casDoctor.analyze(project, cas, messageSet.getMessages()), //
+ false);
+ }
+ }
+ catch (Exception e) {
+ messageSet.add(LogMessage.error(getClass(),
+ "Error checking annotations of user [%s] for [%s]: %s", ad.getUser(),
+ sd.getName(), e.getMessage()));
+ LOG.error("Error checking annotations of user [{}] for [{}]", ad.getUser(),
+ sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+ }
+
+ monitor.setProgressWithMessage(progress, maxProgress,
+ LogMessage.info(this, "Checks complete"));
+ }
+
+ public int getObjectCount()
+ {
+ return objectCount;
+ }
+
+ public static Builder> builder()
+ {
+ return new Builder<>();
+ }
+
+ public static class Builder>
+ extends CasDoctorTask_ImplBase.Builder
+ {
+ private List checks;
+
+ protected Builder()
+ {
+ }
+
+ @SuppressWarnings("unchecked")
+ public T withChecks(String... aChecks)
+ {
+ checks = asList(aChecks);
+ return (T) this;
+ }
+
+ @SuppressWarnings("unchecked")
+ public T withChecks(Iterable aChecks)
+ {
+ checks = new ArrayList<>();
+ aChecks.forEach(checks::add);
+ return (T) this;
+ }
+
+ public CheckTask build()
+ {
+ Validate.notNull(project, "Parameter [project] must be specified");
+ Validate.notNull(checks, "Parameter [checks] must be specified");
+
+ return new CheckTask(this);
+ }
+ }
+}
diff --git a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/LogMessageSet.java b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/LogMessageSet.java
new file mode 100644
index 00000000000..426e521ac7a
--- /dev/null
+++ b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/LogMessageSet.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.ui.project.casdoctor;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+class LogMessageSet
+ implements Serializable
+{
+ private static final long serialVersionUID = 997324549494420840L;
+
+ private String name;
+ private List messages = new ArrayList<>();
+
+ public LogMessageSet(String aName)
+ {
+ name = aName;
+ }
+
+ public String getName()
+ {
+ return name;
+ }
+
+ public List getMessages()
+ {
+ return messages;
+ }
+
+ public void add(LogMessage aMessage)
+ {
+ messages.add(aMessage);
+ }
+}
\ No newline at end of file
diff --git a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/ProjectCasDoctorPanel.java b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/ProjectCasDoctorPanel.java
index de80f3d4335..2eb220169fc 100644
--- a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/ProjectCasDoctorPanel.java
+++ b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/ProjectCasDoctorPanel.java
@@ -17,22 +17,14 @@
*/
package de.tudarmstadt.ukp.clarin.webanno.ui.project.casdoctor;
-import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_NON_INITIALIZING_ACCESS;
-import static de.tudarmstadt.ukp.clarin.webanno.model.SourceDocumentState.CURATION_FINISHED;
-import static de.tudarmstadt.ukp.clarin.webanno.model.SourceDocumentState.CURATION_IN_PROGRESS;
-import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.CURATION_USER;
-import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.INITIAL_CAS_PSEUDO_USER;
-import static java.util.Arrays.asList;
import static java.util.stream.Collectors.toList;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UIMAException;
-import org.apache.uima.cas.CAS;
import org.apache.wicket.ajax.AjaxRequestTarget;
import org.apache.wicket.feedback.IFeedback;
import org.apache.wicket.markup.html.basic.Label;
@@ -49,16 +41,15 @@
import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasStorageService;
import de.tudarmstadt.ukp.clarin.webanno.api.export.DocumentImportExportService;
-import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor;
import de.tudarmstadt.ukp.clarin.webanno.diag.ChecksRegistry;
import de.tudarmstadt.ukp.clarin.webanno.diag.RepairsRegistry;
import de.tudarmstadt.ukp.clarin.webanno.diag.repairs.Repair.Safe;
-import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
-import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
+import de.tudarmstadt.ukp.clarin.webanno.security.UserDao;
import de.tudarmstadt.ukp.clarin.webanno.ui.core.settings.ProjectSettingsPanelBase;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.documents.api.DocumentStorageService;
+import de.tudarmstadt.ukp.inception.scheduling.SchedulingService;
import de.tudarmstadt.ukp.inception.support.lambda.LambdaAjaxButton;
import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
@@ -75,6 +66,8 @@ public class ProjectCasDoctorPanel
private @SpringBean DocumentImportExportService importExportService;
private @SpringBean RepairsRegistry repairsRegistry;
private @SpringBean ChecksRegistry checksRegistry;
+ private @SpringBean SchedulingService schedulingService;
+ private @SpringBean UserDao userService;
// Data properties
private FormModel formModel = new FormModel();
@@ -85,10 +78,10 @@ public ProjectCasDoctorPanel(String id, IModel aProjectModel)
setOutputMarkupId(true);
- Form form = new Form<>("casDoctorForm", PropertyModel.of(this, "formModel"));
+ var form = new Form("casDoctorForm", PropertyModel.of(this, "formModel"));
add(form);
- CheckBoxMultipleChoice repairs = new CheckBoxMultipleChoice<>("repairs");
+ var repairs = new CheckBoxMultipleChoice("repairs");
repairs.setModel(PropertyModel.of(this, "formModel.repairs"));
repairs.setChoices(repairsRegistry.getExtensions().stream() //
.map(r -> r.getId()).collect(toList()));
@@ -146,96 +139,16 @@ protected void populateItem(ListItem aItem)
private void actionRepair(AjaxRequestTarget aTarget, Form> aForm)
throws IOException, UIMAException, ClassNotFoundException
{
- CasDoctor casDoctor = new CasDoctor(checksRegistry, repairsRegistry);
- casDoctor.setFatalChecks(false);
- casDoctor.setActiveRepairs(formModel.repairs.toArray(String[]::new));
+ var repairTask = RepairTask.builder() //
+ .withSessionOwner(userService.getCurrentUser()) //
+ .withProject(getModelObject()) //
+ .withRepairs(formModel.repairs) //
+ .withTrigger("User request") //
+ .build();
- Project project = getModelObject();
+ schedulingService.executeSync(repairTask);
- formModel.messageSets = new ArrayList<>();
-
- for (SourceDocument sd : documentService.listSourceDocuments(project)) {
- // Repair INITIAL CAS
- {
- LogMessageSet messageSet = new LogMessageSet(sd.getName() + " [INITIAL]");
-
- try {
- casStorageService.forceActionOnCas(sd, INITIAL_CAS_PSEUDO_USER,
- (doc, user) -> createOrReadInitialCasWithoutSavingOrChecks(doc,
- messageSet),
- (cas) -> casDoctor.repair(project, cas, messageSet.messages), //
- true);
- }
- catch (Exception e) {
- messageSet.messages.add(
- LogMessage.error(getClass(), "Error repairing initial CAS for [%s]: %s",
- sd.getName(), e.getMessage()));
- LOG.error("Error repairing initial CAS for [{}]", sd.getName(), e);
- }
-
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
-
- // Repair CURATION_USER CAS
- {
- LogMessageSet messageSet = new LogMessageSet(
- sd.getName() + " [" + CURATION_USER + "]");
- try {
- casStorageService.forceActionOnCas(sd, CURATION_USER,
- (doc, user) -> casStorageService.readCas(doc, user,
- UNMANAGED_NON_INITIALIZING_ACCESS),
- (cas) -> casDoctor.repair(project, cas, messageSet.messages), //
- true);
- }
- catch (FileNotFoundException e) {
- if (asList(CURATION_IN_PROGRESS, CURATION_FINISHED).contains(sd.getState())) {
- messageSet.messages
- .add(LogMessage.error(getClass(), "Curation CAS missing."));
- }
- else {
- // If there is no CAS for the curation user, then curation has not started
- // yet. This is not a problem, so we can ignore it.
- messageSet.messages
- .add(LogMessage.info(getClass(), "Curation has not started."));
- }
- }
- catch (Exception e) {
- messageSet.messages.add(LogMessage.error(getClass(),
- "Error checking annotations for [%s] for [%s]: %s", CURATION_USER,
- sd.getName(), e.getMessage()));
- LOG.error("Error checking annotations for [{}] for [{}]", CURATION_USER,
- sd.getName(), e);
- }
-
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
-
- // Repair regular annotator CASes
- for (AnnotationDocument ad : documentService.listAnnotationDocuments(sd)) {
- if (documentService.existsCas(ad)) {
- LogMessageSet messageSet = new LogMessageSet(
- sd.getName() + " [" + ad.getUser() + "]");
- try {
- casStorageService.forceActionOnCas(sd, ad.getUser(),
- (doc, user) -> casStorageService.readCas(doc, user,
- UNMANAGED_NON_INITIALIZING_ACCESS),
- (cas) -> casDoctor.repair(project, cas, messageSet.messages), //
- true);
- }
- catch (Exception e) {
- messageSet.messages.add(LogMessage.error(getClass(),
- "Error repairing annotations of user [%s] for [%s]: %s",
- ad.getUser(), sd.getName(), e.getMessage()));
- LOG.error("Error repairing annotations of user [{}] for [{}]", ad.getUser(),
- sd.getName(), e);
- }
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
- }
- }
+ formModel.messageSets = repairTask.getMessageSets();
aTarget.add(this);
}
@@ -243,136 +156,37 @@ private void actionRepair(AjaxRequestTarget aTarget, Form> aForm)
private void actionCheck(AjaxRequestTarget aTarget, Form> aForm)
throws IOException, UIMAException, ClassNotFoundException
{
- CasDoctor casDoctor = new CasDoctor(checksRegistry, repairsRegistry);
- casDoctor.setActiveChecks(
- checksRegistry.getExtensions().stream().map(c -> c.getId()).toArray(String[]::new));
-
- Project project = getModelObject();
-
- formModel.messageSets = new ArrayList<>();
-
- int objectCount = 0;
- for (SourceDocument sd : documentService.listSourceDocuments(project)) {
- // Check INITIAL CAS
- {
- LogMessageSet messageSet = new LogMessageSet(sd.getName() + " [INITIAL]");
-
- try {
- objectCount++;
- casStorageService.forceActionOnCas(sd, INITIAL_CAS_PSEUDO_USER,
- (doc, user) -> createOrReadInitialCasWithoutSavingOrChecks(doc,
- messageSet),
- (cas) -> casDoctor.analyze(project, cas, messageSet.messages), //
- false);
- }
- catch (Exception e) {
- messageSet.messages.add(
- LogMessage.error(getClass(), "Error checking initial CAS for [%s]: %s",
- sd.getName(), e.getMessage()));
- LOG.error("Error checking initial CAS for [{}]", sd.getName(), e);
- }
-
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
-
- // Check CURATION_USER CAS
- {
- LogMessageSet messageSet = new LogMessageSet(
- sd.getName() + " [" + CURATION_USER + "]");
- try {
- objectCount++;
- casStorageService.forceActionOnCas(sd, CURATION_USER,
- (doc, user) -> casStorageService.readCas(doc, user,
- UNMANAGED_NON_INITIALIZING_ACCESS),
- (cas) -> casDoctor.analyze(project, cas, messageSet.messages), //
- false);
- }
- catch (FileNotFoundException e) {
- // If there is no CAS for the curation user, then curation has not started yet.
- // This is not a problem, so we can ignore it.
- messageSet.messages.add(
- LogMessage.info(getClass(), "Curation seems to have not yet started."));
- }
- catch (Exception e) {
- messageSet.messages.add(LogMessage.error(getClass(),
- "Error checking annotations for [%s] for [%s]: %s", CURATION_USER,
- sd.getName(), e.getMessage()));
- LOG.error("Error checking annotations for [{}] for [{}]", CURATION_USER,
- sd.getName(), e);
- }
-
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
+ var checks = checksRegistry.getExtensions().stream().map(c -> c.getId()).toList();
+ var checkTask = CheckTask.builder() //
+ .withSessionOwner(userService.getCurrentUser()) //
+ .withProject(getModelObject()) //
+ .withTrigger("User request") //
+ .withChecks(checks) //
+ .build();
- // Check regular annotator CASes
- for (AnnotationDocument ad : documentService.listAnnotationDocuments(sd)) {
- if (documentService.existsCas(ad)) {
- LogMessageSet messageSet = new LogMessageSet(
- sd.getName() + " [" + ad.getUser() + "]");
- try {
- objectCount++;
- casStorageService.forceActionOnCas(ad.getDocument(), ad.getUser(),
- (doc, user) -> casStorageService.readCas(doc, user,
- UNMANAGED_NON_INITIALIZING_ACCESS),
- (cas) -> casDoctor.analyze(project, cas, messageSet.messages), //
- false);
- }
- catch (Exception e) {
- messageSet.messages.add(LogMessage.error(getClass(),
- "Error checking annotations of user [%s] for [%s]: %s",
- ad.getUser(), sd.getName(), e.getMessage()));
- LOG.error("Error checking annotations of user [{}] for [{}]", ad.getUser(),
- sd.getName(), e);
- }
+ schedulingService.executeSync(checkTask);
- noticeIfThereAreNoMessages(messageSet);
- formModel.messageSets.add(messageSet);
- }
- }
- }
+ formModel.messageSets = checkTask.getMessageSets();
+ var objectCount = checkTask.getObjectCount();
if (objectCount > 0) {
- info("Applied " + casDoctor.getActiveChecks().size() + " checks to " + objectCount
+ info("Applied " + checks.size() + " checks to " + objectCount
+ " annotation objects - see report for details");
}
else {
warn("Project does not contain any annotation objects that can be checked");
}
- aTarget.addChildren(getPage(), IFeedback.class);
+ aTarget.addChildren(getPage(), IFeedback.class);
aTarget.add(this);
}
- private CAS createOrReadInitialCasWithoutSavingOrChecks(SourceDocument aDocument,
- LogMessageSet aMessageSet)
- throws IOException, UIMAException
- {
- if (casStorageService.existsCas(aDocument, INITIAL_CAS_PSEUDO_USER)) {
- return casStorageService.readCas(aDocument, INITIAL_CAS_PSEUDO_USER,
- UNMANAGED_NON_INITIALIZING_ACCESS);
- }
-
- var cas = importExportService.importCasFromFileNoChecks(
- documentStorageService.getSourceDocumentFile(aDocument), aDocument);
- aMessageSet.messages.add(
- LogMessage.info(getClass(), "Created initial CAS for [%s]", aDocument.getName()));
- return cas;
- }
-
- private void noticeIfThereAreNoMessages(LogMessageSet aSet)
- {
- if (aSet.messages.isEmpty()) {
- aSet.messages.add(LogMessage.info(getClass(), "Nothing to report."));
- }
- }
-
private class FormModel
implements Serializable
{
private static final long serialVersionUID = 5421427363671176637L;
+ @SuppressWarnings("unused")
private List messageSets = new ArrayList<>();
private List repairs;
@@ -387,19 +201,4 @@ private class FormModel
.collect(toList());
}
}
-
- @SuppressWarnings("unused")
- private static class LogMessageSet
- implements Serializable
- {
- private static final long serialVersionUID = 997324549494420840L;
-
- private String name;
- private List messages = new ArrayList<>();
-
- public LogMessageSet(String aName)
- {
- name = aName;
- }
- }
}
diff --git a/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/RepairTask.java b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/RepairTask.java
new file mode 100644
index 00000000000..407b2a3746a
--- /dev/null
+++ b/inception/inception-ui-project/src/main/java/de/tudarmstadt/ukp/clarin/webanno/ui/project/casdoctor/RepairTask.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.clarin.webanno.ui.project.casdoctor;
+
+import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.UNMANAGED_NON_INITIALIZING_ACCESS;
+import static de.tudarmstadt.ukp.clarin.webanno.model.SourceDocumentState.CURATION_FINISHED;
+import static de.tudarmstadt.ukp.clarin.webanno.model.SourceDocumentState.CURATION_IN_PROGRESS;
+import static de.tudarmstadt.ukp.inception.scheduling.TaskScope.PROJECT;
+import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.CURATION_USER;
+import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.INITIAL_CAS_PSEUDO_USER;
+import static java.util.Arrays.asList;
+
+import java.io.FileNotFoundException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.Validate;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasStorageService;
+import de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor;
+import de.tudarmstadt.ukp.clarin.webanno.diag.ChecksRegistry;
+import de.tudarmstadt.ukp.clarin.webanno.diag.RepairsRegistry;
+import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
+import de.tudarmstadt.ukp.inception.scheduling.TaskState;
+import de.tudarmstadt.ukp.inception.support.logging.LogMessage;
+
+public class RepairTask
+ extends CasDoctorTask_ImplBase
+{
+ private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final String TYPE = "RepairTask";
+
+ private @Autowired RepairsRegistry repairsRegistry;
+ private @Autowired ChecksRegistry checksRegistry;
+ private @Autowired DocumentService documentService;
+ private @Autowired CasStorageService casStorageService;
+
+ private final List repairs;
+
+ public RepairTask(Builder extends Builder>> aBuilder)
+ {
+ // Can currently not be cancelled because we're running it sync and the AJAX cancel request
+ // won't get through
+ super(aBuilder.withType(TYPE).withCancellable(false).withScope(PROJECT));
+
+ repairs = aBuilder.repairs;
+ }
+
+ @Override
+ public String getTitle()
+ {
+ return "Running CAS Doctor repairs...";
+ }
+
+ @Override
+ public void execute()
+ {
+ var casDoctor = new CasDoctor(checksRegistry, repairsRegistry);
+ casDoctor.setFatalChecks(false);
+ casDoctor.setActiveRepairs(repairs.toArray(String[]::new));
+
+ var project = getProject();
+
+ var sourceDocuments = documentService.listSourceDocuments(project);
+
+ var monitor = getMonitor();
+ var progress = 0;
+ var maxProgress = sourceDocuments.size();
+
+ for (var sd : sourceDocuments) {
+ progress++;
+
+ monitor.setProgressWithMessage(progress, maxProgress,
+ LogMessage.info(this, "Processing [%s]...", sd.getName()));
+ if (monitor.isCancelled()) {
+ monitor.setState(TaskState.CANCELLED);
+ }
+
+ // Repair INITIAL CAS
+ {
+ var messageSet = new LogMessageSet(sd.getName() + " [INITIAL]");
+
+ try {
+ casStorageService.forceActionOnCas(sd, INITIAL_CAS_PSEUDO_USER,
+ (doc, user) -> createOrReadInitialCasWithoutSavingOrChecks(doc,
+ messageSet),
+ (cas) -> casDoctor.repair(project, cas, messageSet.getMessages()), //
+ true);
+ }
+ catch (Exception e) {
+ messageSet.add(
+ LogMessage.error(getClass(), "Error repairing initial CAS for [%s]: %s",
+ sd.getName(), e.getMessage()));
+ LOG.error("Error repairing initial CAS for [{}]", sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+
+ // Repair CURATION_USER CAS
+ {
+ var messageSet = new LogMessageSet(sd.getName() + " [" + CURATION_USER + "]");
+ try {
+ casStorageService.forceActionOnCas(sd, CURATION_USER,
+ (doc, user) -> casStorageService.readCas(doc, user,
+ UNMANAGED_NON_INITIALIZING_ACCESS),
+ (cas) -> casDoctor.repair(project, cas, messageSet.getMessages()), //
+ true);
+ }
+ catch (FileNotFoundException e) {
+ if (asList(CURATION_IN_PROGRESS, CURATION_FINISHED).contains(sd.getState())) {
+ messageSet.add(LogMessage.error(getClass(), "Curation CAS missing."));
+ }
+ else {
+ // If there is no CAS for the curation user, then curation has not started
+ // yet. This is not a problem, so we can ignore it.
+ messageSet.add(LogMessage.info(getClass(), "Curation has not started."));
+ }
+ }
+ catch (Exception e) {
+ messageSet.add(LogMessage.error(getClass(),
+ "Error checking annotations for [%s] for [%s]: %s", CURATION_USER,
+ sd.getName(), e.getMessage()));
+ LOG.error("Error checking annotations for [{}] for [{}]", CURATION_USER,
+ sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+
+ // Repair regular annotator CASes
+ for (var ad : documentService.listAnnotationDocuments(sd)) {
+ var messageSet = new LogMessageSet(sd.getName() + " [" + ad.getUser() + "]");
+ try {
+ if (documentService.existsCas(ad)) {
+ casStorageService.forceActionOnCas(sd, ad.getUser(),
+ (doc, user) -> casStorageService.readCas(doc, user,
+ UNMANAGED_NON_INITIALIZING_ACCESS),
+ (cas) -> casDoctor.repair(project, cas, messageSet.getMessages()), //
+ true);
+ }
+ }
+ catch (Exception e) {
+ messageSet.add(LogMessage.error(getClass(),
+ "Error repairing annotations of user [%s] for [%s]: %s", ad.getUser(),
+ sd.getName(), e.getMessage()));
+ LOG.error("Error repairing annotations of user [{}] for [{}]", ad.getUser(),
+ sd.getName(), e);
+ }
+
+ noticeIfThereAreNoMessages(messageSet);
+ getMessageSets().add(messageSet);
+ }
+ }
+
+ monitor.setProgressWithMessage(progress, maxProgress,
+ LogMessage.info(this, "Repairs complete"));
+ }
+
+ public static Builder> builder()
+ {
+ return new Builder<>();
+ }
+
+ public static class Builder>
+ extends CasDoctorTask_ImplBase.Builder
+ {
+ private List repairs;
+
+ protected Builder()
+ {
+ }
+
+ @SuppressWarnings("unchecked")
+ public T withRepairs(String... aRepairs)
+ {
+ repairs = asList(aRepairs);
+ return (T) this;
+ }
+
+ @SuppressWarnings("unchecked")
+ public T withRepairs(Iterable aRepairs)
+ {
+ repairs = new ArrayList<>();
+ aRepairs.forEach(repairs::add);
+ return (T) this;
+ }
+
+ public RepairTask build()
+ {
+ Validate.notNull(project, "RepairTask requires a project");
+
+ return new RepairTask(this);
+ }
+ }
+}