Skip to content

Commit

Permalink
#4634 - Pull document file storage code out of document service into …
Browse files Browse the repository at this point in the history
…a new service

- Move the code out
  • Loading branch information
reckart committed Mar 16, 2024
1 parent fea7951 commit 8ed229c
Show file tree
Hide file tree
Showing 18 changed files with 339 additions and 191 deletions.
4 changes: 4 additions & 0 deletions inception/inception-documents-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
<groupId>org.apache.wicket</groupId>
<artifactId>wicket-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.wicket</groupId>
<artifactId>wicket-util</artifactId>
</dependency>

<!-- Spring dependencies -->

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
*/
package de.tudarmstadt.ukp.inception.documents.api;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
Expand Down Expand Up @@ -47,15 +46,6 @@

public interface DocumentService
{
/**
* The Directory where the {@link SourceDocument}s and {@link AnnotationDocument}s stored
*
* @return the directory.
* @deprecated Use {@link RepositoryProperties#getPath()} instead.
*/
@Deprecated
File getDir();

// --------------------------------------------------------------------------------------------
// Methods related to SourceDocuments
// --------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -117,13 +107,6 @@ public interface DocumentService
*/
SourceDocument getSourceDocument(long projectId, long documentId);

/**
* @param document
* The {@link SourceDocument} to be examined
* @return the originally imported source document file.
*/
File getSourceDocumentFile(SourceDocument document);

/**
* List all source documents in a project. The source documents are the original TCF documents
* imported.
Expand Down Expand Up @@ -661,8 +644,10 @@ List<AnnotationDocument> listAnnotationDocumentsWithStateForUser(Project aProjec
*
* @param annotationDocument
* the {@link AnnotationDocument} to be removed
* @throws IOException
* if there was a problem deleting
*/
void removeAnnotationDocument(AnnotationDocument annotationDocument);
void removeAnnotationDocument(AnnotationDocument annotationDocument) throws IOException;

AnnotationDocument createOrGetAnnotationDocument(SourceDocument aDocument, User aUser);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.documents.api;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.wicket.util.resource.IResourceStream;

import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;

/**
 * Service for storing and accessing the files that back {@link SourceDocument}s. This code was
 * pulled out of the document service so that file handling lives in a service of its own.
 */
public interface DocumentStorageService
{
/**
 * Opens the stored file of the given source document for reading.
 *
 * @param aDocument
 *            the {@link SourceDocument} whose file should be opened
 * @return a stream over the file content. NOTE(review): callers are presumably responsible for
 *         closing it (the document service uses it in a try-with-resources block) - confirm
 *         against the implementation.
 * @throws IOException
 *             if the file could not be opened
 */
InputStream openSourceDocumentFile(SourceDocument aDocument) throws IOException;

/**
 * Stores the data from the given stream as the file of the given source document.
 *
 * @param aDocument
 *            the {@link SourceDocument} the data belongs to. NOTE(review): presumably the
 *            document must already have an ID assigned - confirm against the implementation.
 * @param aIs
 *            the stream to read the document content from
 * @throws IOException
 *             if the data could not be written
 */
void writeSourceDocumentFile(SourceDocument aDocument, InputStream aIs) throws IOException;

/**
 * Copies the file of the given source document.
 *
 * @param aDocument
 *            the {@link SourceDocument} whose file should be copied
 * @param aTargetDir
 *            NOTE(review): presumably the directory into which the file is copied - confirm
 *            against the implementation.
 * @throws IOException
 *             if the file could not be copied
 */
void copySourceDocumentFile(SourceDocument aDocument, File aTargetDir) throws IOException;

/**
 * @param aDocument
 *            the {@link SourceDocument} to be examined
 * @return the size of the document's file. NOTE(review): presumably in bytes; the behavior for
 *         a missing file is not visible here - confirm against the implementation.
 */
long getSourceDocumentFileSize(SourceDocument aDocument);

/**
 * @param aDocument
 *            the {@link SourceDocument} to be examined
 * @return the originally imported source document file.
 */
File getSourceDocumentFile(SourceDocument aDocument);

/**
 * @param aDocument
 *            the {@link SourceDocument} to be examined
 * @return the document's file as a Wicket {@link IResourceStream}. NOTE(review): how the
 *         content type is determined by this variant is not visible here - confirm against
 *         the implementation.
 */
IResourceStream getSourceDocumentResourceStream(SourceDocument aDocument);

/**
 * @param aDocument
 *            the {@link SourceDocument} to be examined
 * @param aContentType
 *            NOTE(review): presumably the content type to be reported by the returned stream -
 *            confirm against the implementation.
 * @return the document's file as a Wicket {@link IResourceStream}.
 */
IResourceStream getSourceDocumentResourceStream(SourceDocument aDocument, String aContentType);

/**
 * Removes the stored file data of the given source document.
 *
 * @param aDocument
 *            the {@link SourceDocument} whose file data should be removed. NOTE(review): the
 *            code this replaces deleted the entire per-document folder if it existed; whether
 *            this method does the same is not visible here - confirm against the
 *            implementation.
 * @throws IOException
 *             if there was a problem deleting
 */
void removeSourceDocumentFile(SourceDocument aDocument) throws IOException;
}
4 changes: 4 additions & 0 deletions inception/inception-documents/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
<groupId>org.apache.wicket</groupId>
<artifactId>wicket-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.wicket</groupId>
<artifactId>wicket-util</artifactId>
</dependency>

<!-- Spring dependencies -->

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@
import static de.tudarmstadt.ukp.clarin.webanno.security.ValidationUtils.FILESYSTEM_RESERVED_CHARACTERS;
import static de.tudarmstadt.ukp.clarin.webanno.security.ValidationUtils.RELAXED_SHELL_SPECIAL_CHARACTERS;
import static de.tudarmstadt.ukp.inception.annotation.storage.CasMetadataUtils.addOrUpdateCasMetadata;
import static de.tudarmstadt.ukp.inception.project.api.ProjectService.DOCUMENT_FOLDER;
import static de.tudarmstadt.ukp.inception.project.api.ProjectService.PROJECT_FOLDER;
import static de.tudarmstadt.ukp.inception.project.api.ProjectService.SOURCE_FOLDER;
import static de.tudarmstadt.ukp.inception.project.api.ProjectService.withProjectLogger;
import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.CURATION_USER;
import static de.tudarmstadt.ukp.inception.support.WebAnnoConst.INITIAL_CAS_PSEUDO_USER;
Expand All @@ -51,19 +48,16 @@
import static java.util.Collections.emptyList;
import static java.util.Objects.isNull;
import static java.util.stream.Collectors.toSet;
import static org.apache.commons.io.IOUtils.copyLarge;
import static org.apache.commons.lang3.ArrayUtils.isEmpty;
import static org.apache.commons.lang3.StringUtils.contains;
import static org.apache.commons.lang3.StringUtils.containsAny;
import static org.apache.commons.lang3.StringUtils.isBlank;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.invoke.MethodHandles;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
Expand All @@ -87,7 +81,6 @@
import javax.persistence.NoResultException;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.Validate;
Expand Down Expand Up @@ -118,6 +111,7 @@
import de.tudarmstadt.ukp.clarin.webanno.security.model.User;
import de.tudarmstadt.ukp.inception.annotation.storage.CasStorageSession;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.documents.api.DocumentStorageService;
import de.tudarmstadt.ukp.inception.documents.api.RepositoryProperties;
import de.tudarmstadt.ukp.inception.documents.api.SourceDocumentStateStats;
import de.tudarmstadt.ukp.inception.documents.config.DocumentServiceAutoConfiguration;
Expand All @@ -141,7 +135,7 @@
public class DocumentServiceImpl
implements DocumentService
{
private final Logger log = LoggerFactory.getLogger(getClass());
private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private static final String MSG_DOCUMENT_NAME_TOO_LONG = "document.name.error.too-long";
private static final String MSG_DOCUMENT_NAME_EMPTY = "document.name.error.empty";
Expand All @@ -163,54 +157,28 @@ public class DocumentServiceImpl
private final ProjectService projectService;
private final ApplicationEventPublisher applicationEventPublisher;
private final RepositoryProperties repositoryProperties;
private final DocumentStorageService documentStorageService;

@Autowired
public DocumentServiceImpl(RepositoryProperties aRepositoryProperties,
CasStorageService aCasStorageService, DocumentImportExportService aImportExportService,
ProjectService aProjectService, ApplicationEventPublisher aApplicationEventPublisher,
EntityManager aEntityManager)
EntityManager aEntityManager, DocumentStorageService aDocumentStorageService)
{
repositoryProperties = aRepositoryProperties;
casStorageService = aCasStorageService;
importExportService = aImportExportService;
projectService = aProjectService;
applicationEventPublisher = aApplicationEventPublisher;
entityManager = aEntityManager;
documentStorageService = aDocumentStorageService;

if (repositoryProperties != null) {
BaseLoggers.BOOT_LOG.info("Document repository path: {}",
repositoryProperties.getPath());
}
}

// NO TRANSACTION REQUIRED - This does not do any database access, so we do not
// need to be in a transaction here. Avoiding the transaction speeds up the call.
@Deprecated
@Override
public File getDir()
{
return repositoryProperties.getPath();
}

// NO TRANSACTION REQUIRED - This does not do any database access, so we do not
// need to be in a transaction here. Avoiding the transaction speeds up the call.
private File getSourceDocumentFolder(SourceDocument aDocument)
{
Validate.notNull(aDocument, "Source document must be specified");
Validate.notNull(aDocument.getProject().getId(),
"Source document's project must have an ID");
Validate.notNull(aDocument.getId(), "Source document must have an ID");

return repositoryProperties.getPath().toPath() //
.toAbsolutePath() //
.resolve(PROJECT_FOLDER) //
.resolve(Long.toString(aDocument.getProject().getId())) //
.resolve(DOCUMENT_FOLDER)//
.resolve(Long.toString(aDocument.getId())) //
.resolve(SOURCE_FOLDER) //
.toFile();
}

// NO TRANSACTION REQUIRED - This does not do any database access, so we do not
// need to be in a transaction here. Avoiding the transaction speeds up the call.
@Override
Expand All @@ -219,24 +187,14 @@ public void exportSourceDocuments(OutputStream os, List<SourceDocument> selected
{
try (var zos = new ZipOutputStream(os)) {
for (var doc : selectedDocuments) {
try (var dis = new FileInputStream(getSourceDocumentFile(doc))) {
try (var dis = documentStorageService.openSourceDocumentFile(doc)) {
zos.putNextEntry(new ZipEntry(doc.getName()));
IOUtils.copyLarge(dis, zos);
}
}
}
}

// NO TRANSACTION REQUIRED - This does not do any database access, so we do not
// need to be in a transaction here. Avoiding the transaction speeds up the call.
@Override
public File getSourceDocumentFile(SourceDocument aDocument)
{
Validate.notNull(aDocument, "Source document must be specified");

return getSourceDocumentFolder(aDocument).toPath().resolve(aDocument.getName()).toFile();
}

@Override
@Transactional
public SourceDocument createSourceDocument(SourceDocument aDocument)
Expand Down Expand Up @@ -294,7 +252,7 @@ public AnnotationDocument createAnnotationDocument(AnnotationDocument aAnnotatio
entityManager.persist(aAnnotationDocument);

try (var logCtx = withProjectLogger(aAnnotationDocument.getProject())) {
log.info("Created annotation document {} in project {}", aAnnotationDocument,
LOG.info("Created annotation document {} in project {}", aAnnotationDocument,
aAnnotationDocument.getProject());
}

Expand Down Expand Up @@ -726,33 +684,27 @@ public void removeSourceDocument(SourceDocument aDocument) throws IOException
// on it might need to have access to the associated annotation documents
applicationEventPublisher.publishEvent(new BeforeDocumentRemovedEvent(this, aDocument));

for (AnnotationDocument annotationDocument : listAllAnnotationDocuments(aDocument)) {
for (var annotationDocument : listAllAnnotationDocuments(aDocument)) {
removeAnnotationDocument(annotationDocument);
}

entityManager.remove(
entityManager.contains(aDocument) ? aDocument : entityManager.merge(aDocument));
documentStorageService.removeSourceDocumentFile(aDocument);

String path = repositoryProperties.getPath().getAbsolutePath() + "/" + PROJECT_FOLDER + "/"
+ aDocument.getProject().getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId();

// remove from file both source and related annotation file
if (new File(path).exists()) {
FileUtils.forceDelete(new File(path));
}

Project project = aDocument.getProject();
var project = aDocument.getProject();
try (var logCtx = withProjectLogger(project)) {
log.info("Removed source document {} from project {}", aDocument, project);
LOG.info("Removed source document {} from project {}", aDocument, project);
}
}

@Override
@Transactional
public void removeAnnotationDocument(AnnotationDocument aAnnotationDocument)
public void removeAnnotationDocument(AnnotationDocument aAnnotationDocument) throws IOException
{
Validate.notNull(aAnnotationDocument, "Annotation document must be specified");

casStorageService.deleteCas(aAnnotationDocument.getDocument(),
aAnnotationDocument.getUser());
entityManager.remove(aAnnotationDocument);
}

Expand All @@ -778,34 +730,27 @@ public void uploadSourceDocument(InputStream aIs, SourceDocument aDocument,
createSourceDocument(aDocument);

// Import the actual content
File targetFile = getSourceDocumentFile(aDocument);
try (var session = CasStorageSession.openNested()) {
FileUtils.forceMkdir(targetFile.getParentFile());

try (var os = new FileOutputStream(targetFile)) {
copyLarge(aIs, os);
}
documentStorageService.writeSourceDocumentFile(aDocument, aIs);

// Check if the file has a valid format / can be converted without error
// This requires that the document ID has already been assigned
CAS cas = createOrReadInitialCas(aDocument, NO_CAS_UPGRADE, aFullProjectTypeSystem);
var cas = createOrReadInitialCas(aDocument, NO_CAS_UPGRADE, aFullProjectTypeSystem);

log.trace("Sending AfterDocumentCreatedEvent for {}", aDocument);
LOG.trace("Sending AfterDocumentCreatedEvent for {}", aDocument);
applicationEventPublisher
.publishEvent(new AfterDocumentCreatedEvent(this, aDocument, cas));

Project project = aDocument.getProject();
try (var logCtx = withProjectLogger(project)) {
log.info("Imported source document {} to project {}", aDocument, project);
LOG.info("Imported source document {} to project {}", aDocument, project);
}
}
catch (IOException e) {
FileUtils.forceDelete(targetFile);
removeSourceDocument(aDocument);
throw e;
}
catch (Exception e) {
FileUtils.forceDelete(targetFile);
removeSourceDocument(aDocument);
throw new IOException(e.getMessage(), e);
}
Expand Down Expand Up @@ -858,7 +803,7 @@ public CAS createOrReadInitialCas(SourceDocument aDocument, CasUpgradeMode aUpgr
{
Validate.notNull(aDocument, "Source document must be specified");

log.debug("Loading initial CAS for source document {} in project {}", aDocument,
LOG.debug("Loading initial CAS for source document {} in project {}", aDocument,
aDocument.getProject());

return casStorageService.readOrCreateCas(aDocument, INITIAL_CAS_PSEUDO_USER, aUpgradeMode,
Expand All @@ -868,7 +813,7 @@ public CAS createOrReadInitialCas(SourceDocument aDocument, CasUpgradeMode aUpgr
// we create them here lazily
try {
return importExportService.importCasFromFileNoChecks(
getSourceDocumentFile(aDocument), aDocument,
documentStorageService.getSourceDocumentFile(aDocument), aDocument,
aFullProjectTypeSystem);
}
catch (UIMAException e) {
Expand Down Expand Up @@ -1527,7 +1472,7 @@ public void beforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws IOExcep
// }

try (var logCtx = withProjectLogger(project)) {
log.info("Removed all documents from project {} being deleted", project);
LOG.info("Removed all documents from project {} being deleted", project);
}
}

Expand Down
Loading

0 comments on commit 8ed229c

Please sign in to comment.