From ac15fb8016fdb07e1fc848ff563e269cf01bd220 Mon Sep 17 00:00:00 2001
From: sebr72 <48369171+sebr72@users.noreply.github.com>
Date: Wed, 17 Jan 2024 16:50:18 +0100
Subject: [PATCH] Maintenance Swagger API to clean data dir of orphaned metadata absent from DB (#252)

* add maintenance entry point
* list datadir dir at metadata level
* Log orphanedDataPath into a File
* stream of strings to log at the end
* 'improve' logs outputs
* delete files
* Code cleanup
* Add new Counter for not deleted path, and refactoring
* polish

---------

Co-authored-by: christophe mangeat
---
 .../api/maintenance/DatadirCleaner.java       | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 services/src/main/java/org/fao/geonet/api/maintenance/DatadirCleaner.java

diff --git a/services/src/main/java/org/fao/geonet/api/maintenance/DatadirCleaner.java b/services/src/main/java/org/fao/geonet/api/maintenance/DatadirCleaner.java
new file mode 100644
index 0000000000..865e3ca3b6
--- /dev/null
+++ b/services/src/main/java/org/fao/geonet/api/maintenance/DatadirCleaner.java
@@ -0,0 +1,164 @@
+package org.fao.geonet.api.maintenance;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.swagger.v3.oas.annotations.Parameter;
+import io.swagger.v3.oas.annotations.tags.Tag;
+import org.apache.commons.io.FileUtils;
+import org.fao.geonet.kernel.GeonetworkDataDirectory;
+import org.fao.geonet.kernel.datamanager.IMetadataUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.MediaType;
+import org.springframework.security.access.prepost.PreAuthorize;
+import org.springframework.stereotype.Controller;
+import org.springframework.web.bind.annotation.*;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+
+import static java.lang.String.format;
+
+/**
+ * Maintenance endpoint that deletes entries of the metadata data directory whose metadata
+ * record is no longer known.
+ *
+ * <p>Every entry found two levels below the metadata data directory is a candidate: its name is
+ * parsed as a numeric metadata id and the entry is deleted when {@code metadataUtils.exists(id)}
+ * returns {@code false}. A report is written to {@code orphanedDataFiles.txt} in the metadata
+ * data directory.
+ *
+ * <p>The counters and the report writer are instance fields, which is why
+ * {@link #cleanDataDir(String)} is {@code synchronized}.
+ */
+@RequestMapping(value = {"/{portal}/api/maintenance"})
+@Tag(name = "maintenance")
+@Controller("maintenance")
+public class DatadirCleaner {
+
+    @Autowired
+    GeonetworkDataDirectory geonetworkDataDirectory;
+    @Autowired
+    IMetadataUtils metadataUtils;
+
+    /** Number of candidate paths visited by the current run. */
+    private final AtomicInteger processedPathCounter = new AtomicInteger();
+    /** Number of visited paths that the current run left on disk. */
+    private final AtomicInteger notDeletedPathCounter = new AtomicInteger();
+
+    /** Report writer of the run in progress, flushed every 100 processed paths. */
+    PrintWriter pwToFlush;
+
+    /**
+     * Deletes the orphaned entries of the metadata data directory and summarises the run.
+     *
+     * @param portal portal taken from the URL; only echoed in the response
+     * @return JSON object holding the {@code status}, {@code pathsCounters} and {@code warning} messages
+     * @throws IOException if the report file cannot be opened for writing
+     */
+    @io.swagger.v3.oas.annotations.Operation(summary = "Clean data dir", description = "Search for dangling metadata " +
+        "in data dir, and delete them since they are no longer referenced in the database. Please use cautiously.")
+    @RequestMapping(
+        path = "/cleanDatadir",
+        produces = MediaType.APPLICATION_JSON_VALUE,
+        method = RequestMethod.GET)
+    @ResponseStatus(value = HttpStatus.OK)
+    @PreAuthorize("hasAuthority('UserAdmin')")
+    @ResponseBody
+    public synchronized ObjectNode cleanDataDir(
+        @Parameter(description = "portal", required = true, hidden = true) @PathVariable(value = "portal") final String portal) throws IOException {
+        processedPathCounter.set(0);
+        notDeletedPathCounter.set(0);
+        final Path orphanedDataFilePath = cleanFile();
+        return new ObjectMapper().createObjectNode() //
+            .put("status", format("Cleaned the orphaned data: see details in %s.", orphanedDataFilePath)) //
+            .put("pathsCounters", format("Kept %d paths out of %d.", notDeletedPathCounter.get(), processedPathCounter.get()))
+            .put("warning", format("Although the portal %s was defined, it clears orphaned data without knowledge of the portal.", portal));
+    }
+
+    /**
+     * Walks the metadata data directory, deletes the orphaned entries and writes the report.
+     *
+     * @return path of the report file, {@code orphanedDataFiles.txt} in the metadata data directory
+     * @throws IOException if the report file cannot be opened for writing
+     */
+    public Path cleanFile() throws IOException {
+        final Path rootPath = geonetworkDataDirectory.getMetadataDataDir();
+        final Path orphanedDataReportFilePath = rootPath.resolve("orphanedDataFiles.txt");
+        // Files.list holds a directory handle until its stream is closed. flatMap closes the nested
+        // listings once they are consumed, but the outermost listing must be closed here.
+        try (PrintWriter pw = new PrintWriter(Files.newBufferedWriter(orphanedDataReportFilePath));
+             Stream<Path> topLevelPaths = listFiles(rootPath)) {
+            pwToFlush = pw;
+            topLevelPaths //
+                .flatMap(this::listFiles) //
+                .flatMap(this::processPath) //
+                .forEach(pw::println);
+        }
+        return orphanedDataReportFilePath;
+    }
+
+    /**
+     * Deletes {@code path} when it is orphaned and updates the counters.
+     *
+     * @param path candidate entry whose file name is expected to be a metadata id
+     * @return lines to append to the report for this path, possibly none
+     */
+    private Stream<String> processPath(final Path path) {
+        final Stream.Builder<String> toLog = Stream.builder();
+        if (!isOrphanedPath(path, toLog)) {
+            notDeletedPathCounter.incrementAndGet();
+        } else if (FileUtils.deleteQuietly(path.toFile().getAbsoluteFile())) {
+            toLog.add(path.toAbsolutePath().toString());
+            toLog.add(format("SQL# select count(*) from metadata where id = %s;", path.getFileName()));
+        } else {
+            // deleteQuietly reports failure only through its return value; do not claim a deletion
+            // that did not happen.
+            toLog.add(format("ERROR# Failed to delete %s.", path.toAbsolutePath()));
+            notDeletedPathCounter.incrementAndGet();
+        }
+        if (processedPathCounter.incrementAndGet() % 100 == 0) {
+            toLog.add(format("Processed %d paths.", processedPathCounter.get()));
+            pwToFlush.flush();
+        }
+        return toLog.build();
+    }
+
+    /**
+     * Tells whether {@code path} is named after a metadata id that {@code metadataUtils} does not know.
+     *
+     * @param path candidate entry
+     * @param toLog receives an {@code ERROR#} line when the check itself fails
+     * @return {@code true} only when the id was parsed and reported as absent; a failed check keeps the path
+     */
+    private boolean isOrphanedPath(final Path path, final Stream.Builder<String> toLog) {
+        try {
+            return !metadataUtils.exists(Integer.parseInt(path.getFileName().toString()));
+        } catch (RuntimeException e) {
+            toLog.add(format("ERROR# %s.", path));
+            return false;
+        }
+    }
+
+    /**
+     * Lists the direct children of {@code path}.
+     *
+     * @param path directory to list
+     * @return the children, or an empty stream when {@code path} is not a directory; the caller closes it
+     * @throws RuntimeException if the directory cannot be read
+     */
+    private Stream<Path> listFiles(final Path path) {
+        if (!Files.isDirectory(path)) {
+            return Stream.empty();
+        }
+        try {
+            return Files.list(path);
+        } catch (IOException e) {
+            throw new RuntimeException(format("Failed to access path %s.", path), e);
+        }
+    }
+}