From 7d5af562acdaf1d49be71ac02c6338c9ef4d9db2 Mon Sep 17 00:00:00 2001 From: ndc-dxc Date: Thu, 3 Oct 2024 16:34:18 +0200 Subject: [PATCH 1/5] harvester will stop for fatal errors --- .../BaseSemanticAssetHarvester.java | 28 ++++++++++++++++++- .../ndc/repository/TripleStoreRepository.java | 12 ++++++-- .../ndc/service/DefaultInstanceManager.java | 10 +++++++ .../resources/application-local.properties | 2 +- 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java index dd19e9e..bb7b851 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java @@ -21,16 +21,21 @@ import java.io.File; import java.nio.file.Path; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import static it.gov.innovazione.ndc.harvester.service.ActualConfigService.ConfigKey.MAX_FILE_SIZE_BYTES; @Slf4j @RequiredArgsConstructor public abstract class BaseSemanticAssetHarvester

implements SemanticAssetHarvester { + + private static final List INFRASTRUCTURE_EXCEPTIONS = List.of("java.net", "org.apache.http"); + private final SemanticAssetType type; private final NdcEventPublisher eventPublisher; private final ConfigService configService; @@ -40,6 +45,10 @@ public SemanticAssetType getType() { return type; } + private static boolean isInfrastructureTypeException(Throwable cause) { + return INFRASTRUCTURE_EXCEPTIONS.stream().anyMatch(cause.getClass().getName()::contains); + } + @Override public void harvest(Repository repository, Path rootPath) { log.debug("Looking for {} paths", type); @@ -58,6 +67,7 @@ public void harvest(Repository repository, Path rootPath) { processPath(repository.getUrl(), path); log.debug("Path {} processed correctly for {}", path, type); } catch (SinglePathProcessingException e) { + boolean isInfrastuctureError = checkInfrastructureError(e); Optional.ofNullable(HarvestExecutionContextUtils.getContext()) .ifPresent(context -> context.addHarvestingError(repository, e, path.getAllFiles())); eventPublisher.publishAlertableEvent( @@ -65,7 +75,7 @@ public void harvest(Repository repository, Path rootPath) { DefaultAlertableEvent.builder() .name("Harvester Single Path Processing Error") .description("Error processing " + type + " " + path + " in repo " + repository.getUrl()) - .category(EventCategory.SEMANTIC) + .category(isInfrastuctureError ? EventCategory.INFRASTRUCTURE : EventCategory.SEMANTIC) .severity(Severity.ERROR) .context(Map.of( "error", e.getRealErrorMessage(), @@ -81,6 +91,22 @@ public void harvest(Repository repository, Path rootPath) { } } + private boolean checkInfrastructureError(SinglePathProcessingException e) { + // checks if in the chain of exceptions there is an infrastructure error (es. java.net, httpException, etc) + Throwable cause = e; + Set seen = new HashSet<>(); + while (cause != null) { + if (!seen.add(cause)) { + return false; + } + if (isInfrastructureTypeException(cause)) { + return true; + } + cause = cause.getCause(); + } + return false; + } + private void notifyIfSizeExceed(P path, Long maxFileSizeBytes) { HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); if (Objects.nonNull(context) && Objects.nonNull(path)) { diff --git a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java index 3e00b49..5a36aca 100644 --- a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java +++ b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java @@ -30,8 +30,13 @@ public TripleStoreRepository(VirtuosoClient virtuosoClient) { this.virtuosoClient = virtuosoClient; } + private static String getCommandAndLog(String command) { + log.info("Update command: {}", command); + return command; + } + private static String getRenameCommand(String oldGraph, String newGraph) { - return format(RENAME_GRAPH, oldGraph, newGraph); + return getCommandAndLog(format(RENAME_GRAPH, oldGraph, newGraph)); } private void saveWithConnection(String graphName, Model model, RDFConnection connection) { @@ -48,7 +53,7 @@ private void saveWithConnection(String graphName, Model model, RDFConnection con } private static String getUpdateCommand(String repoUrl, String repoUrlPrefix) { - return format(DROP_SILENT_GRAPH_WITH_LOG_ENABLE_3, reworkRepoUrlIfNecessary(repoUrl, repoUrlPrefix)); + return getCommandAndLog(format(DROP_SILENT_GRAPH_WITH_LOG_ENABLE_3, reworkRepoUrlIfNecessary(repoUrl, repoUrlPrefix))); } @SneakyThrows @@ -62,11 +67,13 @@ private static String reworkRepoUrlIfNecessary(String repoUrl, String repoUrlPre } public void clearExistingNamedGraph(String repoUrl) { + log.info("Clearing existing named graph for {}", repoUrl); clearExistingNamedGraph(repoUrl, ONLINE_GRAPH_PREFIX); } public void clearExistingNamedGraph(String repoUrl, String prefix) { try { + log.info("Clearing existing named graph for {} with prefix {}", repoUrl, prefix); String sparqlEndpoint = virtuosoClient.getSparqlEndpoint(); UpdateExecution .service(sparqlEndpoint) @@ -92,6 +99,7 @@ public void save(String graphName, Model model) { public void switchInstances(it.gov.innovazione.ndc.model.harvester.Repository repository) { String tmpGraphName = reworkRepoUrlIfNecessary(repository.getUrl(), TMP_GRAPH_PREFIX); + log.info("Switching instances on Virtuoso ({}, {})", repository.getUrl(), tmpGraphName); clearExistingNamedGraph(repository.getUrl()); rename(tmpGraphName, repository.getUrl()); } diff --git a/src/main/java/it/gov/innovazione/ndc/service/DefaultInstanceManager.java b/src/main/java/it/gov/innovazione/ndc/service/DefaultInstanceManager.java index a4a3d0f..8ea4f05 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/DefaultInstanceManager.java +++ b/src/main/java/it/gov/innovazione/ndc/service/DefaultInstanceManager.java @@ -7,6 +7,7 @@ import it.gov.innovazione.ndc.repository.SemanticAssetMetadataDeleter; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; import java.util.List; @@ -17,6 +18,7 @@ @Service @RequiredArgsConstructor +@Slf4j public class DefaultInstanceManager implements InstanceManager { private final ConfigService configService; @@ -47,13 +49,21 @@ public Instance getCurrentInstance(Repository repository) { public void switchInstances(Repository repository) { // switch instance on Repositories + log.info("Switching instance for repository {}", repository.getUrl()); Instance newInstance = getNextOnlineInstance(repository); + + log.info("Switching Elastic search to instance {} for repo {}", newInstance, repository.getUrl()); + configService.writeConfigKey(ACTIVE_INSTANCE, "system", newInstance, repository.getId()); Instance instanceToDelete = newInstance.switchInstance(); + log.info("Deleting metadata for instance {} for repo {}", instanceToDelete, repository.getUrl()); + deleter.deleteByRepoUrl(repository.getUrl(), instanceToDelete); + log.info("Switching instances on Virtuoso for repo {}", repository.getUrl()); + // switch instance on Virtuoso tripleStoreRepository.switchInstances(repository); } diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index 71fcaba..e14d0ac 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -1,4 +1,4 @@ -harvester.repositories=https://github.com/istat/ts-ontologie-vocabolari-controllati +harvester.repositories=https://github.com/FrankMaverick/Leo-OpenData virtuoso.sparql=http://localhost:8890/sparql-auth virtuoso.sparql-graph-store=http://localhost:8890/sparql-graph-crud-auth From 6e5633d38a7b7fa136db58208d31d7b2e171ec04 Mon Sep 17 00:00:00 2001 From: ndc-dxc Date: Fri, 18 Oct 2024 11:38:35 +0200 Subject: [PATCH 2/5] adds accepts header for url check --- .../gov/innovazione/ndc/controller/CheckUrlController.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/it/gov/innovazione/ndc/controller/CheckUrlController.java b/src/main/java/it/gov/innovazione/ndc/controller/CheckUrlController.java index 4729544..8887397 100644 --- a/src/main/java/it/gov/innovazione/ndc/controller/CheckUrlController.java +++ b/src/main/java/it/gov/innovazione/ndc/controller/CheckUrlController.java @@ -21,6 +21,10 @@ @Slf4j public class CheckUrlController { + public static final String ACCEPTED_MIME_TYPES = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif," + + "image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"; + @GetMapping @SneakyThrows @Operation( @@ -38,6 +42,7 @@ public ResponseEntity check(@RequestParam String url) { log.info("Checking url {}", url); HttpURLConnection huc = (HttpURLConnection) new URL(url).openConnection(); huc.setConnectTimeout(5000); + huc.setRequestProperty("Accept", ACCEPTED_MIME_TYPES); log.info("Response code for url {} is : {}", url, huc.getResponseCode()); return new ResponseEntity<>(HttpStatus.valueOf(huc.getResponseCode())); } From 0bc723f522c4fff2c66f2596cbf3da8fc216d327 Mon Sep 17 00:00:00 2001 From: ndc-dxc Date: Thu, 24 Oct 2024 12:39:30 +0200 Subject: [PATCH 3/5] implementazione iniziale logger --- .../SimpleHarvestRepositoryProcessor.java | 70 +++++++++++--- .../ndc/harvester/HarvesterService.java | 27 ++++++ .../ControlledVocabularyPathProcessor.java | 19 ++++ .../service/HarvesterRunService.java | 10 +- .../ndc/harvester/util/FileUtils.java | 24 +++++ .../ndc/service/logging/HarvesterStage.java | 10 ++ .../ndc/service/logging/LoggingContext.java | 93 +++++++++++++++++++ .../service/logging/NDCHarvesterLogger.java | 59 ++++++++++++ src/main/resources/logback-spring.xml | 27 ++++++ 9 files changed, 322 insertions(+), 17 deletions(-) create mode 100644 src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java create mode 100644 src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java create mode 100644 src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java create mode 100644 src/main/resources/logback-spring.xml diff --git a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java index d428233..7aea332 100644 --- a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java @@ -16,6 +16,8 @@ import it.gov.innovazione.ndc.model.harvester.Repository; import it.gov.innovazione.ndc.service.GithubService; import it.gov.innovazione.ndc.service.InstanceManager; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ThreadUtils; @@ -33,6 +35,10 @@ import static it.gov.innovazione.ndc.config.AsyncConfiguration.THREAD_PREFIX; import static it.gov.innovazione.ndc.model.harvester.HarvesterRun.Status.ALREADY_RUNNING; import static it.gov.innovazione.ndc.model.harvester.HarvesterRun.Status.FAILURE; +import static it.gov.innovazione.ndc.model.harvester.HarvesterRun.Status.RUNNING; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; import static java.lang.String.format; import static org.apache.commons.lang3.StringUtils.endsWith; import static org.apache.commons.lang3.StringUtils.startsWith; @@ -70,8 +76,25 @@ public void execute(String runId, Repository repository, String correlationId, S Instance instanceToHarvest = instanceManager.getNextOnlineInstance(repository); + HarvestExecutionContextUtils.setContext( + HarvestExecutionContext.builder() + .repository(repository) + .revision(revision) + .correlationId(correlationId) + .runId(runId) + .currentUserId(currentUserLogin) + .instance(instanceToHarvest) + .build()); + publishHarvesterStartedEvent(repository, correlationId, revision, runId, currentUserLogin, instanceToHarvest); + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.START) + .harvesterStatus(RUNNING) + .message("Starting harvester on repo " + repository.getUrl()) + .additionalInfo("force", force) + .build()); + synchronized (locks) { if (locks.contains(repository.getId() + revision)) { log.info("Harvesting for repo '{}' is already in progress", repository.getUrl()); @@ -85,6 +108,15 @@ public void execute(String runId, Repository repository, String correlationId, S format("Harvesting for repo %s is already running", repository.getUrl())), currentUserLogin); + + logSemanticError(LoggingContext.builder() + .stage(HarvesterStage.START) + .harvesterStatus(ALREADY_RUNNING) + .message("Failed harvester on repo " + repository.getUrl()) + .details("Harvester on repo " + repository.getUrl() + " is already running") + .additionalInfo("force", force) + .build()); + setThreadName(runId, repository.getId(), revision, "IDLE"); return; } @@ -97,16 +129,6 @@ public void execute(String runId, Repository repository, String correlationId, S verifySameRunWasNotExecuted(repository, revision); } - HarvestExecutionContextUtils.setContext( - HarvestExecutionContext.builder() - .repository(repository) - .revision(revision) - .correlationId(correlationId) - .runId(runId) - .currentUserId(currentUserLogin) - .instance(instanceToHarvest) - .build()); - verifyNoNdcIssuesInRepoIfNecessary(repository); harvesterService.harvest(repository, revision, instanceToHarvest); @@ -142,6 +164,13 @@ private void verifyNoNdcIssuesInRepoIfNecessary(Repository repository) { boolean hasIssues = ndcIssue.isPresent(); if (hasIssues) { URL issueUrl = ndcIssue.get().getUrl(); + logSemanticError(LoggingContext.builder() + .stage(HarvesterStage.START) + .harvesterStatus(FAILURE) + .repoUrl(repository.getUrl()) + .message("Repository has at least one open NDC issues") + .additionalInfo("issueUrl", issueUrl) + .build()); throw new RepoContainsNdcIssueException(format("Repository %s has NDC issues [%s]", repository.getUrl(), issueUrl.toString())); @@ -155,14 +184,31 @@ private void removeLock(Repository repository, String revision) { } private synchronized void verifySameRunWasNotExecuted(Repository repository, String revision) { - if (harvesterRunService.isHarvestingAlreadyExecuted(repository.getId(), revision)) { + Optional harvesterRun = harvesterRunService.isHarvestingAlreadyExecuted(repository.getId(), revision); + if (harvesterRun.isPresent()) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.START) + .harvesterStatus(HarvesterRun.Status.UNCHANGED) + .repoUrl(repository.getUrl()) + .message("Harvesting for repo '" + repository.getUrl() + "' was already executed") + .additionalInfo("otherJobId", harvesterRun.get().getId()) + .additionalInfo("revision", revision) + .build()); throw new HarvesterAlreadyExecutedException(format("Harvesting for repo '%s' with revision '%s' was already executed and no force param was passed", repository.getUrl(), revision)); } } private synchronized void verifyHarvestingIsNotInProgress(String runId, Repository repository) { - if (harvesterRunService.isHarvestingInProgress(runId, repository)) { + Optional harvestingInProgress = harvesterRunService.isHarvestingInProgress(runId, repository); + if (harvestingInProgress.isPresent()) { + logSemanticError(LoggingContext.builder() + .jobId(runId) + .message("Harvesting for repo '" + repository.getUrl() + "' is already in progress") + .harvesterStatus(ALREADY_RUNNING) + .repoUrl(repository.getUrl()) + .additionalInfo("otherJobId", harvestingInProgress.get().getId()) + .build()); throw new HarvesterAlreadyInProgressException(format("Harvesting for repo '%s' is already in progress", repository.getUrl())); } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java index 2b31057..b1b023d 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java @@ -6,9 +6,11 @@ import it.gov.innovazione.ndc.harvester.model.index.RightsHolder; import it.gov.innovazione.ndc.harvester.service.RepositoryService; import it.gov.innovazione.ndc.harvester.util.FileUtils; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.model.harvester.Repository; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -20,9 +22,13 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import static it.gov.innovazione.ndc.repository.TripleStoreRepository.ONLINE_GRAPH_PREFIX; import static it.gov.innovazione.ndc.repository.TripleStoreRepository.TMP_GRAPH_PREFIX; +import static it.gov.innovazione.ndc.service.logging.HarvesterStage.CLONE_REPO; +import static it.gov.innovazione.ndc.service.logging.HarvesterStage.MAINTAINER_EXTRACTION; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toMap; @@ -44,6 +50,17 @@ private static void updateContextWithMaintainers(List mai context = HarvestExecutionContext.builder() .build(); } + if (!maintainers.isEmpty()) { + logSemanticInfo(LoggingContext.builder() + .stage(MAINTAINER_EXTRACTION) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Adding maintainers to context") + .additionalInfo("maintainers", + maintainers.stream() + .map(Repository.Maintainer::toString) + .collect(Collectors.joining(","))) + .build()); + } context.addMaintainers(maintainers); } @@ -68,6 +85,16 @@ public void harvest(Repository repository, String revision, Instance instance) t try { Path path = cloneRepoToTempPath(repoUrl, revision); + logSemanticInfo(LoggingContext.builder() + .stage(CLONE_REPO) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .repoUrl(repoUrl) + .message("Repository cloned") + .additionalInfo("tempPath", path.toAbsolutePath()) + .additionalInfo("revision", revision) + .additionalInfo("instance", instance) + .build()); + try { updateContext(path, instance); harvestClonedRepo(normalisedRepo, path); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java index ae7979a..be16bed 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java @@ -7,15 +7,21 @@ import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.model.SemanticAssetModelFactory; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import it.gov.innovazione.ndc.service.VocabularyDataService; import it.gov.innovazione.ndc.service.VocabularyIdentifier; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import java.util.List; +import java.util.stream.Collectors; + +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; @Component @Slf4j @@ -73,6 +79,19 @@ public void dropCsvIndicesForRepo(String repoUrl, Instance instance) { log.debug("Found {} vocabs with indices to drop", vocabs.size()); } + if (vocabs.isEmpty()) { + return; + } + + logSemanticInfo(LoggingContext.builder() + .repoUrl(repoUrl) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .stage(HarvesterStage.CLEANING_METADATA) + .message("Cleaning " + vocabs.size() + " found vocabularies") + .additionalInfo("vocabs", vocabs.stream().map(SemanticAssetMetadata::getIri).collect(Collectors.joining(","))) + .additionalInfo("instance", instance) + .build()); + vocabs.forEach(v -> { VocabularyIdentifier vocabId = new VocabularyIdentifier(v.getAgencyId(), v.getKeyConcept()); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java index d07bb48..e39635b 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/HarvesterRunService.java @@ -60,10 +60,11 @@ public int saveHarvesterRun(HarvesterRun harvesterRun) { harvesterRun.getReason()); } - public boolean isHarvestingInProgress(String runId, Repository repository) { + public Optional isHarvestingInProgress(String runId, Repository repository) { return getRecentRuns(HARVESTING_RECENT_DAYS) .filter(harvesterRun -> !equalsIgnoreCase(harvesterRun.getId(), runId)) - .anyMatch(harvesterRun -> isAlreadyRunning(harvesterRun, repository)); + .filter(harvesterRun -> isAlreadyRunning(harvesterRun, repository)) + .findFirst(); } private boolean isMoreRecentThan(HarvesterRun harvesterRun, Long days) { @@ -85,13 +86,12 @@ private boolean isAlreadyRunning(HarvesterRun harvesterRun, Repository repositor && harvesterRun.getStatus() == HarvesterRun.Status.RUNNING; } - public boolean isHarvestingAlreadyExecuted(String repositoryId, String revision) { + public Optional isHarvestingAlreadyExecuted(String repositoryId, String revision) { return getRecentRuns(HARVESTING_RECENT_DAYS) .filter(harvesterRun -> harvesterRun.getRepositoryId().equals(repositoryId)) .filter(harvesterRun -> harvesterRun.getStatus() == HarvesterRun.Status.SUCCESS) .max(Comparator.comparing(HarvesterRun::getStartedAt)) - .filter(harvesterRun -> equalsIgnoreCase(harvesterRun.getRevision(), revision)) - .isPresent(); + .filter(harvesterRun -> equalsIgnoreCase(harvesterRun.getRevision(), revision)); } public int updateHarvesterRun(HarvesterRun harvesterRun) { diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/util/FileUtils.java b/src/main/java/it/gov/innovazione/ndc/harvester/util/FileUtils.java index 3abf03f..e516dc7 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/util/FileUtils.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/util/FileUtils.java @@ -2,6 +2,8 @@ import it.gov.innovazione.ndc.harvester.exception.InvalidAssetFolderException; import it.gov.innovazione.ndc.model.harvester.Repository; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -16,6 +18,9 @@ import java.util.Locale; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.model.harvester.HarvesterRun.Status.RUNNING; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; import static java.util.Comparator.reverseOrder; import static org.apache.commons.io.FileUtils.readLines; @@ -67,9 +72,21 @@ public List getMaintainersIfPossible(Path path) { log.info("Extracting maintainers from README.md in {}", path); return extractMaintainers(readme); } + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.MAINTAINER_EXTRACTION) + .harvesterStatus(RUNNING) + .message("No README.md found in " + path) + .additionalInfo("path", path.toString()) + .build()); log.warn("No README.md found in {}", path); return List.of(); } + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.MAINTAINER_EXTRACTION) + .harvesterStatus(RUNNING) + .message("Path does not exist " + path) + .additionalInfo("path", path.toString()) + .build()); log.warn("Path {} does not exist", path); return List.of(); @@ -86,6 +103,13 @@ private List extractMaintainers(File readme) { } } } catch (IOException e) { + logSemanticError(LoggingContext.builder() + .message("Error reading README.md to extract maintainers") + .harvesterStatus(RUNNING) + .stage(HarvesterStage.MAINTAINER_EXTRACTION) + .details(e.getMessage()) + .additionalInfo("path", readme.toString()) + .build()); log.error("Error reading README.md {}", readme, e); } return List.of(); diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java b/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java new file mode 100644 index 0000000..c73940c --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java @@ -0,0 +1,10 @@ +package it.gov.innovazione.ndc.service.logging; + +public enum HarvesterStage { + START, + CLONE_REPO, + MAINTAINER_EXTRACTION, + CLEANING_VIRTUOSO, + CLEANING_METADATA, + PROCESS_RESOURCE +} diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java new file mode 100644 index 0000000..7e6e4b1 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java @@ -0,0 +1,93 @@ +package it.gov.innovazione.ndc.service.logging; + +import it.gov.innovazione.ndc.alerter.entities.EventCategory; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import lombok.Builder; +import lombok.Data; +import lombok.Singular; +import lombok.With; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.springframework.boot.logging.LogLevel; + +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +@Builder(toBuilder = true) +@With +@Data +public class LoggingContext { + private static final Map>> contextMapper = + Map.of( + 2, Pair.of("RepoUrl", LoggingContext::getRepoUrl), + 3, Pair.of("Path", LoggingContext::getPath), + 4, Pair.of("MainResource", LoggingContext::getMainResource), + 5, Pair.of("Message", LoggingContext::getMessage), + 6, Pair.of("Status", LoggingContext::getHarvesterStatus), + 7, Pair.of("Details", LoggingContext::getDetails), + 8, Pair.of("EventCategory", LoggingContext::getEventCategory)); + @Builder.Default + private final LogLevel level = LogLevel.INFO; + @Builder.Default + private final String component = "HARVESTER"; + private final String jobId; + private final String repoUrl; + private final String path; + private final String mainResource; + private final String message; + private final String details; + @Singular + private final Map additionalInfos; + private final HarvesterRun.Status harvesterStatus; + private final HarvesterStage stage; + private final EventCategory eventCategory; + + String makeLogEntry() { + String logMessage = + Stream.concat(contextMapper.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .map(Map.Entry::getValue) + .map(pair -> Pair.of(pair.getKey(), pair.getRight().apply(this))), + additionalInfos.entrySet().stream()) + .filter(pair -> Objects.nonNull(pair.getValue())) + .filter(pair -> StringUtils.isNoneBlank(pair.getValue().toString())) + .map(e -> e.getKey() + ": " + e.getValue()) + .collect(Collectors.joining(" ")); + + String logHeaders = + Stream.of(component, + stage.name(), + jobId) + .map(s -> Objects.isNull(s) ? "" : s) + .map(String::toUpperCase) + .map(String::trim) + .map(s -> "[" + s + "]") + .collect(Collectors.joining(" ")); + + return logHeaders + " " + logMessage; + } + + public LoggingContext semantic() { + return this.withEventCategory(EventCategory.SEMANTIC); + } + + public LoggingContext infrastructure() { + return this.withEventCategory(EventCategory.INFRASTRUCTURE); + } + + public LoggingContext warn() { + return this.withLevel(LogLevel.WARN); + } + + public LoggingContext trace() { + return this.withLevel(LogLevel.TRACE); + } + + public LoggingContext error() { + return this.withLevel(LogLevel.ERROR); + } + +} diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java new file mode 100644 index 0000000..88aba78 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java @@ -0,0 +1,59 @@ +package it.gov.innovazione.ndc.service.logging; + +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContext; +import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.logging.LogLevel; + +import java.util.Map; +import java.util.function.Consumer; + +@Slf4j +public class NDCHarvesterLogger { + + private static final Map> mapper = Map.of( + LogLevel.ERROR, log::error, + LogLevel.WARN, log::warn, + LogLevel.INFO, log::info, + LogLevel.DEBUG, log::debug, + LogLevel.TRACE, log::trace + ); + + private static void log(LoggingContext loggingContext) { + HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + if (context != null) { + loggingContext = loggingContext.toBuilder() + .additionalInfo("revision", context.getRevision()) + .additionalInfo("correlationId", context.getCorrelationId()) + .additionalInfo("currentUserId", context.getCurrentUserId()) + .additionalInfo("rootPath", context.getRootPath()) + .additionalInfo("instance", context.getInstance()) + .build() + .withJobId(context.getRunId()) + .withRepoUrl(context.getRepository().getUrl()); + } + try { + Consumer logMethod = mapper.get(loggingContext.getLevel()); + logMethod.accept(loggingContext.makeLogEntry()); + } catch (Exception e) { + log.error("There was an exception while logging", e); + } + } + + public static void logSemanticInfo(LoggingContext loggingContext) { + log(loggingContext.semantic().withLevel(LogLevel.INFO)); + } + + public static void logSemanticError(LoggingContext loggingContext) { + log(loggingContext.error().semantic()); + } + + public static void logSemanticWarn(LoggingContext loggingContext) { + log(loggingContext.warn().semantic()); + } + + public static void logSemanticTrace(LoggingContext loggingContext) { + log(loggingContext.trace().semantic()); + } + +} diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..f4fc42f --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,27 @@ + + + + + + %d{yyyy-MM-dd HH:mm:ss} [-%5p] %logger{36} - %m%n + + + + + + + + %d{yyyy-MM-dd HH:mm:ss} [-%5p] %msg%n + + + + + + + + + + + \ No newline at end of file From 6e83eb8a225991ccce1c591fd7615eed04c476fe Mon Sep 17 00:00:00 2001 From: ndc-dxc Date: Wed, 13 Nov 2024 18:16:18 +0100 Subject: [PATCH 4/5] logging requirement --- build.gradle | 2 +- .../ndc/alerter/AlerterService.java | 8 ++ .../controller/AbstractCrudController.java | 26 ++++++ .../ndc/alerter/data/ProfileInitializer.java | 2 +- .../innovazione/ndc/config/GitHubConfig.java | 17 ++++ .../SimpleHarvestRepositoryProcessor.java | 28 ++++--- .../controller/ConfigurationController.java | 2 - .../harvester/AgencyRepositoryService.java | 59 ++++++++++++-- .../ndc/harvester/HarvesterService.java | 33 +++++--- .../ndc/harvester/csv/CsvParser.java | 29 ++++++- .../BaseSemanticAssetHarvester.java | 37 +++++++++ .../model/BaseSemanticAssetModel.java | 30 +++++++ .../model/ControlledVocabularyModel.java | 12 +++ .../model/SemanticAssetModelFactory.java | 20 +++++ .../BaseSemanticAssetPathProcessor.java | 42 ++++++++++ .../ControlledVocabularyPathProcessor.java | 21 ++++- .../ControlledVocabularyFolderScanner.java | 47 ++++++++++- .../scanners/OntologyFolderScanner.java | 40 ++++++++- .../scanners/SchemaFolderScanner.java | 37 +++++++-- .../harvester/service/RepositoryService.java | 8 ++ .../startupjob/TempEraserStartupJob.java | 5 ++ .../ndc/repository/TripleStoreRepository.java | 26 +++++- .../ndc/service/VocabularyDataService.java | 13 +++ .../ndc/service/logging/HarvesterStage.java | 1 + .../ndc/service/logging/LoggingContext.java | 14 +++- .../service/logging/NDCHarvesterLogger.java | 39 ++++++--- .../logging/NDCHarvesterLoggerUtils.java | 81 +++++++++++++++++++ .../resources/application-local.properties | 1 + src/main/resources/application.properties | 3 +- src/main/resources/logback-spring.xml | 4 +- 30 files changed, 620 insertions(+), 67 deletions(-) create mode 100644 src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java diff --git a/build.gradle b/build.gradle index 2cf318c..21fa04b 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ plugins { - id 'org.springframework.boot' version '3.3.4' + id 'org.springframework.boot' version '3.3.5' id 'io.spring.dependency-management' version '1.1.6' id 'java' id 'checkstyle' diff --git a/src/main/java/it/gov/innovazione/ndc/alerter/AlerterService.java b/src/main/java/it/gov/innovazione/ndc/alerter/AlerterService.java index 136e04d..fadc761 100644 --- a/src/main/java/it/gov/innovazione/ndc/alerter/AlerterService.java +++ b/src/main/java/it/gov/innovazione/ndc/alerter/AlerterService.java @@ -3,6 +3,7 @@ import it.gov.innovazione.ndc.alerter.data.EventService; import it.gov.innovazione.ndc.alerter.dto.EventDto; import it.gov.innovazione.ndc.alerter.event.AlertableEvent; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import org.springframework.security.core.context.SecurityContext; import org.springframework.security.core.context.SecurityContextHolder; @@ -12,6 +13,7 @@ import java.time.Instant; import java.util.Optional; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logApplicationInfo; import static org.apache.commons.lang3.ObjectUtils.defaultIfNull; @Service @@ -30,6 +32,12 @@ public void alert(AlertableEvent alertableEvent) { .createdBy(getUser()) .occurredAt(defaultIfNull(alertableEvent.getOccurredAt(), Instant.now())) .build()); + logApplicationInfo(LoggingContext.builder() + .component("alerter") + .message("Alerted event") + .details(alertableEvent.getDescription()) + .eventCategory(alertableEvent.getCategory()) + .build()); } public static String getUser() { diff --git a/src/main/java/it/gov/innovazione/ndc/alerter/controller/AbstractCrudController.java b/src/main/java/it/gov/innovazione/ndc/alerter/controller/AbstractCrudController.java index 91576d4..64c3392 100644 --- a/src/main/java/it/gov/innovazione/ndc/alerter/controller/AbstractCrudController.java +++ b/src/main/java/it/gov/innovazione/ndc/alerter/controller/AbstractCrudController.java @@ -5,6 +5,8 @@ import it.gov.innovazione.ndc.alerter.data.EntityService; import it.gov.innovazione.ndc.alerter.dto.SlimPager; import it.gov.innovazione.ndc.alerter.entities.Nameable; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger; import jakarta.validation.Valid; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -82,6 +84,14 @@ protected void handlePreCreate(D dto) { protected void handlePostCreate(D createdEntity) { log.info("Created entity: {}", createdEntity); + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .component("EntityCreation") + .message("Created entity") + .additionalInfo("entityName", createdEntity.getName()) + .additionalInfo("entityId", createdEntity.getId()) + .additionalInfo("entityType", createdEntity.getClass().getSimpleName()) + .build()); } @PatchMapping @@ -103,6 +113,14 @@ protected void handlePreUpdate(D dto) { protected void handlePostUpdate(D updatedDto) { log.info("Updated entity: {}", updatedDto); + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .component("EntityUpdate") + .message("Updated entity") + .additionalInfo("entityName", updatedDto.getName()) + .additionalInfo("entityId", updatedDto.getId()) + .additionalInfo("entityType", updatedDto.getClass().getSimpleName()) + .build()); } @DeleteMapping("{id}") @@ -123,5 +141,13 @@ protected void handlePreDelete(String id) { protected void handlePostDelete(D deletedDto) { log.info("Deleted entity: {}", deletedDto); + NDCHarvesterLogger.logApplicationInfo( + LoggingContext.builder() + .component("EntityDeletion") + .message("Deleted entity") + .additionalInfo("entityName", deletedDto.getName()) + .additionalInfo("entityId", deletedDto.getId()) + .additionalInfo("entityType", deletedDto.getClass().getSimpleName()) + .build()); } } diff --git a/src/main/java/it/gov/innovazione/ndc/alerter/data/ProfileInitializer.java b/src/main/java/it/gov/innovazione/ndc/alerter/data/ProfileInitializer.java index d3e8fc8..41705e6 100644 --- a/src/main/java/it/gov/innovazione/ndc/alerter/data/ProfileInitializer.java +++ b/src/main/java/it/gov/innovazione/ndc/alerter/data/ProfileInitializer.java @@ -31,7 +31,7 @@ public class ProfileInitializer implements Initializer { public void init() { List existingProfileNames = repository.findAll().stream() .map(Profile::getName) - .collect(Collectors.toList()); + .toList(); DEFAULT_PROFILES.stream() .filter(not(p -> existingProfileNames.contains(p.getLeft()))) .map(pair -> Profile.builder() diff --git a/src/main/java/it/gov/innovazione/ndc/config/GitHubConfig.java b/src/main/java/it/gov/innovazione/ndc/config/GitHubConfig.java index d298356..e9698f5 100644 --- a/src/main/java/it/gov/innovazione/ndc/config/GitHubConfig.java +++ b/src/main/java/it/gov/innovazione/ndc/config/GitHubConfig.java @@ -5,6 +5,8 @@ import it.gov.innovazione.ndc.alerter.event.DefaultAlertableEvent; import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; import it.gov.innovazione.ndc.service.NdcGitHubClient; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -16,6 +18,8 @@ import java.time.Instant; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logApplicationInfo; + @Slf4j @Configuration @RequiredArgsConstructor @@ -39,11 +43,24 @@ NdcGitHubClient gitHub(@Value("${github.personal-access-token}") String token) { .severity(Severity.WARNING) .build()); + NDCHarvesterLogger.logApplicationWarn(LoggingContext.builder() + .component("GitHubConfig") + .message("GitHubConfig not provided") + .details("GitHubConfig personal access token not provided. The GitHub issuer capability will be disabled") + .eventCategory(EventCategory.APPLICATION) + .build()); + return NdcGitHubClient.builder() .enabled(false) .build(); } log.info("GitHub personal access token provided. The GitHub issuer capability will be enabled"); + logApplicationInfo(LoggingContext.builder() + .component("GitHubConfig") + .message("GitHubConfig provided") + .details("GitHub personal access token provided. The GitHub issuer capability will be enabled") + .eventCategory(EventCategory.APPLICATION) + .build()); return NdcGitHubClient.builder() .gitHub(new GitHubBuilder().withOAuthToken(token).build()) .enabled(true) diff --git a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java index 7aea332..7751afc 100644 --- a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java @@ -18,6 +18,7 @@ import it.gov.innovazione.ndc.service.InstanceManager; import it.gov.innovazione.ndc.service.logging.HarvesterStage; import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLoggerUtils; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ThreadUtils; @@ -86,13 +87,24 @@ public void execute(String runId, Repository repository, String correlationId, S .instance(instanceToHarvest) .build()); + NDCHarvesterLoggerUtils.setInitialContext( + LoggingContext.builder() + .jobId(runId) + .repoUrl(repository.getUrl()) + .stage(HarvesterStage.START) + .harvesterStatus(RUNNING) + .component("HARVESTER") + .additionalInfo("revision", revision) + .additionalInfo("correlationId", correlationId) + .additionalInfo("currentUserLogin", currentUserLogin) + .additionalInfo("instance", instanceToHarvest) + .additionalInfo("force", force) + .build()); + publishHarvesterStartedEvent(repository, correlationId, revision, runId, currentUserLogin, instanceToHarvest); logSemanticInfo(LoggingContext.builder() - .stage(HarvesterStage.START) - .harvesterStatus(RUNNING) .message("Starting harvester on repo " + repository.getUrl()) - .additionalInfo("force", force) .build()); synchronized (locks) { @@ -110,11 +122,9 @@ public void execute(String runId, Repository repository, String correlationId, S currentUserLogin); logSemanticError(LoggingContext.builder() - .stage(HarvesterStage.START) .harvesterStatus(ALREADY_RUNNING) .message("Failed harvester on repo " + repository.getUrl()) .details("Harvester on repo " + repository.getUrl() + " is already running") - .additionalInfo("force", force) .build()); setThreadName(runId, repository.getId(), revision, "IDLE"); @@ -148,7 +158,9 @@ public void execute(String runId, Repository repository, String correlationId, S } finally { log.info("Cleaning up after processing {}", repository.getUrl()); harvesterService.clearTempGraphIfExists(repository.getUrl()); + NDCHarvesterLoggerUtils.clearContext(); log.info("Cleaned up after processing {}", repository.getUrl()); + } // cleanup @@ -165,7 +177,6 @@ private void verifyNoNdcIssuesInRepoIfNecessary(Repository repository) { if (hasIssues) { URL issueUrl = ndcIssue.get().getUrl(); logSemanticError(LoggingContext.builder() - .stage(HarvesterStage.START) .harvesterStatus(FAILURE) .repoUrl(repository.getUrl()) .message("Repository has at least one open NDC issues") @@ -187,12 +198,9 @@ private synchronized void verifySameRunWasNotExecuted(Repository repository, Str Optional harvesterRun = harvesterRunService.isHarvestingAlreadyExecuted(repository.getId(), revision); if (harvesterRun.isPresent()) { logSemanticWarn(LoggingContext.builder() - .stage(HarvesterStage.START) .harvesterStatus(HarvesterRun.Status.UNCHANGED) - .repoUrl(repository.getUrl()) .message("Harvesting for repo '" + repository.getUrl() + "' was already executed") .additionalInfo("otherJobId", harvesterRun.get().getId()) - .additionalInfo("revision", revision) .build()); throw new HarvesterAlreadyExecutedException(format("Harvesting for repo '%s' with revision '%s' was already executed and no force param was passed", repository.getUrl(), revision)); @@ -203,10 +211,8 @@ private synchronized void verifyHarvestingIsNotInProgress(String runId, Reposito Optional harvestingInProgress = harvesterRunService.isHarvestingInProgress(runId, repository); if (harvestingInProgress.isPresent()) { logSemanticError(LoggingContext.builder() - .jobId(runId) .message("Harvesting for repo '" + repository.getUrl() + "' is already in progress") .harvesterStatus(ALREADY_RUNNING) - .repoUrl(repository.getUrl()) .additionalInfo("otherJobId", harvestingInProgress.get().getId()) .build()); throw new HarvesterAlreadyInProgressException(format("Harvesting for repo '%s' is already in progress", repository.getUrl())); diff --git a/src/main/java/it/gov/innovazione/ndc/controller/ConfigurationController.java b/src/main/java/it/gov/innovazione/ndc/controller/ConfigurationController.java index 8e1f474..9a58c54 100644 --- a/src/main/java/it/gov/innovazione/ndc/controller/ConfigurationController.java +++ b/src/main/java/it/gov/innovazione/ndc/controller/ConfigurationController.java @@ -1,7 +1,6 @@ package it.gov.innovazione.ndc.controller; import io.swagger.v3.oas.annotations.Operation; -import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; import it.gov.innovazione.ndc.eventhandler.event.ConfigService; import it.gov.innovazione.ndc.harvester.service.ActualConfigService; import lombok.RequiredArgsConstructor; @@ -30,7 +29,6 @@ public class ConfigurationController { private final ActualConfigService configService; - private final NdcEventPublisher eventPublisher; @GetMapping @Operation( diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java b/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java index aecf165..2dff9f7 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/AgencyRepositoryService.java @@ -11,6 +11,8 @@ import it.gov.innovazione.ndc.harvester.util.GitUtils; import it.gov.innovazione.ndc.harvester.util.PropertiesUtils; import it.gov.innovazione.ndc.harvester.util.Version; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -22,8 +24,9 @@ import java.util.List; import java.util.Optional; import java.util.function.Predicate; -import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; import static java.util.function.Predicate.not; @Component @@ -84,8 +87,21 @@ private

List

findPaths(Path clonedRepo, Semanti if (!fileUtils.folderExists(assetRootPath)) { log.warn("No {} folder found in {}", type.getDescription(), clonedRepo); + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("No " + type.getFolderName() + " folder found in " + clonedRepo) + .additionalInfo("path", clonedRepo.toString()) + .build()); + assetRootPath = Path.of(clonedRepo.toString(), type.getLegacyFolderName()); if (!fileUtils.folderExists(assetRootPath)) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("No " + type.getLegacyFolderName() + " folder found in " + clonedRepo) + .additionalInfo("path", clonedRepo.toString()) + .build()); + log.warn("No {} or {} folder found in {}", type.getDescription(), type.getLegacyFolderName(), clonedRepo); + return List.of(); } } @@ -95,14 +111,23 @@ private

List

findPaths(Path clonedRepo, Semanti private boolean isDirectoryToBeSkipped(Path path) { String directoryName = fileUtils.getLowerCaseFileName(path); - return fileUtils.isDirectory(path) && this.lowerSkipWords.stream().anyMatch(directoryName::contains); + boolean skip = fileUtils.isDirectory(path) && this.lowerSkipWords.stream().anyMatch(directoryName::contains); + if (skip) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("Skipping directory " + path) + .additionalInfo("directory", path.toString()) + .additionalInfo("skipWords", lowerSkipWords) + .build()); + } + return skip; } @SneakyThrows private

List

createSemanticAssetPaths(Path path, FolderScanner

scanner, boolean ignoreObsoleteVersions) { List dirContents = fileUtils.listContents(path).stream() .filter(c -> !isDirectoryToBeSkipped(c)) - .collect(Collectors.toList()); + .toList(); boolean hasSubDir = dirContents.stream().anyMatch(fileUtils::isDirectory); if (!hasSubDir) { return tryScanDir(path, scanner); @@ -117,7 +142,7 @@ private

List

createSemanticAssetPaths(Path path } } - return dirContents.stream() + List

assets = dirContents.stream() // consider folders for recursion .filter(fileUtils::isDirectory) // only consider folders which are not obsolete @@ -125,13 +150,28 @@ private

List

createSemanticAssetPaths(Path path // recurse and flatten .flatMap(subDir -> createSemanticAssetPaths(subDir, scanner, ignoreObsoleteVersions).stream()) // then collect - .collect(Collectors.toList()); + .toList(); + + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("Scanned folder for " + scanner.getClass().getSimpleName()) + .additionalInfo("folder", path.toString()) + .additionalInfo("assets", assets.size()) + .build()); + + return assets; } private

List

tryScanDir(Path dir, FolderScanner

scanner) throws IOException { try { return scanner.scanFolder(dir); } catch (InvalidAssetFolderException e) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("Invalid folder " + dir + "; skipping") + .details(e.getRealErrorMessage()) + .additionalInfo("folder", dir.toString()) + .build()); log.warn("Invalid folder {}; skipping", dir, e); return Collections.emptyList(); } @@ -148,7 +188,14 @@ private Predicate isObsoleteVersionPredicate(String latestVersionString) { return path -> { String fileName = path.getFileName().toString(); boolean hasValidVersion = Version.of(fileName).isPresent(); - return hasValidVersion && !latestVersionString.equals(fileName); + boolean isObsolete = hasValidVersion && !latestVersionString.equals(fileName); + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("Skipping obsolete version " + path) + .additionalInfo("path", path.toString()) + .additionalInfo("latestVersion", latestVersionString) + .build()); + return isObsolete; }; } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java index b1b023d..8f6b786 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java @@ -11,6 +11,7 @@ import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLoggerUtils; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -26,8 +27,10 @@ import static it.gov.innovazione.ndc.repository.TripleStoreRepository.ONLINE_GRAPH_PREFIX; import static it.gov.innovazione.ndc.repository.TripleStoreRepository.TMP_GRAPH_PREFIX; +import static it.gov.innovazione.ndc.service.logging.HarvesterStage.CLEANING_METADATA; import static it.gov.innovazione.ndc.service.logging.HarvesterStage.CLONE_REPO; import static it.gov.innovazione.ndc.service.logging.HarvesterStage.MAINTAINER_EXTRACTION; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logInfrastructureError; import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.toList; @@ -85,14 +88,13 @@ public void harvest(Repository repository, String revision, Instance instance) t try { Path path = cloneRepoToTempPath(repoUrl, revision); - logSemanticInfo(LoggingContext.builder() + NDCHarvesterLoggerUtils.overrideContext(LoggingContext.builder() .stage(CLONE_REPO) - .harvesterStatus(HarvesterRun.Status.RUNNING) - .repoUrl(repoUrl) + .build()); + + logSemanticInfo(LoggingContext.builder() .message("Repository cloned") .additionalInfo("tempPath", path.toAbsolutePath()) - .additionalInfo("revision", revision) - .additionalInfo("instance", instance) .build()); try { @@ -104,7 +106,7 @@ public void harvest(Repository repository, String revision, Instance instance) t log.info("Cleaned up data for {} completed", path); } log.info("Repo {} processed correctly", repoUrl); - } catch (IOException e) { + } catch (Exception e) { log.error("Exception while processing {}", repoUrl, e); throw e; } @@ -206,9 +208,22 @@ private void cleanUpTripleStore(String repoUrl, String prefix) { } private void cleanUpIndexedMetadata(String repoUrl, Instance instance) { - log.debug("Cleaning up indexed metadata for {}", repoUrl); - long deletedCount = semanticAssetMetadataRepository.deleteByRepoUrl(repoUrl, instance); - log.debug("Deleted {} indexed metadata for {}", deletedCount, repoUrl); + try { + log.debug("Cleaning up indexed metadata for {}", repoUrl); + long deletedCount = semanticAssetMetadataRepository.deleteByRepoUrl(repoUrl, instance); + logSemanticInfo(LoggingContext.builder() + .stage(CLEANING_METADATA) + .message("Deleted " + deletedCount + " indexed metadata") + .additionalInfo("deletedCount", deletedCount) + .build()); + log.debug("Deleted {} indexed metadata for {}", deletedCount, repoUrl); + } catch (Exception e) { + logInfrastructureError(LoggingContext.builder() + .stage(CLEANING_METADATA) + .message("Error while cleaning up indexed metadata") + .build()); + log.error("Error while cleaning up indexed metadata for {}", repoUrl, e); + } } public void cleanTempGraphsForConfiguredRepo() { diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/csv/CsvParser.java b/src/main/java/it/gov/innovazione/ndc/harvester/csv/CsvParser.java index d818799..45d9cd9 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/csv/CsvParser.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/csv/CsvParser.java @@ -1,5 +1,8 @@ package it.gov.innovazione.ndc.harvester.csv; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; @@ -16,6 +19,9 @@ import java.util.Objects; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; +import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; @Component @@ -38,13 +44,28 @@ public CsvData loadCsvDataFromFile(String csvFile) { return tryParseCsv(csvReader, csvFile); } catch (IOException e) { - throw new InvalidCsvException(String.format("Cannot parse CSV file '%s'", csvFile), e); + logSemanticError( + LoggingContext.builder() + .message(format("Cannot parse CSV file '%s'", csvFile)) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .build()); + throw new InvalidCsvException(format("Cannot parse CSV file '%s'", csvFile), e); } } private CsvData tryParseCsv(FileReader csvReader, String csvFile) throws IOException { try (CSVParser parser = parseReader(csvReader)) { - return buildCsvDataFromParser(parser, csvFile); + CsvData csvData = buildCsvDataFromParser(parser, csvFile); + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Parsed CSV file") + .additionalInfo("csvFile", csvFile) + .additionalInfo("idName", csvData.getIdName()) + .additionalInfo("records", csvData.getRecords().size()) + .build()); + return csvData; } } @@ -72,13 +93,13 @@ private List> readRecords(CSVParser parser) { private String getIdName(CSVParser parser, String csvFile) { List headerNames = parser.getHeaderNames(); if (headerNames.isEmpty()) { - throw new InvalidCsvException(String.format("Cannot find any headers in '%s'", csvFile)); + throw new InvalidCsvException(format("Cannot find any headers in '%s'", csvFile)); } return nameExtractors.stream() .map(e -> e.extract(headerNames)) .filter(Objects::nonNull) .findFirst() - .orElseThrow(() -> new InvalidCsvException(String.format("Cannot find id column in '%s'", csvFile))); + .orElseThrow(() -> new InvalidCsvException(format("Cannot find id column in '%s'", csvFile))); } } \ No newline at end of file diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java index bb7b851..06addfe 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java @@ -16,6 +16,8 @@ import it.gov.innovazione.ndc.harvester.model.Instance; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.model.harvester.Repository; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -27,8 +29,12 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import static it.gov.innovazione.ndc.harvester.service.ActualConfigService.ConfigKey.MAX_FILE_SIZE_BYTES; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logInfrastructureError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; @Slf4j @RequiredArgsConstructor @@ -84,6 +90,23 @@ public void harvest(Repository repository, Path rootPath) { "isFatal", e.isFatal())) .build()); log.error("Error processing {} {} in repo {}", type, path, repository.getUrl(), e); + if (isInfrastuctureError) { + logInfrastructureError( + LoggingContext.builder() + .stage(HarvesterStage.PROCESS_RESOURCE) + .message("Infrastructure error processing " + type + " " + path) + .details(e.getRealErrorMessage()) + .additionalInfo("error", e.getRealErrorMessage()) + .build()); + } else { + logSemanticError( + LoggingContext.builder() + .stage(HarvesterStage.PROCESS_RESOURCE) + .message("Error processing " + type + " " + path) + .details(e.getRealErrorMessage()) + .additionalInfo("error", e.getRealErrorMessage()) + .build()); + } if (e.isFatal()) { throw e; } @@ -113,10 +136,24 @@ private void notifyIfSizeExceed(P path, Long maxFileSizeBytes) { List files = path.getAllFiles(); if (Objects.nonNull(maxFileSizeBytes) && maxFileSizeBytes > 0 && isAnyBiggerThan(maxFileSizeBytes, files)) { + files.stream() .filter(file -> file.length() > maxFileSizeBytes) .forEach(file -> log.info("[FILE-SCANNER] -- File(s) {} is bigger than {} ", file.getName(), maxFileSizeBytes)); notify(context, path, maxFileSizeBytes); + + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .message("File is bigger than maxFileSizeBytes") + .additionalInfo("Ttlpath", path.getTtlPath()) + .additionalInfo("files", files.size()) + .additionalInfo("filesBiggerThanMaxSize", + files.stream() + .filter(file -> file.length() > maxFileSizeBytes) + .map(File::toString) + .collect(Collectors.joining(","))) + .additionalInfo("maxFileSizeBytes", maxFileSizeBytes) + .build()); } } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/model/BaseSemanticAssetModel.java b/src/main/java/it/gov/innovazione/ndc/harvester/model/BaseSemanticAssetModel.java index 5d92384..9eedbf3 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/model/BaseSemanticAssetModel.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/model/BaseSemanticAssetModel.java @@ -8,7 +8,10 @@ import it.gov.innovazione.ndc.harvester.model.index.NodeSummary; import it.gov.innovazione.ndc.harvester.model.index.RightsHolder; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.model.profiles.Admsapit; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.Getter; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.Property; @@ -39,6 +42,8 @@ import static it.gov.innovazione.ndc.harvester.model.extractors.NodeSummaryExtractor.extractRequiredNodeSummary; import static it.gov.innovazione.ndc.harvester.model.extractors.NodeSummaryExtractor.maybeNodeSummaries; import static it.gov.innovazione.ndc.harvester.model.extractors.RightsHolderExtractor.getAgencyId; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.lang.String.format; import static java.util.Collections.emptyList; import static org.apache.jena.rdf.model.ResourceFactory.createResource; @@ -98,6 +103,13 @@ public Resource getMainResource() { mainResource = getUniqueResourceByType(getMainResourceTypeIri()); } + logSemanticInfo(LoggingContext.builder() + .harvesterStatus(HarvesterRun.Status.RUNNING) + .repoUrl(repoUrl) + .message(format("Main resource found: %s", mainResource.getURI())) + .stage(HarvesterStage.PROCESS_RESOURCE) + .build()); + return mainResource; } @@ -132,10 +144,28 @@ private void checkFileDeclaresSingleResource(List resources, String ty } if (resources.isEmpty()) { + logSemanticError(LoggingContext.builder() + .harvesterStatus(HarvesterRun.Status.RUNNING) + .repoUrl(repoUrl) + .message(format("No resource of type '%s' in '%s'", typeIri, StringUtils.hasLength(source) ? source : "provided file")) + .stage(HarvesterStage.PROCESS_RESOURCE) + .additionalInfo("source", source) + .additionalInfo("typeIri", typeIri) + .build()); maybeThrowInvalidModelException(validationContext, () -> new InvalidModelException( format("No resource of type '%s' in '%s'", typeIri, StringUtils.hasLength(source) ? source : "provided file")), true); } + logSemanticError(LoggingContext.builder() + .harvesterStatus(HarvesterRun.Status.RUNNING) + .repoUrl(repoUrl) + .message(format("Found %d resources of type '%s' in '%s', expecting only 1", + resources.size(), typeIri, StringUtils.hasLength(source) ? source : "provided file")) + .stage(HarvesterStage.PROCESS_RESOURCE) + .additionalInfo("source", source) + .additionalInfo("typeIri", typeIri) + .additionalInfo("resources", resources.stream().map(Resource::getURI).collect(Collectors.joining(","))) + .build()); maybeThrowInvalidModelException(validationContext, () -> new InvalidModelException( format("Found %d resources of type '%s' in '%s', expecting only 1", diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/model/ControlledVocabularyModel.java b/src/main/java/it/gov/innovazione/ndc/harvester/model/ControlledVocabularyModel.java index 49b0ec4..1af2488 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/model/ControlledVocabularyModel.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/model/ControlledVocabularyModel.java @@ -6,7 +6,10 @@ import it.gov.innovazione.ndc.harvester.model.index.Distribution; import it.gov.innovazione.ndc.harvester.model.index.RightsHolder; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.model.profiles.NDC; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.extern.slf4j.Slf4j; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.Resource; @@ -20,6 +23,7 @@ import static it.gov.innovazione.ndc.harvester.SemanticAssetType.CONTROLLED_VOCABULARY; import static it.gov.innovazione.ndc.harvester.model.SemanticAssetModelValidationContext.NO_VALIDATION; import static it.gov.innovazione.ndc.model.profiles.EuropePublicationVocabulary.FILE_TYPE_RDF_TURTLE; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.lang.String.format; import static org.apache.jena.vocabulary.DCAT.distribution; @@ -102,8 +106,16 @@ public void addNdcDataServiceProperties(String baseUrl) { rdfModel.add(dataServiceNode, NDC.servesDataset, getMainResource()); rdfModel.add(getMainResource(), NDC.hasDataService, dataServiceNode); rdfModel.add(dataServiceNode, NDC.endpointURL, endpointUrl); + logSemanticInfo(LoggingContext.builder() + .message("Added NDC data service properties") + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("dataServiceNode", dataServiceNode) + .additionalInfo("endpointUrl", endpointUrl) + .build()); } + private String buildDataServiceIndividualUri() { return format("https://w3id.org/italia/data/data-service/%s-%s", getAgencyId().getIdentifier(), getKeyConcept()); } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetModelFactory.java b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetModelFactory.java index a3aa693..b21d44b 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetModelFactory.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetModelFactory.java @@ -1,13 +1,18 @@ package it.gov.innovazione.ndc.harvester.model; import it.gov.innovazione.ndc.harvester.model.exception.InvalidModelException; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.service.InstanceManager; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import org.apache.jena.rdf.model.Model; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFDataMgr; import org.springframework.stereotype.Component; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.lang.String.format; @Component @@ -39,8 +44,23 @@ public SchemaModel createSchema(String ttlFile, String repoUrl) { private T loadAndBuild(String source, ModelConstructor c) { try { Model model = RDFDataMgr.loadModel(source, Lang.TURTLE); + logSemanticInfo( + LoggingContext.builder() + .message(format("Loaded RDF model from '%s'", source)) + .additionalInfo("modelSize", model.size()) + .additionalInfo("modelSource", source) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .stage(HarvesterStage.PROCESS_RESOURCE) + .build()); return c.build(model, source); } catch (Exception e) { + logSemanticError( + LoggingContext.builder() + .message(format("Cannot load RDF model from '%s'", source)) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .details(e.getMessage()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .build()); throw new InvalidModelException(format("Cannot load RDF model from '%s'", source), e); } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/BaseSemanticAssetPathProcessor.java b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/BaseSemanticAssetPathProcessor.java index 075faea..c0c4aef 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/BaseSemanticAssetPathProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/BaseSemanticAssetPathProcessor.java @@ -8,8 +8,11 @@ import it.gov.innovazione.ndc.harvester.model.extractors.RightsHolderExtractor; import it.gov.innovazione.ndc.harvester.model.index.NodeSummary; import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.jena.rdf.model.Resource; @@ -19,6 +22,10 @@ import java.util.Optional; import static it.gov.innovazione.ndc.harvester.model.SemanticAssetModelValidationContext.NO_VALIDATION; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logInfrastructureError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; import static java.util.stream.Collectors.toList; @RequiredArgsConstructor @@ -48,8 +55,20 @@ private void collectRightsHolderInContext(String repoUrl, M model) { try { HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); context.addRightsHolder(RightsHolderExtractor.getAgencyId(model.getMainResource(), NO_VALIDATION)); + logSemanticInfo(LoggingContext.builder() + .message("Added rights holder to context") + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("rightsHolder", context.getRightsHolders()) + .build()); } catch (Exception e) { log.error("Error adding rights holder to repo " + repoUrl, e); + logSemanticWarn(LoggingContext.builder() + .message("Error adding rights holder to context") + .details(e.getMessage()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .build()); } } @@ -76,8 +95,20 @@ public void process(String repoUrl, P path) { } catch (Exception e) { log.error("Error processing {}", path, e); if (e instanceof SinglePathProcessingException singlePathProcessingException) { + logSemanticError(LoggingContext.builder() + .message("Error processing " + path) + .details(singlePathProcessingException.getRealErrorMessage()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .build()); throw new SinglePathProcessingException(String.format("Cannot process '%s'", path), e, singlePathProcessingException.isFatal()); } + logInfrastructureError(LoggingContext.builder() + .message("Error processing " + path) + .details(e.getMessage()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .build()); throw new SinglePathProcessingException(String.format("Cannot process '%s'", path), e, false); } } @@ -88,6 +119,12 @@ private void indexMetadataForSearch(M model) { postProcessMetadata(metadata); try { metadataRepository.save(metadata); + logSemanticInfo(LoggingContext.builder() + .message("Indexed metadata for " + model.getMainResource()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("metadata", metadata) + .build()); } catch (Exception e) { log.error("Error saving metadata for {}", model.getMainResource(), e); throw new SinglePathProcessingException("Cannot save metadata", e, true); @@ -109,6 +146,11 @@ private void persistModelToTripleStore(String repoUrl, P path, M model) { log.debug("Storing RDF content for {} in Virtuoso", model.getMainResource()); try { tripleStoreRepository.save(repoUrl, model.getRdfModel()); + logSemanticInfo(LoggingContext.builder() + .message("Saved RDF content for " + model.getMainResource()) + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .build()); } catch (Exception e) { log.error("Error saving RDF content for {}", model.getMainResource(), e); throw new SinglePathProcessingException("Cannot save RDF content", e, true); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java index be16bed..03f4a60 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/pathprocessors/ControlledVocabularyPathProcessor.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; @Component @@ -51,6 +52,16 @@ protected void processWithModel(String repoUrl, CvPath path, ControlledVocabular String agencyId = model.getAgencyId().getIdentifier(); VocabularyIdentifier vocabularyIdentifier = new VocabularyIdentifier(agencyId, keyConcept); + logSemanticError(LoggingContext.builder() + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Indexing CSV for " + vocabularyIdentifier) + .additionalInfo("csvPath", p) + .additionalInfo("vocabularyIdentifier", vocabularyIdentifier) + .additionalInfo("keyConcept", keyConcept) + .additionalInfo("agencyId", agencyId) + .build()); + parseAndIndexCsv(vocabularyIdentifier, p); }); } @@ -84,12 +95,9 @@ public void dropCsvIndicesForRepo(String repoUrl, Instance instance) { } logSemanticInfo(LoggingContext.builder() - .repoUrl(repoUrl) - .harvesterStatus(HarvesterRun.Status.RUNNING) .stage(HarvesterStage.CLEANING_METADATA) .message("Cleaning " + vocabs.size() + " found vocabularies") .additionalInfo("vocabs", vocabs.stream().map(SemanticAssetMetadata::getIri).collect(Collectors.joining(","))) - .additionalInfo("instance", instance) .build()); vocabs.forEach(v -> { @@ -105,6 +113,13 @@ private void tryToDropIndex(SemanticAssetMetadata v, VocabularyIdentifier vocabI vocabularyDataService.dropIndex(vocabId); log.info("{} dropped", vocabId); } catch (Exception e) { + logSemanticError(LoggingContext.builder() + .stage(HarvesterStage.CLEANING_METADATA) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Could not drop index " + vocabId) + .additionalInfo("vocabId", vocabId) + .additionalInfo("iri", v.getIri()) + .build()); log.error("Could not drop index {}", vocabId, e); } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/ControlledVocabularyFolderScanner.java b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/ControlledVocabularyFolderScanner.java index 850bab0..d4c9d75 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/ControlledVocabularyFolderScanner.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/ControlledVocabularyFolderScanner.java @@ -4,6 +4,9 @@ import it.gov.innovazione.ndc.harvester.model.CvPath; import it.gov.innovazione.ndc.harvester.util.FileUtils; import it.gov.innovazione.ndc.harvester.util.PropertiesUtils; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; @@ -15,6 +18,10 @@ import java.util.Optional; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticWarn; + @Component @Slf4j public class ControlledVocabularyFolderScanner implements FolderScanner { @@ -35,6 +42,12 @@ public List scanFolder(Path folder) throws IOException { Optional maybeTtl = findAtMostOne(folder, TURTLE_FILE_EXTENSION, "turtle controlled vocabulary"); if (maybeTtl.isEmpty()) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("folder", folder.toString()) + .message("Controlled vocabulary folder does not contain any TTL file") + .build()); log.warn("Controlled vocabulary folder '{}' does not contain any TTL file", folder.toString()); return Collections.emptyList(); } @@ -44,15 +57,41 @@ public List scanFolder(Path folder) throws IOException { Optional maybeCsv = findAtMostOne(folder, ".csv", "flattened controlled vocabulary"); if (maybeCsv.isPresent()) { + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("folder", folder.toString()) + .additionalInfo("csvPath", maybeCsv.get().toString()) + .additionalInfo("ttlPath", ttlPath) + .message("Found CSV file associated to TTL file") + .build()); return List.of(CvPath.of(ttlPath, maybeCsv.get().toString())); } else { + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("folder", folder.toString()) + .additionalInfo("ttlPath", ttlPath) + .message("No CSV file associated to TTL file") + .build()); log.info("No CSV file associated to {} in {}", ttlPath, folder); return List.of(CvPath.of(ttlPath, null)); } } private boolean fileNameDoesNotContainSkipWords(Path path) { - return lowerSkipWords.stream().noneMatch(fileUtils.getLowerCaseFileName(path)::contains); + boolean skip = lowerSkipWords.stream().noneMatch(fileUtils.getLowerCaseFileName(path)::contains); + if (!skip) { + logSemanticWarn(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("path", path.toString()) + .message("Skipping file because it contains skip words") + .additionalInfo("skipWords", lowerSkipWords) + .build()); + log.info("Skipping file '{}' because it contains skip words", path); + } + return skip; } private Optional findAtMostOne(Path parent, String extension, String fileTypeDescription) throws IOException { @@ -63,6 +102,12 @@ private Optional findAtMostOne(Path parent, String extension, String fileT .collect(Collectors.toList()); if (hits.size() > 1) { + logSemanticError(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("folder", parent.toString()) + .message("Folder contains more than one " + fileTypeDescription) + .build()); log.error("Folder '{}' contains more than one {}", parent.toString(), fileTypeDescription); throw new InvalidAssetFolderException(String.format("Folder '%s' has more than one %s", parent, fileTypeDescription)); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/OntologyFolderScanner.java b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/OntologyFolderScanner.java index 1e644e2..47f3f24 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/OntologyFolderScanner.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/OntologyFolderScanner.java @@ -3,6 +3,9 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.util.FileUtils; import it.gov.innovazione.ndc.harvester.util.PropertiesUtils; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import org.springframework.stereotype.Component; import java.io.IOException; @@ -10,6 +13,8 @@ import java.util.List; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; + @Component public class OntologyFolderScanner implements FolderScanner { public static final int MIN_SKIP_WORD_LENGTH = 3; @@ -27,7 +32,7 @@ public OntologyFolderScanner(FileUtils fileUtils, OntologyFolderScannerPropertie @Override public List scanFolder(Path folder) throws IOException { - return fileUtils.listContents(folder) + List assets = fileUtils.listContents(folder) .stream() // only accept ttls .filter(this::isTurtleFilePath) @@ -37,13 +42,42 @@ public List scanFolder(Path folder) throws IOException { .map(path -> SemanticAssetPath.of(path.toString())) // collect to a list .collect(Collectors.toList()); + + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Scanned folder for ontology files") + .additionalInfo("folder", folder.toString()) + .additionalInfo("assets", assets.size()) + .build()); + + return assets; + } private boolean isTurtleFilePath(Path path) { - return fileUtils.getLowerCaseFileName(path).endsWith(TURTLE_FILE_EXTENSION); + boolean isTurtle = fileUtils.getLowerCaseFileName(path).endsWith(TURTLE_FILE_EXTENSION); + if (!isTurtle) { + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("file", path.toString()) + .message("Skipping file due to extension - it's not a turtle file") + .build()); + } + return isTurtle; } private boolean fileNameDoesNotContainSkipWords(Path path) { - return lowerSkipWords.stream().noneMatch(fileUtils.getLowerCaseFileName(path)::contains); + boolean skip = lowerSkipWords.stream().noneMatch(fileUtils.getLowerCaseFileName(path)::contains); + if (!skip) { + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("file", path.toString()) + .message("Skipping file due to skip words") + .build()); + } + return skip; } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/SchemaFolderScanner.java b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/SchemaFolderScanner.java index 42ae869..c91281b 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/scanners/SchemaFolderScanner.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/scanners/SchemaFolderScanner.java @@ -2,6 +2,9 @@ import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; import it.gov.innovazione.ndc.harvester.util.FileUtils; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Component; @@ -9,18 +12,42 @@ import java.nio.file.Path; import java.util.List; import java.util.Locale; -import java.util.stream.Collectors; + +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; @Component @RequiredArgsConstructor public class SchemaFolderScanner implements FolderScanner { private final FileUtils fileUtils; + private static boolean isIndex(Path path) { + boolean isIndex = path.toString().toLowerCase(Locale.ROOT).endsWith("/index.ttl"); + if (!isIndex) { + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .additionalInfo("path", path.toString()) + .message("Skipping file not named 'index.ttl'") + .build()); + } + return isIndex; + } + @Override public List scanFolder(Path folder) throws IOException { - return fileUtils.listContents(folder).stream() - .filter(path -> path.toString().toLowerCase(Locale.ROOT).endsWith("/index.ttl")) - .map(path -> SemanticAssetPath.of(path.toString())) - .collect(Collectors.toList()); + List schemas = fileUtils.listContents(folder).stream() + .filter(SchemaFolderScanner::isIndex) + .map(path -> SemanticAssetPath.of(path.toString())) + .toList(); + + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.PATH_SCANNING) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Scanned folder for schema files") + .additionalInfo("folder", folder.toString()) + .additionalInfo("schemas", schemas.size()) + .build()); + + return schemas; } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java index ac89a39..f9b2226 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java @@ -4,6 +4,8 @@ import it.gov.innovazione.ndc.controller.RepositoryController; import it.gov.innovazione.ndc.harvester.model.index.RightsHolder; import it.gov.innovazione.ndc.model.harvester.Repository; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -22,6 +24,7 @@ import java.util.Optional; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.util.Collections.emptyList; import static java.util.Collections.emptySet; import static java.util.stream.Collectors.groupingBy; @@ -281,6 +284,11 @@ public void storeRightsHolders(Repository repository, Map getRightsHolders() { diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/TempEraserStartupJob.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/TempEraserStartupJob.java index acec58c..ed8d3a5 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/TempEraserStartupJob.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/startupjob/TempEraserStartupJob.java @@ -1,6 +1,8 @@ package it.gov.innovazione.ndc.harvester.service.startupjob; import it.gov.innovazione.ndc.harvester.HarvesterService; +import it.gov.innovazione.ndc.service.logging.LoggingContext; +import it.gov.innovazione.ndc.service.logging.NDCHarvesterLoggerUtils; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Service; @@ -12,6 +14,9 @@ public class TempEraserStartupJob implements StartupJob { @Override public void run() { + NDCHarvesterLoggerUtils.setInitialContext(LoggingContext.builder() + .component("TempEraserStartupJob") + .build()); harvesterService.cleanTempGraphsForConfiguredRepo(); } diff --git a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java index 5a36aca..cf8bbd9 100644 --- a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java +++ b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java @@ -1,6 +1,8 @@ package it.gov.innovazione.ndc.repository; import com.apicatalog.jsonld.StringUtils; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.jena.arq.querybuilder.SelectBuilder; @@ -14,6 +16,9 @@ import java.net.URL; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logInfrastructureError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticError; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logSemanticInfo; import static java.lang.String.format; @Slf4j @@ -74,6 +79,13 @@ public void clearExistingNamedGraph(String repoUrl) { public void clearExistingNamedGraph(String repoUrl, String prefix) { try { log.info("Clearing existing named graph for {} with prefix {}", repoUrl, prefix); + + logSemanticInfo(LoggingContext.builder() + .stage(HarvesterStage.CLEANING_VIRTUOSO) + .message("Clearing existing named graph for repo " + repoUrl) + .additionalInfo("prefix", prefix) + .build()); + String sparqlEndpoint = virtuosoClient.getSparqlEndpoint(); UpdateExecution .service(sparqlEndpoint) @@ -81,10 +93,22 @@ public void clearExistingNamedGraph(String repoUrl, String prefix) { .execute(); } catch (Exception e) { log.error(format("Could not clear existing named graph! - %s", repoUrl), e); - if (e instanceof HttpException) { + boolean isHttpException = e instanceof HttpException; + if (isHttpException) { HttpException httpException = (HttpException) e; log.error("HttpException: {}", httpException.getResponse()); } + LoggingContext loggingContext = LoggingContext.builder() + .stage(HarvesterStage.CLEANING_VIRTUOSO) + .message("Could not clear existing named graph for repo " + repoUrl) + .details(e.getMessage()) + .additionalInfo("prefix", prefix) + .build(); + if (isHttpException) { + logInfrastructureError(loggingContext); + } else { + logSemanticError(loggingContext); + } throw new TripleStoreRepositoryException(format("Could not delete graph - '%s'", repoUrl), e); } } diff --git a/src/main/java/it/gov/innovazione/ndc/service/VocabularyDataService.java b/src/main/java/it/gov/innovazione/ndc/service/VocabularyDataService.java index 0b1d6b4..ca5153a 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/VocabularyDataService.java +++ b/src/main/java/it/gov/innovazione/ndc/service/VocabularyDataService.java @@ -5,6 +5,9 @@ import it.gov.innovazione.ndc.gen.dto.VocabularyData; import it.gov.innovazione.ndc.harvester.csv.CsvParser; import it.gov.innovazione.ndc.model.Builders; +import it.gov.innovazione.ndc.model.harvester.HarvesterRun; +import it.gov.innovazione.ndc.service.logging.HarvesterStage; +import it.gov.innovazione.ndc.service.logging.LoggingContext; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; @@ -23,6 +26,8 @@ import java.util.Map; import java.util.stream.Collectors; +import static it.gov.innovazione.ndc.service.logging.NDCHarvesterLogger.logInfrastructureInfo; + @Service @Slf4j public class VocabularyDataService { @@ -73,6 +78,14 @@ public void indexData(VocabularyIdentifier vocabularyIdentifier, .collect(Collectors.toList()); elasticsearchOperations.bulkIndex(indexQueries, IndexCoordinates.of(indexName)); + + logInfrastructureInfo(LoggingContext.builder() + .stage(HarvesterStage.PROCESS_RESOURCE) + .harvesterStatus(HarvesterRun.Status.RUNNING) + .message("Indexed CSV for " + vocabularyIdentifier) + .additionalInfo("vocabularyIdentifier", vocabularyIdentifier) + .additionalInfo("records", indexQueries.size()) + .build()); } private IndexQuery buildIndexQuery(String idName, Map record) { diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java b/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java index c73940c..990a2a8 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/HarvesterStage.java @@ -6,5 +6,6 @@ public enum HarvesterStage { MAINTAINER_EXTRACTION, CLEANING_VIRTUOSO, CLEANING_METADATA, + PATH_SCANNING, PROCESS_RESOURCE } diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java index 7e6e4b1..393cc5a 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java @@ -12,6 +12,7 @@ import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -32,7 +33,7 @@ public class LoggingContext { @Builder.Default private final LogLevel level = LogLevel.INFO; @Builder.Default - private final String component = "HARVESTER"; + private final String component; private final String jobId; private final String repoUrl; private final String path; @@ -58,9 +59,10 @@ String makeLogEntry() { .collect(Collectors.joining(" ")); String logHeaders = - Stream.of(component, - stage.name(), - jobId) + Stream.of("NDC_HARVESTER_LOGGER", + Optional.ofNullable(component).orElse("NO_COMPONENT"), + Optional.ofNullable(stage).map(HarvesterStage::name).orElse("NO_STAGE"), + Optional.ofNullable(jobId).orElse("NO_JOB_ID")) .map(s -> Objects.isNull(s) ? "" : s) .map(String::toUpperCase) .map(String::trim) @@ -78,6 +80,10 @@ public LoggingContext infrastructure() { return this.withEventCategory(EventCategory.INFRASTRUCTURE); } + public LoggingContext application() { + return this.withEventCategory(EventCategory.APPLICATION); + } + public LoggingContext warn() { return this.withLevel(LogLevel.WARN); } diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java index 88aba78..b500bc6 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLogger.java @@ -1,7 +1,5 @@ package it.gov.innovazione.ndc.service.logging; -import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContext; -import it.gov.innovazione.ndc.harvester.context.HarvestExecutionContextUtils; import lombok.extern.slf4j.Slf4j; import org.springframework.boot.logging.LogLevel; @@ -20,34 +18,41 @@ public class NDCHarvesterLogger { ); private static void log(LoggingContext loggingContext) { - HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + LoggingContext context = NDCHarvesterLoggerUtils.getContext(); if (context != null) { - loggingContext = loggingContext.toBuilder() - .additionalInfo("revision", context.getRevision()) - .additionalInfo("correlationId", context.getCorrelationId()) - .additionalInfo("currentUserId", context.getCurrentUserId()) - .additionalInfo("rootPath", context.getRootPath()) - .additionalInfo("instance", context.getInstance()) - .build() - .withJobId(context.getRunId()) - .withRepoUrl(context.getRepository().getUrl()); + loggingContext = NDCHarvesterLoggerUtils.mergeContexts(context, loggingContext); } try { Consumer logMethod = mapper.get(loggingContext.getLevel()); - logMethod.accept(loggingContext.makeLogEntry()); + String message = loggingContext.makeLogEntry(); + logIfNecessary(logMethod, message); } catch (Exception e) { log.error("There was an exception while logging", e); } } + private static void logIfNecessary(Consumer logMethod, String message) { + if (NDCHarvesterLoggerUtils.notSeen(message)) { + logMethod.accept(message); + } + } + public static void logSemanticInfo(LoggingContext loggingContext) { log(loggingContext.semantic().withLevel(LogLevel.INFO)); } + public static void logInfrastructureInfo(LoggingContext loggingContext) { + log(loggingContext.infrastructure().withLevel(LogLevel.INFO)); + } + public static void logSemanticError(LoggingContext loggingContext) { log(loggingContext.error().semantic()); } + public static void logInfrastructureError(LoggingContext loggingContext) { + log(loggingContext.error().infrastructure()); + } + public static void logSemanticWarn(LoggingContext loggingContext) { log(loggingContext.warn().semantic()); } @@ -56,4 +61,12 @@ public static void logSemanticTrace(LoggingContext loggingContext) { log(loggingContext.trace().semantic()); } + public static void logApplicationInfo(LoggingContext loggingContext) { + log(loggingContext.withLevel(LogLevel.INFO).application()); + + } + + public static void logApplicationWarn(LoggingContext build) { + log(build.withLevel(LogLevel.WARN).application()); + } } diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java new file mode 100644 index 0000000..68a9de5 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/NDCHarvesterLoggerUtils.java @@ -0,0 +1,81 @@ +package it.gov.innovazione.ndc.service.logging; + +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.BiFunction; +import java.util.function.Function; + +@NoArgsConstructor(access = lombok.AccessLevel.PRIVATE) +@Slf4j +public class NDCHarvesterLoggerUtils { + + private static final ThreadLocal CONTEXT_HOLDER = new ThreadLocal<>(); + private static final ThreadLocal> SEEN_MESSAGES = ThreadLocal.withInitial(HashSet::new); + + public static LoggingContext getContext() { + return CONTEXT_HOLDER.get(); + } + + public static void setInitialContext(LoggingContext context) { + CONTEXT_HOLDER.set(context); + } + + public static void overrideContext(LoggingContext context) { + CONTEXT_HOLDER.set(NDCHarvesterLoggerUtils.mergeContexts( + CONTEXT_HOLDER.get(), + context)); + } + + public static void clearContext() { + SEEN_MESSAGES.get().clear(); + CONTEXT_HOLDER.remove(); + log.info("Context cleared"); + } + + public static boolean notSeen(String message) { + return SEEN_MESSAGES.get().add(message); + } + + public static LoggingContext mergeContexts(LoggingContext context, LoggingContext additionalContext) { + LoggingContext.LoggingContextBuilder builder = Optional.ofNullable(context) + .map(LoggingContext::toBuilder) + .orElseGet(LoggingContext::builder); + if (additionalContext != null) { + builder = addToBuilder(additionalContext, builder, LoggingContext::getLevel, LoggingContext.LoggingContextBuilder::level); + builder = addToBuilder(additionalContext, builder, LoggingContext::getComponent, LoggingContext.LoggingContextBuilder::component); + builder = addToBuilder(additionalContext, builder, LoggingContext::getJobId, LoggingContext.LoggingContextBuilder::jobId); + builder = addToBuilder(additionalContext, builder, LoggingContext::getRepoUrl, LoggingContext.LoggingContextBuilder::repoUrl); + builder = addToBuilder(additionalContext, builder, LoggingContext::getPath, LoggingContext.LoggingContextBuilder::path); + builder = addToBuilder(additionalContext, builder, LoggingContext::getMainResource, LoggingContext.LoggingContextBuilder::mainResource); + builder = addToBuilder(additionalContext, builder, LoggingContext::getMessage, LoggingContext.LoggingContextBuilder::message); + builder = addToBuilder(additionalContext, builder, LoggingContext::getDetails, LoggingContext.LoggingContextBuilder::details); + builder = addToBuilder(additionalContext, builder, LoggingContext::getHarvesterStatus, LoggingContext.LoggingContextBuilder::harvesterStatus); + builder = addToBuilder(additionalContext, builder, LoggingContext::getStage, LoggingContext.LoggingContextBuilder::stage); + builder = addToBuilder(additionalContext, builder, LoggingContext::getEventCategory, LoggingContext.LoggingContextBuilder::eventCategory); + for (Map.Entry entry : additionalContext.getAdditionalInfos().entrySet()) { + if (entry.getValue() != null) { + builder = builder.additionalInfo(entry.getKey(), entry.getValue()); + } + } + } + return builder.build(); + } + + private static LoggingContext.LoggingContextBuilder addToBuilder( + LoggingContext additionalContext, + LoggingContext.LoggingContextBuilder builder, + Function getter, + BiFunction setter) { + return Optional.of(additionalContext) + .map(getter) + .map(value -> setter.apply(builder, value)) + .orElse(builder); + } + + +} diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index e14d0ac..d838ca8 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -20,3 +20,4 @@ logging.level.it.gov.innovazione.ndc=debug spring.datasource.url = jdbc:mysql://localhost:3306/dev_ndc_harvest?createDatabaseIfNotExist=true&allowPublicKeyRetrieval=true&useSSL=false spring.datasource.username = root spring.datasource.password = example +springdoc.swagger-ui.url=/v3/api-docs diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 6b1aec8..cbb858c 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -25,8 +25,9 @@ harvester.controlled-vocabulary.scanner.skip-words=transparency-obligation-organ harvester.auth.user=${HARVESTER_USER:harv-user} harvester.auth.password=${HARVESTER_PASSWORD:harv-password} + springdoc.api-docs.enabled=true -#springdoc.swagger-ui.url=/openapi.yaml +springdoc.swagger-ui.url=/api/v3/api-docs #Disable restrictions on multipart requests to validate semantic assets files spring.servlet.multipart.max-file-size=-1 spring.servlet.multipart.max-request-size=-1 diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml index f4fc42f..93e099c 100644 --- a/src/main/resources/logback-spring.xml +++ b/src/main/resources/logback-spring.xml @@ -16,9 +16,9 @@ - + From 0295940b6779454c8e202b81819bb2faa2a0dbb8 Mon Sep 17 00:00:00 2001 From: ndc-dxc Date: Wed, 13 Nov 2024 18:22:49 +0100 Subject: [PATCH 5/5] fixing lombok's warning --- .../it/gov/innovazione/ndc/service/logging/LoggingContext.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java index 393cc5a..01449da 100644 --- a/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java +++ b/src/main/java/it/gov/innovazione/ndc/service/logging/LoggingContext.java @@ -32,7 +32,6 @@ public class LoggingContext { 8, Pair.of("EventCategory", LoggingContext::getEventCategory)); @Builder.Default private final LogLevel level = LogLevel.INFO; - @Builder.Default private final String component; private final String jobId; private final String repoUrl;