diff --git a/.github/actions/build-connector-image/action.yml b/.github/actions/build-connector-image/action.yml new file mode 100644 index 000000000..41638a2a7 --- /dev/null +++ b/.github/actions/build-connector-image/action.yml @@ -0,0 +1,75 @@ +name: "Build EDC Connector Image" +description: "Builds an EDC Connector Docker image and pushes it to the container registry" +inputs: + registry-url: + required: true + description: "Docker Registry" + registry-user: + required: true + description: "Docker Registry Login Username" + registry-password: + required: true + description: "Docker Registry Login Password" + image-base-name: + required: true + description: "Docker Image Base Name (Company)" + image-name: + required: true + description: "Docker Image Name (Artifact Name)" + connector-name: + required: true + description: "EDC Connector Name in authority-portal-backend/catalog-crawler/{connector-name}-launcher" + title: + required: true + description: "Docker Image Title" + description: + required: true + description: "Docker Image Description" +runs: + using: "composite" + steps: + - name: "Docker: Log in to the Container registry" + uses: docker/login-action@v2 + with: + registry: ${{ inputs.registry-url }} + username: ${{ inputs.registry-user }} + password: ${{ inputs.registry-password }} + - name: "Docker: Store last commit info and build date" + id: last-commit-information + shell: bash + run: | + echo "LAST_COMMIT_INFO<<EOF" >> $GITHUB_ENV + export LAST_COMMIT_INFO=$(git log -1) + echo "$LAST_COMMIT_INFO" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + echo "BUILD_DATE=$(date --utc +%FT%TZ)" >> $GITHUB_ENV + - name: "Docker: Extract metadata (tags, labels)" + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ inputs.registry-url }}/${{ inputs.image-base-name }}/${{ inputs.image-name }} + labels: | + org.opencontainers.image.title=${{ inputs.title }} + org.opencontainers.image.description=${{ inputs.description }} + tags: | + type=schedule + type=semver,pattern={{version}} + 
type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=ref,event=branch + type=ref,event=pr + type=sha + type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }} + - name: "Docker: Build and Push" + uses: docker/build-push-action@v5 + with: + file: authority-portal-backend/catalog-crawler/Dockerfile + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + CONNECTOR_NAME=${{ inputs.connector-name }} + "EDC_LAST_COMMIT_INFO_ARG=${{ env.LAST_COMMIT_INFO }}" + EDC_BUILD_DATE_ARG=${{ env.BUILD_DATE }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3995e7a47..e42af9d7c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,7 +92,7 @@ jobs: type=sha type=raw,value=latest,enable={{is_default_branch}} type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }} - - name: "Docker: Build and Push Image" + - name: "Docker: Build and Push Image (authority-portal-backend)" uses: docker/build-push-action@v4 with: file: authority-portal-backend/authority-portal-quarkus/src/main/docker/Dockerfile.jvm @@ -101,7 +101,17 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} network: host - + - name: "Docker: Build and Push Image (authority-portal-crawler)" + uses: ./.github/actions/build-connector-image + with: + registry-url: ${{ env.REGISTRY }} + registry-user: ${{ github.actor }} + registry-password: ${{ secrets.GITHUB_TOKEN }} + image-base-name: ${{ env.IMAGE_NAME_BASE }} + image-name: "authority-portal-crawler" + connector-name: "catalog-crawler-ce" + title: "Catalog Crawler (Community Edition, DAPS)" + description: "sovity CE Catalog crawler for the sovity CE Authority Portal. Requires DAPS dataspace credentials to join an existing dataspace." 
frontend: name: Frontend diff --git a/CHANGELOG.md b/CHANGELOG.md index 048158a9c..072691448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md). #### Major +- The Catalog Crawler has been moved to the AP repository. + #### Minor #### Patch @@ -25,11 +27,15 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md). ### Deployment Migration Notes +- The Crawler image name and version changed due to the crawler being moved into the AP repository and versions being aligned + - Previously: `ghcr.io/sovity/catalog-crawler-ce` + - Now: `ghcr.io/sovity/authority-portal-crawler` + #### Compatible Versions - Authority Portal Backend Docker Image: `ghcr.io/sovity/authority-portal-backend:{{ version }}` - Authority Portal Frontend Docker Image: `ghcr.io/sovity/authority-portal-frontend:{{ version }}` -- Catalog Crawler CE: `ghcr.io/sovity/catalog-crawler-ce:{{ CE VERSION }}` +- Catalog Crawler CE: `ghcr.io/sovity/authority-portal-crawler:{{ version }}` - Sovity EDC CE: {{ CE Release Link }} ## [v4.1.2] - 2024-09-26 diff --git a/authority-portal-backend/authority-portal-api/src/main/java/de/sovity/authorityportal/api/UiResource.kt b/authority-portal-backend/authority-portal-api/src/main/java/de/sovity/authorityportal/api/UiResource.kt index b3e7ee32d..0745d5fee 100644 --- a/authority-portal-backend/authority-portal-api/src/main/java/de/sovity/authorityportal/api/UiResource.kt +++ b/authority-portal-backend/authority-portal-api/src/main/java/de/sovity/authorityportal/api/UiResource.kt @@ -17,11 +17,11 @@ import de.sovity.authorityportal.api.model.CentralComponentCreateRequest import de.sovity.authorityportal.api.model.CentralComponentDto import de.sovity.authorityportal.api.model.ComponentStatusOverview import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithCertificateRequest +import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest import 
de.sovity.authorityportal.api.model.ConnectorDetailsDto import de.sovity.authorityportal.api.model.ConnectorOverviewResult import de.sovity.authorityportal.api.model.CreateCaasRequest import de.sovity.authorityportal.api.model.CreateConnectorRequest -import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest import de.sovity.authorityportal.api.model.CreateConnectorResponse import de.sovity.authorityportal.api.model.DeploymentEnvironmentDto import de.sovity.authorityportal.api.model.IdResponse diff --git a/authority-portal-backend/authority-portal-quarkus/src/main/kotlin/de/sovity/authorityportal/web/thirdparty/daps/DapsClient.kt b/authority-portal-backend/authority-portal-quarkus/src/main/kotlin/de/sovity/authorityportal/web/thirdparty/daps/DapsClient.kt index c26b2e6f3..4e32c6ac0 100644 --- a/authority-portal-backend/authority-portal-quarkus/src/main/kotlin/de/sovity/authorityportal/web/thirdparty/daps/DapsClient.kt +++ b/authority-portal-backend/authority-portal-quarkus/src/main/kotlin/de/sovity/authorityportal/web/thirdparty/daps/DapsClient.kt @@ -16,6 +16,7 @@ package de.sovity.authorityportal.web.thirdparty.daps import de.sovity.authorityportal.web.environment.DeploymentEnvironmentConfiguration.DeploymentEnvironment.DapsConfig import de.sovity.authorityportal.web.thirdparty.daps.ext.CustomKeycloakResource import de.sovity.authorityportal.web.thirdparty.daps.ext.instantiateResource +import io.quarkus.logging.Log import org.keycloak.admin.client.KeycloakBuilder import org.keycloak.representations.idm.ClientRepresentation import org.keycloak.representations.idm.ProtocolMapperRepresentation @@ -43,7 +44,9 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable { } fun createClient(clientId: String) { + Log.info("Creating client $clientId in realm $realmName") keycloak.realm(realmName).clients().create(buildClientRepresentation(clientId)) + Log.info("Client $clientId created in realm $realmName") } fun deleteClient(clientId: String) { 
@@ -66,6 +69,7 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable { } fun addJwksUrl(clientId: String, jwksUrl: String) { + Log.info("Getting client $clientId in realm $realmName") val client = getClientById(clientId) ?: error("Client not found") client.attributes["jwks.url"] = jwksUrl diff --git a/authority-portal-backend/authority-portal-quarkus/src/main/resources/application.properties b/authority-portal-backend/authority-portal-quarkus/src/main/resources/application.properties index 470688e82..96ad48184 100644 --- a/authority-portal-backend/authority-portal-quarkus/src/main/resources/application.properties +++ b/authority-portal-backend/authority-portal-quarkus/src/main/resources/application.properties @@ -78,6 +78,8 @@ quarkus.arc.exclude-types=io.swagger.v3.jaxrs2.** %test.quarkus.log.console.json=false %test.quarkus.otel.traces.exporter=none +quarkus.otel.sdk.disabled=true + # Rest Client quarkus.rest-client.http2=true diff --git a/authority-portal-backend/build.gradle.kts b/authority-portal-backend/build.gradle.kts index 8b5b8158f..91a03e847 100644 --- a/authority-portal-backend/build.gradle.kts +++ b/authority-portal-backend/build.gradle.kts @@ -23,6 +23,10 @@ subprojects { password = project.findProperty("gpr.key") as String? 
?: System.getenv("GPR_KEY") } } + maven { + url = uri("https://pkgs.dev.azure.com/sovity/41799556-91c8-4df6-8ddb-4471d6f15953/_packaging/core-edc/maven/v1") + name = "AzureRepo" + } } configurations.all { diff --git a/authority-portal-backend/catalog-crawler/Dockerfile b/authority-portal-backend/catalog-crawler/Dockerfile new file mode 100644 index 000000000..f03775cad --- /dev/null +++ b/authority-portal-backend/catalog-crawler/Dockerfile @@ -0,0 +1,34 @@ +FROM eclipse-temurin:17-jre-alpine + +# Install curl for healthcheck, bash for entrypoint +RUN apk add --no-cache curl bash +SHELL ["/bin/bash", "-c"] + +# Use a non-root user +RUN adduser -D -H -s /sbin/nologin edc +USER edc:edc + +# Which app.jar to include +ARG CONNECTOR_NAME="catalog-crawler-ce" + +# For last-commit-info extension +ARG EDC_LAST_COMMIT_INFO_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_LAST_COMMIT_INFO_ARG, so there's no last commit info." +ARG EDC_BUILD_DATE_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_BUILD_DATE_ARG, so there's no build date." 
+ +WORKDIR /app +COPY ./authority-portal-backend/catalog-crawler/${CONNECTOR_NAME}-launcher/build/libs/app.jar /app +COPY ./authority-portal-backend/catalog-crawler/logging.properties /app +COPY ./authority-portal-backend/catalog-crawler/logging.dev.properties /app + +RUN touch /app/empty-properties-file.properties + +ENV EDC_LAST_COMMIT_INFO=$EDC_LAST_COMMIT_INFO_ARG +ENV EDC_BUILD_DATE=$EDC_BUILD_DATE_ARG +ENV JVM_ARGS="" + +COPY ./authority-portal-backend/catalog-crawler/docker-entrypoint.sh /app/entrypoint.sh +ENTRYPOINT ["/app/entrypoint.sh"] +CMD ["start"] + +# health status is determined by the availability of the /health endpoint +HEALTHCHECK --interval=5s --timeout=5s --retries=10 CMD curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/api/check/health || curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/backend/api/check/health diff --git a/authority-portal-backend/catalog-crawler/README.md b/authority-portal-backend/catalog-crawler/README.md new file mode 100644 index 000000000..05633cd3f --- /dev/null +++ b/authority-portal-backend/catalog-crawler/README.md @@ -0,0 +1,41 @@ + +
+
+ + Logo + + +

EDC-Connector Extension:
Catalog Crawler

+ +

+ Report Bug + · + Request Feature +

+
+ +## About this Extension + +The Catalog Crawler is an additional deployment unit needed to determine the online status of registered connectors and populate the Data Catalog: + +- It is a modified EDC connector with the task to crawl the other connectors' public data offers. +- It periodically checks the Authority Portal's connector list for its environment. +- It crawls the given connectors at regular intervals. +- It writes the data offers and connector statuses into the Authority Portal DB. +- Each environment configured in the Authority Portal requires its own Catalog Crawler with credentials for that environment's DAPS. + +## Why does this component exist? + +The Authority Portal uses a non-EDC stack and thus it cannot read the catalogs of participating connectors directly. + +## Deployment + +Please see the [Production Deployment Guide](../../docs/deployment-guide/goals/production/README.md) for more information. + +## License + +Apache License 2.0 - see [LICENSE](../../LICENSE) + +## Contact + +sovity GmbH - contact@sovity.de diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler-ce-launcher/build.gradle.kts b/authority-portal-backend/catalog-crawler/catalog-crawler-ce-launcher/build.gradle.kts new file mode 100644 index 000000000..4b535bded --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler-ce-launcher/build.gradle.kts @@ -0,0 +1,26 @@ +plugins { + `java-library` + id("application") + alias(libs.plugins.shadow) +} + +dependencies { + implementation(project(":catalog-crawler:catalog-crawler-launcher-base")) + + api(libs.edc.monitorJdkLogger) + api(libs.edc.apiObservability) + + implementation(libs.edc.oauth2Core) + implementation(libs.edc.vaultFilesystem) +} + +application { + mainClass.set("de.sovity.edc.ext.catalog.crawler.Main") +} + +tasks.withType<com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar> { + mergeServiceFiles() + archiveFileName.set("app.jar") +} + +group = libs.versions.sovityEdcGroup.get() diff --git 
a/authority-portal-backend/catalog-crawler/catalog-crawler-launcher-base/build.gradle.kts b/authority-portal-backend/catalog-crawler/catalog-crawler-launcher-base/build.gradle.kts new file mode 100644 index 000000000..3b30edd30 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler-launcher-base/build.gradle.kts @@ -0,0 +1,19 @@ +plugins { + `java-library` +} + +dependencies { + // A minimal EDC that can request catalogs + api(libs.edc.controlPlaneCore) + api(libs.edc.dataPlaneSelectorCore) + api(libs.edc.configurationFilesystem) + api(libs.edc.controlPlaneAggregateServices) + api(libs.edc.http) + api(libs.edc.dsp) + api(libs.edc.jsonLd) + + // Data Catalog Crawler + api(project(":catalog-crawler:catalog-crawler")) +} + +group = libs.versions.sovityEdcGroup.get() diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/build.gradle.kts b/authority-portal-backend/catalog-crawler/catalog-crawler/build.gradle.kts new file mode 100644 index 000000000..e4e30b170 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/build.gradle.kts @@ -0,0 +1,41 @@ +plugins { + `java-library` +} + +dependencies { + annotationProcessor(libs.lombok) + compileOnly(libs.lombok) + + implementation(libs.edc.controlPlaneSpi) + implementation(libs.edc.managementApiConfiguration) + + implementation(libs.quartz.quartz) + implementation(libs.commons.lang3) + implementation(libs.quarkus.jooq) + + api(libs.sovity.edc.catalogParser) + api(libs.sovity.edc.jsonAndJsonLdUtils) + api(libs.sovity.edc.wrapperCommonMappers) + api(libs.sovity.edc.ext.postgresFlywayCore) + api(libs.sovity.edc.config) + api(project(":authority-portal-db")) + + testAnnotationProcessor(libs.lombok) + testCompileOnly(libs.lombok) + testImplementation(libs.sovity.edc.ext.testUtils) + testImplementation(libs.assertj.core) + testImplementation(libs.mockito.core) + testImplementation(libs.restAssured.restAssured) + testImplementation(libs.testcontainers.testcontainers) + 
testImplementation(libs.flyway.core) + testImplementation(libs.testcontainers.junitJupiter) + testImplementation(libs.testcontainers.postgresql) + testImplementation(libs.junit.api) + testImplementation(libs.jsonAssert) + testRuntimeOnly(libs.junit.engine) +} + +tasks.getByName<Test>("test") { + useJUnitPlatform() + maxParallelForks = 1 +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerConfigProps.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerConfigProps.java new file mode 100644 index 000000000..b62f3e3b1 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerConfigProps.java @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2024 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + */ + +package de.sovity.edc.ext.catalog.crawler; + +import de.sovity.edc.utils.config.ConfigUtils; +import de.sovity.edc.utils.config.model.ConfigProp; +import de.sovity.edc.utils.config.utils.UrlPathUtils; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.experimental.UtilityClass; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Supplier; + +@UtilityClass +public class CrawlerConfigProps { + public static final List<ConfigProp> ALL_CRAWLER_PROPS = new ArrayList<>(); + + /* Crawler-specific Configuration */ + public static final ConfigProp CRAWLER_ENVIRONMENT_ID = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + 
.property("crawler.environment.id") + .description("Environment ID") + .required(true) + ); + + public static final ConfigProp CRAWLER_DB_JDBC_URL = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + .property("crawler.db.jdbc.url") + .description("PostgreSQL DB Connection: JDBC URL") + .required(true) + ); + + public static final ConfigProp CRAWLER_DB_JDBC_USER = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + .property("crawler.db.jdbc.user") + .description("PostgreSQL DB Connection: Username") + .required(true) + ); + + public static final ConfigProp CRAWLER_DB_JDBC_PASSWORD = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + .property("crawler.db.jdbc.password") + .description("PostgreSQL DB Connection: Password") + .required(true) + ); + + public static final ConfigProp CRAWLER_DB_CONNECTION_POOL_SIZE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.db.connection.pool.size") + .description("Size of the Hikari Connection Pool") + .defaultValue("30") + ); + + public static final ConfigProp CRAWLER_DB_CONNECTION_TIMEOUT_IN_MS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.db.connection.timeout.in.ms") + .description("Sets the connection timeout for the datasource in milliseconds.") + .defaultValue("30000") + ); + + public static final ConfigProp CRAWLER_CRON_ONLINE_CONNECTOR_REFRESH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.cron.online.connector.refresh") + .description("Cron expression for crawling ONLINE connectors") + .defaultValue("*/20 * * ? 
* *") + ); + + public static final ConfigProp CRAWLER_CRON_OFFLINE_CONNECTOR_REFRESH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.cron.offline.connector.refresh") + .description("Cron expression for crawling OFFLINE connectors") + .defaultValue("0 */5 * ? * *") + ); + + public static final ConfigProp CRAWLER_CRON_DEAD_CONNECTOR_REFRESH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.cron.dead.connector.refresh") + .description("Cron expression for crawling DEAD connectors") + .defaultValue("0 0 * ? * *") + ); + + public static final ConfigProp CRAWLER_SCHEDULED_KILL_OFFLINE_CONNECTORS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.scheduled.kill.offline.connectors") + .description("Scheduled task for marking connectors as DEAD") + .defaultValue("0 0 2 ? * *") + ); + + public static final ConfigProp CRAWLER_KILL_OFFLINE_CONNECTORS_AFTER = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.kill.offline.connectors.after") + .description("Time in Java Duration Format after which an OFFLINE connector is marked as DEAD") + .defaultValue("P5D") + ); + + public static final ConfigProp CRAWLER_HIDE_OFFLINE_DATA_OFFERS_AFTER = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.hide.offline.data.offers.after") + .description("Time in Java Duration Format after which an OFFLINE data offer is hidden") + .defaultValue("P1D") + ); + + public static final ConfigProp CRAWLER_NUM_THREADS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.num.threads") + .description("Number of threads for crawling") + .defaultValue("32") + ); + + public static final ConfigProp CRAWLER_MAX_DATA_OFFERS_PER_CONNECTOR = addCeProp(builder -> builder + 
.category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.max.data.offers.per.connector") + .description("Maximum number of data offers per connector") + .defaultValue("50") + ); + + public static final ConfigProp CRAWLER_MAX_CONTRACT_OFFERS_PER_DATA_OFFER = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("crawler.max.contract.offers.per.data.offer") + .description("Maximum number of contract offers per data offer") + .defaultValue("10") + ); + + public static final ConfigProp MY_EDC_NETWORK_TYPE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("my.edc.network.type") + .description("Configuring EDCs for different environments. Available values are: %s".formatted( + String.join(", ", CrawlerConfigProps.NetworkType.ALL_NETWORK_TYPES))) + .warnIfOverridden(true) + .defaultValue(CrawlerConfigProps.NetworkType.PRODUCTION) + ); + + /* Basic Configuration */ + + public static final ConfigProp MY_EDC_PARTICIPANT_ID = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + .property("my.edc.participant.id") + .description("Participant ID / Connector ID") + .defaultValue("broker") + .warnIfOverridden(true) + .required(true) + ); + + public static final ConfigProp MY_EDC_FQDN = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.BASIC) + .property("my.edc.fqdn") + .description("Fully Qualified Domain Name of where the Connector is hosted, e.g. 
my-connector.myorg.com") + .requiredIf(props -> CrawlerConfigProps.NetworkType.isProduction(props) || CrawlerConfigProps.NetworkType.isLocalDemoDockerCompose(props)) + .defaultValueFn(props -> new CrawlerConfigProps.NetworkTypeMatcher(props).unitTest(() -> "localhost").orElseThrow()) + ); + + /* Auth */ + + public static final ConfigProp MY_EDC_C2C_IAM_TYPE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("my.edc.c2c.iam.type") + .description("Type of Connector-to-Connector IAM / Authentication Mechanism used. " + + "Available values are: 'daps-sovity', 'daps-omejdn', 'mock-iam'. Default: 'daps-sovity'") + .warnIfOverridden(true) + .defaultValue("daps-sovity") + ); + + public static final ConfigProp EDC_OAUTH_TOKEN_URL = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.token.url") + .description("OAuth2 / DAPS: Token URL") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .required(true) + ); + + public static final ConfigProp EDC_OAUTH_PROVIDER_JWKS_URL = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.provider.jwks.url") + .description("OAuth2 / DAPS: JWKS URL") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .required(true) + ); + + public static final ConfigProp EDC_OAUTH_CLIENT_ID = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.client.id") + .description("OAuth2 / DAPS: Client ID. 
Defaults to Participant ID") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .defaultValueFn(MY_EDC_PARTICIPANT_ID::getRaw) + ); + + public static final ConfigProp EDC_KEYSTORE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.keystore") + .description("File-Based Vault: Keystore file (.jks)") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .required(true) + ); + + public static final ConfigProp EDC_KEYSTORE_PASSWORD = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.keystore.password") + .description("File-Based Vault: Keystore password") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .required(true) + ); + + public static final ConfigProp EDC_OAUTH_CERTIFICATE_ALIAS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.certificate.alias") + .description("OAuth2 / DAPS: Certificate Vault Entry for the Public Key. Default: '1'") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .defaultValue("1") + ); + + public static final ConfigProp EDC_OAUTH_PRIVATE_KEY_ALIAS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.private.key.alias") + .description("OAuth2 / DAPS: Certificate Vault Entry for the Private Key. 
Default: '1'") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .defaultValue("1") + ); + + public static final ConfigProp EDC_OAUTH_PROVIDER_AUDIENCE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.provider.audience") + .description("OAuth2 / DAPS: Provider Audience") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .warnIfOverridden(true) + .defaultValueFn(props -> { + if ("daps-omejdn".equals(MY_EDC_C2C_IAM_TYPE.getRaw(props))) { + return "idsc:IDS_CONNECTORS_ALL"; + } + + // daps-sovity + return EDC_OAUTH_TOKEN_URL.getRaw(props); + }) + ); + + public static final ConfigProp EDC_OAUTH_ENDPOINT_AUDIENCE = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.oauth.endpoint.audience") + .description("OAuth2 / DAPS: Endpoint Audience") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .warnIfOverridden(true) + .defaultValue("idsc:IDS_CONNECTORS_ALL") + ); + + public static final ConfigProp EDC_AGENT_IDENTITY_KEY = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.C2C_IAM) + .property("edc.agent.identity.key") + .description("OAuth2 / DAPS: Agent Identity Key") + .relevantIf(props -> MY_EDC_C2C_IAM_TYPE.getRaw(props).startsWith("daps")) + .warnIfOverridden(true) + .defaultValueFn(props -> { + if ("daps-omejdn".equals(MY_EDC_C2C_IAM_TYPE.getRaw(props))) { + return "client_id"; + } + + // daps-sovity + return "referringConnector"; + }) + ); + + /* Advanced */ + + public static final ConfigProp MY_EDC_FIRST_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("my.edc.first.port") + .description("The first port of several ports to be used for the several API endpoints. 
" + + "Useful when starting two EDCs on the host machine network / during tests") + .warnIfOverridden(true) + .defaultValue("11000") + ); + + public static final ConfigProp EDC_WEB_REST_CORS_ENABLED = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("edc.web.rest.cors.enabled") + .description("Enable CORS") + .warnIfOverridden(true) + .relevantIf(props -> !CrawlerConfigProps.NetworkType.isProduction(props)) + .defaultValue("true") + ); + + public static final ConfigProp EDC_WEB_REST_CORS_HEADERS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("edc.web.rest.cors.headers") + .description("CORS: Allowed Headers") + .warnIfOverridden(true) + .relevantIf(props -> !CrawlerConfigProps.NetworkType.isProduction(props)) + .defaultValue("origin,content-type,accept,authorization,X-Api-Key") + ); + + public static final ConfigProp EDC_WEB_REST_CORS_ORIGINS = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.ADVANCED) + .property("edc.web.rest.cors.origins") + .description("CORS: Allowed Origins") + .warnIfOverridden(true) + .relevantIf(props -> !CrawlerConfigProps.NetworkType.isProduction(props)) + .defaultValue("*") + ); + + /* Defaults of EDC Configuration */ + + public static final ConfigProp MY_EDC_PROTOCOL = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("my.edc.protocol") + .description("HTTP Protocol for when the EDC exposes its own URL for callbacks") + .warnIfOverridden(true) + .defaultValueFn(props -> CrawlerConfigProps.NetworkType.isProduction(props) ? 
"https://" : "http://") + ); + + public static final ConfigProp MY_EDC_BASE_PATH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("my.edc.base.path") + .description("Optional prefix to be added before all API paths") + .warnIfOverridden(true) + .defaultValue("/") + ); + + public static final ConfigProp WEB_HTTP_PATH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.path") + .description("API Group 'Web' contains misc API endpoints, usually not meant to be public, this is the base path.") + .warnIfOverridden(true) + .defaultValueFn(props -> UrlPathUtils.urlPathJoin(MY_EDC_BASE_PATH.getRaw(props), "api")) + ); + + public static final ConfigProp WEB_HTTP_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.port") + .description("API Group 'Web' contains misc API endpoints, usually not meant to be public, this is the port.") + .warnIfOverridden(true) + .defaultValueFn(props -> plus(props, MY_EDC_FIRST_PORT, 1)) + ); + + public static final ConfigProp WEB_HTTP_MANAGEMENT_PATH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.management.path") + .description("API Group 'Management' contains API endpoints for EDC interaction and " + + "should be protected from unauthorized access. This is the base path.") + .warnIfOverridden(true) + .defaultValueFn(props -> UrlPathUtils.urlPathJoin(MY_EDC_BASE_PATH.getRaw(props), "api/management")) + ); + + public static final ConfigProp WEB_HTTP_MANAGEMENT_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.management.port") + .description( + "API Group 'Management' contains API endpoints for EDC interaction and " + + "should be protected from unauthorized access. 
This is the port.") + .warnIfOverridden(true) + .defaultValueFn(props -> plus(props, MY_EDC_FIRST_PORT, 2)) + ); + + public static final ConfigProp WEB_HTTP_PROTOCOL_PATH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.protocol.path") + .description("API Group 'Protocol' must be public as it is used for connector to connector communication, this is the base path.") + .warnIfOverridden(true) + .defaultValueFn(props -> UrlPathUtils.urlPathJoin(MY_EDC_BASE_PATH.getRaw(props), "api/dsp")) + ); + + public static final ConfigProp WEB_HTTP_PROTOCOL_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.protocol.port") + .description("API Group 'Protocol' must be public as it is used for connector to connector communication, this is the port.") + .warnIfOverridden(true) + .defaultValueFn(props -> plus(props, MY_EDC_FIRST_PORT, 3)) + ); + + public static final ConfigProp WEB_HTTP_CONTROL_PATH = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.control.path") + .description("API Group 'Control' contains API endpoints for control plane/data plane interaction and " + + "should be non-public, this is the base path.") + .warnIfOverridden(true) + .defaultValueFn(props -> UrlPathUtils.urlPathJoin(MY_EDC_BASE_PATH.getRaw(props), "api/control")) + ); + + public static final ConfigProp WEB_HTTP_CONTROL_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.control.port") + .description("API Group 'Control' contains API endpoints for control plane/data plane interaction and " + + "should be non-public, this is the port.") + .warnIfOverridden(true) + .defaultValueFn(props -> plus(props, MY_EDC_FIRST_PORT, 4)) + ); + + public static final ConfigProp WEB_HTTP_PUBLIC_PATH = addCeProp(builder -> builder + 
.category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.public.path") + .description("API Group 'Public' contains public data plane API endpoints. This is the base path.") + .warnIfOverridden(true) + .defaultValueFn(props -> UrlPathUtils.urlPathJoin(MY_EDC_BASE_PATH.getRaw(props), "api/public")) + ); + + public static final ConfigProp WEB_HTTP_PUBLIC_PORT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("web.http.public.port") + .description("API Group 'Public' contains public data plane API endpoints. This is the port.") + .warnIfOverridden(true) + .defaultValueFn(props -> plus(props, MY_EDC_FIRST_PORT, 5)) + ); + + public static final ConfigProp EDC_JSONLD_HTTPS_ENABLED = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.jsonld.https.enabled") + .description("Required to be set since Eclipse EDC 0.2.1") + .warnIfOverridden(true) + .defaultValue("true") + ); + + public static final ConfigProp EDC_CONNECTOR_NAME = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.connector.name") + .description("Connector Name") + .warnIfOverridden(true) + .defaultValueFn(MY_EDC_PARTICIPANT_ID::getRaw) + ); + + public static final ConfigProp EDC_PARTICIPANT_ID = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.participant.id") + .description("Participant ID / Connector ID") + .warnIfOverridden(true) + .defaultValueFn(MY_EDC_PARTICIPANT_ID::getRaw) + ); + + public static final ConfigProp EDC_HOSTNAME = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.hostname") + .description("Same as %s".formatted(MY_EDC_FQDN.getProperty())) + .warnIfOverridden(true) + .defaultValueFn(MY_EDC_FQDN::getRaw) + ); + + public static final ConfigProp EDC_DSP_CALLBACK_ADDRESS = 
addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.dsp.callback.address") + .description("Full URL for the DSP callback address") + .warnIfOverridden(true) + .defaultValueFn(ConfigUtils::getProtocolApiUrl) + ); + + public static final ConfigProp EDC_VAULT = addCeProp(builder -> builder + .category(CrawlerConfigProps.Category.RAW_EDC_CONFIG_DEFAULTS) + .property("edc.vault") + .description("This file could contain an entry replacing the EDC_KEYSTORE ENV var, " + + "but for some reason it is required, and EDC won't start up if it isn't configured." + + "It is created in the Dockerfile") + .relevantIf(CrawlerConfigProps.NetworkType::isProduction) + .defaultValue("/app/empty-properties-file.properties") + ); + + /* Helpers */ + + private static ConfigProp addCeProp(Consumer builderFn) { + var builder = ConfigProp.builder(); + builderFn.accept(builder); + var built = builder.build(); + + // Register the property in the list of all available CE properties + // Order matters here, as the property defaults are calculated in order + built.also(ALL_CRAWLER_PROPS::add); + return built; + } + + private static String plus(Map props, ConfigProp prop, int add) { + var raw = prop.getRaw(props); + var result = Integer.parseInt(raw == null ? 
"0" : raw) + add; + return String.valueOf(result); + } + + @UtilityClass + public static class NetworkType { + public static final String PRODUCTION = "production"; + public static final String LOCAL_DEMO_DOCKER_COMPOSE = "local-demo-docker-compose"; + public static final String UNIT_TEST = "unit-test"; + public static final List ALL_NETWORK_TYPES = List.of(PRODUCTION, LOCAL_DEMO_DOCKER_COMPOSE, UNIT_TEST); + + public static boolean isProduction(Map props) { + return CrawlerConfigProps.NetworkType.PRODUCTION.equals(MY_EDC_NETWORK_TYPE.getRaw(props)); + } + + public static boolean isLocalDemoDockerCompose(Map props) { + return CrawlerConfigProps.NetworkType.LOCAL_DEMO_DOCKER_COMPOSE.equals(MY_EDC_NETWORK_TYPE.getRaw(props)); + } + + public static boolean isUnitTest(Map props) { + return CrawlerConfigProps.NetworkType.UNIT_TEST.equals(MY_EDC_NETWORK_TYPE.getRaw(props)); + } + } + + @Setter + @Accessors(fluent = true, chain = true) + @RequiredArgsConstructor + public static class NetworkTypeMatcher { + private final Map props; + private Supplier production; + private Supplier localDemoDockerCompose; + private Supplier unitTest; + + public T orElse(Supplier elseFn) { + if (production != null && CrawlerConfigProps.NetworkType.isProduction(props)) { + return production.get(); + } + + if (localDemoDockerCompose != null && CrawlerConfigProps.NetworkType.isLocalDemoDockerCompose(props)) { + return localDemoDockerCompose.get(); + } + + if (unitTest != null && CrawlerConfigProps.NetworkType.isUnitTest(props)) { + return unitTest.get(); + } + + return elseFn.get(); + } + + public T orElseThrow() { + return orElse(() -> { + var msg = "Unhandled %s: %s".formatted( + MY_EDC_NETWORK_TYPE.getProperty(), + MY_EDC_NETWORK_TYPE.getRaw(props) + ); + throw new IllegalArgumentException(msg); + }); + } + } + + @UtilityClass + private static class Category { + public static final String BASIC = "Basic Configuration"; + public static final String ADVANCED = "Advanced configuration"; + 
public static final String C2C_IAM = "Connector-to-Connector IAM"; + public static final String RAW_EDC_CONFIG_DEFAULTS = "EDC Config Defaults / Overrides"; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtension.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtension.java new file mode 100644 index 000000000..00bda31d6 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtension.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler; + +import de.sovity.edc.ext.wrapper.api.common.mappers.PlaceholderEndpointService; +import org.eclipse.edc.connector.api.management.configuration.transform.ManagementApiTypeTransformerRegistry; +import org.eclipse.edc.connector.spi.catalog.CatalogService; +import org.eclipse.edc.jsonld.spi.JsonLd; +import org.eclipse.edc.runtime.metamodel.annotation.Inject; +import org.eclipse.edc.runtime.metamodel.annotation.Provides; +import org.eclipse.edc.spi.system.ServiceExtension; +import org.eclipse.edc.spi.system.ServiceExtensionContext; +import org.eclipse.edc.spi.types.TypeManager; + +@Provides({CrawlerExtensionContext.class}) +public class CrawlerExtension implements ServiceExtension { + + public static final String EXTENSION_NAME = "Authority Portal Data Catalog Crawler"; + + @Inject + private TypeManager typeManager; + + @Inject + private ManagementApiTypeTransformerRegistry typeTransformerRegistry; + + @Inject + private JsonLd jsonLd; + 
+ @Inject + private CatalogService catalogService; + + /** + * Manual Dependency Injection Result + */ + private CrawlerExtensionContext services; + + @Override + public String name() { + return EXTENSION_NAME; + } + + @Override + public void initialize(ServiceExtensionContext context) { + services = CrawlerExtensionContextBuilder.buildContext( + context.getConfig(), + context.getMonitor(), + typeManager, + typeTransformerRegistry, + jsonLd, + catalogService, + new PlaceholderEndpointService("http://0.0.0.0/") + ); + + // Provide access for the tests + context.registerService(CrawlerExtensionContext.class, services); + } + + @Override + public void start() { + if (services == null) { + return; + } + services.crawlerInitializer().onStartup(); + } + + @Override + public void shutdown() { + if (services == null) { + return; + } + services.dataSource().close(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContext.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContext.java new file mode 100644 index 000000000..264831fe8 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContext.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler; + +import com.zaxxer.hikari.HikariDataSource; +import de.sovity.edc.ext.catalog.crawler.crawling.ConnectorCrawler; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogBuilder; +import 
de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; +import de.sovity.edc.ext.wrapper.api.common.mappers.PolicyMapper; + + +/** + * Manual Dependency Injection result + * + * @param crawlerInitializer Startup Logic + */ +public record CrawlerExtensionContext( + CrawlerInitializer crawlerInitializer, + // Required for stopping connections on closing + HikariDataSource dataSource, + DslContextFactory dslContextFactory, + + // Required for Integration Tests + ConnectorCrawler connectorCrawler, + PolicyMapper policyMapper, + FetchedCatalogBuilder catalogPatchBuilder, + DataOfferRecordUpdater dataOfferRecordUpdater +) { +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java new file mode 100644 index 000000000..6aaf52998 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerExtensionContextBuilder.java @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import de.sovity.edc.ext.catalog.crawler.crawling.ConnectorCrawler; +import de.sovity.edc.ext.catalog.crawler.crawling.OfflineConnectorCleaner; +import 
de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogBuilder; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogMappingUtils; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogService; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerExecutionTimeLogger; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.CatalogPatchBuilder; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.ConnectorUpdateCatalogWriter; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.ConnectorUpdateFailureWriter; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.ConnectorUpdateSuccessWriter; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.DataOfferLimitsEnforcer; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogCleaner; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogPatchApplier; +import de.sovity.edc.ext.catalog.crawler.dao.config.DataSourceFactory; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorStatusUpdater; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferRecordUpdater; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfigFactory; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorQueue; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorQueueFiller; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ThreadPool; +import 
de.sovity.edc.ext.catalog.crawler.orchestration.queue.ThreadPoolTaskQueue; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.DeadConnectorRefreshJob; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.OfflineConnectorCleanerJob; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.OfflineConnectorRefreshJob; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.OnlineConnectorRefreshJob; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.QuartzScheduleInitializer; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.utils.CronJobRef; +import de.sovity.edc.ext.wrapper.api.common.mappers.AssetMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.PlaceholderEndpointService; +import de.sovity.edc.ext.wrapper.api.common.mappers.PolicyMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.AssetEditRequestMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.AssetJsonLdBuilder; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.AssetJsonLdParser; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.AssetJsonLdUtils; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.EdcPropertyUtils; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.ShortDescriptionBuilder; +import de.sovity.edc.ext.wrapper.api.common.mappers.dataaddress.DataSourceMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.dataaddress.http.HttpDataSourceMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.dataaddress.http.HttpHeaderMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.policy.AtomicConstraintMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.policy.ExpressionExtractor; +import de.sovity.edc.ext.wrapper.api.common.mappers.policy.ExpressionMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.policy.LiteralMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.policy.OperatorMapper; +import 
de.sovity.edc.ext.wrapper.api.common.mappers.policy.PolicyValidator; +import de.sovity.edc.utils.catalog.DspCatalogService; +import de.sovity.edc.utils.catalog.mapper.DspDataOfferBuilder; +import lombok.NoArgsConstructor; +import org.eclipse.edc.connector.spi.catalog.CatalogService; +import org.eclipse.edc.jsonld.spi.JsonLd; +import org.eclipse.edc.runtime.metamodel.annotation.Inject; +import org.eclipse.edc.spi.CoreConstants; +import org.eclipse.edc.spi.monitor.Monitor; +import org.eclipse.edc.spi.system.configuration.Config; +import org.eclipse.edc.spi.types.TypeManager; +import org.eclipse.edc.transform.spi.TypeTransformerRegistry; +import org.jetbrains.annotations.NotNull; + +import java.util.List; + + +/** + * Manual Dependency Injection (DYDI). + *

+ * We want to develop as Java Backend Development is done, but we have + * no CDI / DI Framework to rely on. + *

+ * EDC {@link Inject} only works in {@link CrawlerExtension}. + */ +@NoArgsConstructor(access = lombok.AccessLevel.PRIVATE) +public class CrawlerExtensionContextBuilder { + + public static CrawlerExtensionContext buildContext( + Config config, + Monitor monitor, + TypeManager typeManager, + TypeTransformerRegistry typeTransformerRegistry, + JsonLd jsonLd, + CatalogService catalogService, + PlaceholderEndpointService placeholderEndpointService + ) { + // Config + var crawlerConfigFactory = new CrawlerConfigFactory(config); + var crawlerConfig = crawlerConfigFactory.buildCrawlerConfig(); + + // DB + var dataSourceFactory = new DataSourceFactory(config); + var dataSource = dataSourceFactory.newDataSource(); + + // Dao + var dataOfferQueries = new DataOfferQueries(); + var dslContextFactory = new DslContextFactory(dataSource); + var connectorQueries = new ConnectorQueries(crawlerConfig); + + // Services + var objectMapperJsonLd = getJsonLdObjectMapper(typeManager); + var assetMapper = newAssetMapper(typeTransformerRegistry, jsonLd, placeholderEndpointService); + var policyMapper = newPolicyMapper(typeTransformerRegistry, objectMapperJsonLd); + var crawlerEventLogger = new CrawlerEventLogger(); + var crawlerExecutionTimeLogger = new CrawlerExecutionTimeLogger(); + var dataOfferMappingUtils = new FetchedCatalogMappingUtils( + policyMapper, + assetMapper, + objectMapperJsonLd + ); + var contractOfferRecordUpdater = new ContractOfferRecordUpdater(); + var shortDescriptionBuilder = new ShortDescriptionBuilder(); + var dataOfferRecordUpdater = new DataOfferRecordUpdater(shortDescriptionBuilder); + var contractOfferQueries = new ContractOfferQueries(); + var dataOfferLimitsEnforcer = new DataOfferLimitsEnforcer(crawlerConfig, crawlerEventLogger); + var dataOfferPatchBuilder = new CatalogPatchBuilder( + contractOfferQueries, + dataOfferQueries, + dataOfferRecordUpdater, + contractOfferRecordUpdater + ); + var dataOfferPatchApplier = new CatalogPatchApplier(); + var 
dataOfferWriter = new ConnectorUpdateCatalogWriter(dataOfferPatchBuilder, dataOfferPatchApplier); + var connectorUpdateSuccessWriter = new ConnectorUpdateSuccessWriter( + crawlerEventLogger, + dataOfferWriter, + dataOfferLimitsEnforcer + ); + var fetchedDataOfferBuilder = new FetchedCatalogBuilder(dataOfferMappingUtils); + var dspDataOfferBuilder = new DspDataOfferBuilder(jsonLd); + var dspCatalogService = new DspCatalogService( + catalogService, + dspDataOfferBuilder + ); + var dataOfferFetcher = new FetchedCatalogService(dspCatalogService, fetchedDataOfferBuilder); + var connectorUpdateFailureWriter = new ConnectorUpdateFailureWriter(crawlerEventLogger, monitor); + var connectorUpdater = new ConnectorCrawler( + dataOfferFetcher, + connectorUpdateSuccessWriter, + connectorUpdateFailureWriter, + connectorQueries, + dslContextFactory, + monitor, + crawlerExecutionTimeLogger + ); + + var threadPoolTaskQueue = new ThreadPoolTaskQueue(); + var threadPool = new ThreadPool(threadPoolTaskQueue, crawlerConfig, monitor); + var connectorQueue = new ConnectorQueue(connectorUpdater, threadPool); + var connectorQueueFiller = new ConnectorQueueFiller(connectorQueue, connectorQueries); + var connectorStatusUpdater = new ConnectorStatusUpdater(); + var catalogCleaner = new CatalogCleaner(); + var offlineConnectorCleaner = new OfflineConnectorCleaner( + crawlerConfig, + connectorQueries, + crawlerEventLogger, + connectorStatusUpdater, + catalogCleaner + ); + + // Schedules + List> jobs = List.of( + getOnlineConnectorRefreshCronJob(dslContextFactory, connectorQueueFiller, config), + getOfflineConnectorRefreshCronJob(dslContextFactory, connectorQueueFiller, config), + getDeadConnectorRefreshCronJob(dslContextFactory, connectorQueueFiller, config), + getOfflineConnectorCleanerCronJob(dslContextFactory, offlineConnectorCleaner, config) + ); + + // Startup + var quartzScheduleInitializer = new QuartzScheduleInitializer(config, monitor, jobs); + var crawlerInitializer = new 
CrawlerInitializer(quartzScheduleInitializer); + + return new CrawlerExtensionContext( + crawlerInitializer, + dataSource, + dslContextFactory, + connectorUpdater, + policyMapper, + fetchedDataOfferBuilder, + dataOfferRecordUpdater + ); + } + + @NotNull + private static PolicyMapper newPolicyMapper( + TypeTransformerRegistry typeTransformerRegistry, + ObjectMapper objectMapperJsonLd + ) { + var operatorMapper = new OperatorMapper(); + var literalMapper = new LiteralMapper(objectMapperJsonLd); + var atomicConstraintMapper = new AtomicConstraintMapper( + literalMapper, + operatorMapper + ); + var policyValidator = new PolicyValidator(); + var expressionMapper = new ExpressionMapper(atomicConstraintMapper); + var constraintExtractor = new ExpressionExtractor( + policyValidator, + expressionMapper + ); + return new PolicyMapper( + constraintExtractor, + expressionMapper, + typeTransformerRegistry + ); + } + + @NotNull + private static AssetMapper newAssetMapper( + TypeTransformerRegistry typeTransformerRegistry, + JsonLd jsonLd, + PlaceholderEndpointService placeholderEndpointService + ) { + var edcPropertyUtils = new EdcPropertyUtils(); + var assetJsonLdUtils = new AssetJsonLdUtils(); + var assetEditRequestMapper = new AssetEditRequestMapper(); + var shortDescriptionBuilder = new ShortDescriptionBuilder(); + var assetJsonLdParser = new AssetJsonLdParser( + assetJsonLdUtils, + shortDescriptionBuilder, + endpoint -> false + ); + var httpHeaderMapper = new HttpHeaderMapper(); + var httpDataSourceMapper = new HttpDataSourceMapper(httpHeaderMapper, placeholderEndpointService); + var dataSourceMapper = new DataSourceMapper( + edcPropertyUtils, + httpDataSourceMapper + ); + var assetJsonLdBuilder = new AssetJsonLdBuilder( + dataSourceMapper, + assetJsonLdParser, + assetEditRequestMapper + ); + return new AssetMapper( + typeTransformerRegistry, + assetJsonLdBuilder, + assetJsonLdParser, + jsonLd + ); + } + + @NotNull + private static CronJobRef 
getOfflineConnectorCleanerCronJob(DslContextFactory dslContextFactory, + OfflineConnectorCleaner offlineConnectorCleaner, Config config) { + return new CronJobRef<>( + CrawlerConfigProps.CRAWLER_SCHEDULED_KILL_OFFLINE_CONNECTORS.getProperty(), + OfflineConnectorCleanerJob.class, + () -> new OfflineConnectorCleanerJob(dslContextFactory, offlineConnectorCleaner) + ); + } + + @NotNull + private static CronJobRef getOnlineConnectorRefreshCronJob( + DslContextFactory dslContextFactory, + ConnectorQueueFiller connectorQueueFiller, + Config config + ) { + return new CronJobRef<>( + CrawlerConfigProps.CRAWLER_CRON_ONLINE_CONNECTOR_REFRESH.getProperty(), + OnlineConnectorRefreshJob.class, + () -> new OnlineConnectorRefreshJob(dslContextFactory, connectorQueueFiller) + ); + } + + @NotNull + private static CronJobRef getOfflineConnectorRefreshCronJob( + DslContextFactory dslContextFactory, + ConnectorQueueFiller connectorQueueFiller, + Config config + ) { + return new CronJobRef<>( + CrawlerConfigProps.CRAWLER_CRON_OFFLINE_CONNECTOR_REFRESH.getProperty(), + OfflineConnectorRefreshJob.class, + () -> new OfflineConnectorRefreshJob(dslContextFactory, connectorQueueFiller) + ); + } + + @NotNull + private static CronJobRef getDeadConnectorRefreshCronJob(DslContextFactory dslContextFactory, + ConnectorQueueFiller connectorQueueFiller, Config config) { + return new CronJobRef<>( + CrawlerConfigProps.CRAWLER_CRON_DEAD_CONNECTOR_REFRESH.getProperty(), + DeadConnectorRefreshJob.class, + () -> new DeadConnectorRefreshJob(dslContextFactory, connectorQueueFiller) + ); + } + + private static ObjectMapper getJsonLdObjectMapper(TypeManager typeManager) { + var objectMapper = typeManager.getMapper(CoreConstants.JSON_LD); + + // Fixes Dates in JSON-LD Object Mapper + // The Core EDC uses longs over OffsetDateTime, so they never fixed the date format + objectMapper.registerModule(new JavaTimeModule()); + objectMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + + return 
objectMapper; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerInitializer.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerInitializer.java new file mode 100644 index 000000000..719ace1c3 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/CrawlerInitializer.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler; + +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.QuartzScheduleInitializer; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class CrawlerInitializer { + private final QuartzScheduleInitializer quartzScheduleInitializer; + + public void onStartup() { + quartzScheduleInitializer.startSchedules(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/Main.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/Main.java new file mode 100644 index 000000000..457d78e24 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/Main.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2024 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * 
Contributors: + * sovity GmbH - initial implementation + */ + +package de.sovity.edc.ext.catalog.crawler; + +import de.sovity.edc.utils.config.SovityEdcRuntime; + +public class Main { + public static void main(String[] args) { + SovityEdcRuntime.boot(CrawlerConfigProps.ALL_CRAWLER_PROPS); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/ConnectorCrawler.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/ConnectorCrawler.java new file mode 100644 index 000000000..3611d24e6 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/ConnectorCrawler.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling; + +import de.sovity.authorityportal.db.jooq.enums.MeasurementErrorStatus; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.FetchedCatalogService; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerExecutionTimeLogger; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.ConnectorUpdateFailureWriter; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.ConnectorUpdateSuccessWriter; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.time.StopWatch; +import org.eclipse.edc.spi.monitor.Monitor; + 
+import java.util.concurrent.TimeUnit; + +/** + * Updates a single connector. + */ +@RequiredArgsConstructor +public class ConnectorCrawler { + private final FetchedCatalogService fetchedCatalogService; + private final ConnectorUpdateSuccessWriter connectorUpdateSuccessWriter; + private final ConnectorUpdateFailureWriter connectorUpdateFailureWriter; + private final ConnectorQueries connectorQueries; + private final DslContextFactory dslContextFactory; + private final Monitor monitor; + private final CrawlerExecutionTimeLogger crawlerExecutionTimeLogger; + + /** + * Updates single connector. + * + * @param connectorRef connector + */ + public void crawlConnector(ConnectorRef connectorRef) { + var executionTime = StopWatch.createStarted(); + var failed = false; + + try { + monitor.info("Updating connector: " + connectorRef); + + var catalog = fetchedCatalogService.fetchCatalog(connectorRef); + + // Update connector in a single transaction + dslContextFactory.transaction(dsl -> { + var connectorRecord = connectorQueries.findByConnectorId(dsl, connectorRef.getConnectorId()); + connectorUpdateSuccessWriter.handleConnectorOnline(dsl, connectorRef, connectorRecord, catalog); + }); + } catch (Exception e) { + failed = true; + try { + // Update connector in a single transaction + dslContextFactory.transaction(dsl -> { + var connectorRecord = connectorQueries.findByConnectorId(dsl, connectorRef.getConnectorId()); + connectorUpdateFailureWriter.handleConnectorOffline(dsl, connectorRef, connectorRecord, e); + }); + } catch (Exception e1) { + e1.addSuppressed(e); + monitor.severe("Failed updating connector as failed.", e1); + } + } finally { + executionTime.stop(); + try { + var status = failed ? 
MeasurementErrorStatus.ERROR : MeasurementErrorStatus.OK; + dslContextFactory.transaction(dsl -> crawlerExecutionTimeLogger.logExecutionTime( + dsl, + connectorRef, + executionTime.getTime(TimeUnit.MILLISECONDS), + status + )); + } catch (Exception e) { + monitor.severe("Failed logging connector update execution time.", e); + } + } + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/OfflineConnectorCleaner.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/OfflineConnectorCleaner.java new file mode 100644 index 000000000..51f5c9bd4 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/OfflineConnectorCleaner.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling; + +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogCleaner; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorStatusUpdater; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +@RequiredArgsConstructor +public class OfflineConnectorCleaner { + private final CrawlerConfig crawlerConfig; + private final ConnectorQueries connectorQueries; + private final CrawlerEventLogger crawlerEventLogger; + private final ConnectorStatusUpdater 
connectorStatusUpdater; + private final CatalogCleaner catalogCleaner; + + public void cleanConnectorsIfOfflineTooLong(DSLContext dsl) { + var killOfflineConnectorsAfter = crawlerConfig.getKillOfflineConnectorsAfter(); + var connectorsToKill = connectorQueries.findAllConnectorsForKilling(dsl, killOfflineConnectorsAfter); + + catalogCleaner.removeCatalogByConnectors(dsl, connectorsToKill); + connectorStatusUpdater.markAsDead(dsl, connectorsToKill); + + crawlerEventLogger.addKilledDueToOfflineTooLongMessages(dsl, connectorsToKill); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogBuilder.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogBuilder.java new file mode 100644 index 000000000..2ef73ffa2 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogBuilder.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.fetching; + +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedCatalog; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.utils.catalog.model.DspCatalog; +import de.sovity.edc.utils.catalog.model.DspContractOffer; +import 
de.sovity.edc.utils.catalog.model.DspDataOffer; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.Validate; +import org.jetbrains.annotations.NotNull; + +import java.util.List; + +@RequiredArgsConstructor +public class FetchedCatalogBuilder { + private final FetchedCatalogMappingUtils fetchedCatalogMappingUtils; + + public FetchedCatalog buildFetchedCatalog(DspCatalog catalog, ConnectorRef connectorRef) { + assertEqualEndpoint(catalog, connectorRef); + assertEqualParticipantId(catalog, connectorRef); + + var fetchedDataOffers = catalog.getDataOffers().stream() + .map(dspDataOffer -> buildFetchedDataOffer(dspDataOffer, connectorRef)) + .toList(); + + var fetchedCatalog = new FetchedCatalog(); + fetchedCatalog.setConnectorRef(connectorRef); + fetchedCatalog.setDataOffers(fetchedDataOffers); + return fetchedCatalog; + } + + private void assertEqualParticipantId(DspCatalog catalog, ConnectorRef connectorRef) { + Validate.isTrue( + connectorRef.getConnectorId().equals(catalog.getParticipantId()), + String.format( + "Connector connectorId does not match the participantId: connectorId %s, participantId %s", + connectorRef.getConnectorId(), + catalog.getParticipantId() + ) + ); + } + + private void assertEqualEndpoint(DspCatalog catalog, ConnectorRef connectorRef) { + Validate.isTrue( + connectorRef.getEndpoint().equals(catalog.getEndpoint()), + String.format( + "Connector endpoint mismatch: expected %s, got %s", + connectorRef.getEndpoint(), + catalog.getEndpoint() + ) + ); + } + + @NotNull + private FetchedDataOffer buildFetchedDataOffer( + DspDataOffer dspDataOffer, + ConnectorRef connectorRef + ) { + var uiAsset = fetchedCatalogMappingUtils.buildUiAsset(dspDataOffer, connectorRef); + var uiAssetJson = fetchedCatalogMappingUtils.buildUiAssetJson(uiAsset); + + var fetchedDataOffer = new FetchedDataOffer(); + fetchedDataOffer.setAssetId(uiAsset.getAssetId()); + fetchedDataOffer.setUiAsset(uiAsset); + fetchedDataOffer.setUiAssetJson(uiAssetJson); + 
fetchedDataOffer.setContractOffers(buildFetchedContractOffers(dspDataOffer.getContractOffers())); + return fetchedDataOffer; + } + + @NotNull + private List buildFetchedContractOffers(List offers) { + return offers.stream() + .map(this::buildFetchedContractOffer) + .toList(); + } + + @NotNull + private FetchedContractOffer buildFetchedContractOffer(DspContractOffer offer) { + var uiPolicyJson = fetchedCatalogMappingUtils.buildUiPolicyJson(offer.getPolicyJsonLd()); + var contractOffer = new FetchedContractOffer(); + contractOffer.setContractOfferId(offer.getContractOfferId()); + contractOffer.setUiPolicyJson(uiPolicyJson); + return contractOffer; + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogMappingUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogMappingUtils.java new file mode 100644 index 000000000..1ed02ef3e --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogMappingUtils.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.fetching; + +import com.fasterxml.jackson.databind.ObjectMapper; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.wrapper.api.common.mappers.AssetMapper; +import de.sovity.edc.ext.wrapper.api.common.mappers.PolicyMapper; +import de.sovity.edc.ext.wrapper.api.common.model.UiAsset; +import 
de.sovity.edc.utils.catalog.model.DspDataOffer; +import jakarta.json.JsonObject; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; + +@RequiredArgsConstructor +public class FetchedCatalogMappingUtils { + private final PolicyMapper policyMapper; + private final AssetMapper assetMapper; + private final ObjectMapper objectMapper; + + public UiAsset buildUiAsset( + DspDataOffer dspDataOffer, + ConnectorRef connectorRef + ) { + var assetJsonLd = assetMapper.buildAssetJsonLdFromDatasetProperties(dspDataOffer.getAssetPropertiesJsonLd()); + var asset = assetMapper.buildAsset(assetJsonLd); + var uiAsset = assetMapper.buildUiAsset(asset, connectorRef.getEndpoint(), connectorRef.getConnectorId()); + uiAsset.setCreatorOrganizationName(connectorRef.getOrganizationLegalName()); + uiAsset.setParticipantId(connectorRef.getConnectorId()); + return uiAsset; + } + + @SneakyThrows + public String buildUiAssetJson(UiAsset uiAsset) { + return objectMapper.writeValueAsString(uiAsset); + } + + @SneakyThrows + public String buildUiPolicyJson(JsonObject policyJsonLd) { + var policy = policyMapper.buildPolicy(policyJsonLd); + var uiPolicy = policyMapper.buildUiPolicy(policy); + return objectMapper.writeValueAsString(uiPolicy); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogService.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogService.java new file mode 100644 index 000000000..c55973bbe --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/FetchedCatalogService.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * 
https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.fetching; + +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedCatalog; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.utils.catalog.DspCatalogService; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import org.eclipse.edc.connector.contract.spi.types.offer.ContractOffer; + +@RequiredArgsConstructor +public class FetchedCatalogService { + private final DspCatalogService dspCatalogService; + private final FetchedCatalogBuilder catalogPatchBuilder; + + /** + * Fetches {@link ContractOffer}s and de-duplicates them into {@link FetchedDataOffer}s. + * + * @param connectorRef connector + * @return updated connector db row + */ + @SneakyThrows + public FetchedCatalog fetchCatalog(ConnectorRef connectorRef) { + var dspCatalog = dspCatalogService.fetchDataOffers(connectorRef.getEndpoint()); + return catalogPatchBuilder.buildFetchedCatalog(dspCatalog, connectorRef); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java new file mode 100644 index 000000000..027d2c8b3 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedCatalog.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is 
available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.fetching.model; + +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.experimental.FieldDefaults; + +import java.util.List; + +/** + * Contains catalog response as required for writing into DB. + */ +@Getter +@Setter +@Builder +@RequiredArgsConstructor +@AllArgsConstructor +@FieldDefaults(level = AccessLevel.PRIVATE) +public class FetchedCatalog { + ConnectorRef connectorRef; + List dataOffers; +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java new file mode 100644 index 000000000..2b1993749 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/fetching/model/FetchedContractOffer.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.fetching.model; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import 
/**
 * Contains a contract offer of a fetched data offer: its ID and its policy as UI policy JSON.
 */
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
public class FetchedContractOffer {
    // ID of the contract offer
    String contractOfferId;
    // UI policy of the contract offer, serialized as JSON string
    String uiPolicyJson;
}
+ */ +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +@FieldDefaults(level = AccessLevel.PRIVATE) +public class FetchedDataOffer { + String assetId; + UiAsset uiAsset; + String uiAssetJson; + List contractOffers; +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/ConnectorChangeTracker.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/ConnectorChangeTracker.java new file mode 100644 index 000000000..e87eabb01 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/ConnectorChangeTracker.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.logging; + +import lombok.Getter; +import lombok.Setter; + +import java.util.ArrayList; +import java.util.List; + +/** + * Utility for collecting the information required to build log messages about what was updated. 
+ */ +@Getter +public class ConnectorChangeTracker { + @Setter + private int numOffersAdded = 0; + + @Setter + private int numOffersDeleted = 0; + + @Setter + private int numOffersUpdated = 0; + + @Setter + private String participantIdChanged = null; + + public boolean isEmpty() { + return numOffersAdded == 0 && numOffersDeleted == 0 && numOffersUpdated == 0 && participantIdChanged == null; + } + + @Override + public String toString() { + if (isEmpty()) { + return "Connector is up to date."; + } + + var msg = "Connector Updated."; + if (numOffersAdded > 0 || numOffersDeleted > 0 || numOffersUpdated > 0) { + List offersMsgs = new ArrayList<>(); + if (numOffersAdded > 0) { + offersMsgs.add("%d added".formatted(numOffersAdded)); + } + if (numOffersUpdated > 0) { + offersMsgs.add("%d updated".formatted(numOffersUpdated)); + } + if (numOffersDeleted > 0) { + offersMsgs.add("%d deleted".formatted(numOffersDeleted)); + } + msg += " Data Offers changed: %s.".formatted(String.join(", ", offersMsgs)); + } + if (participantIdChanged != null) { + msg += " Participant ID changed to %s.".formatted(participantIdChanged); + } + return msg; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventErrorMessage.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventErrorMessage.java new file mode 100644 index 000000000..92bc595ed --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventErrorMessage.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * 
Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.logging; + +import de.sovity.edc.ext.catalog.crawler.utils.StringUtils2; +import lombok.NonNull; +import org.apache.commons.lang3.exception.ExceptionUtils; + +/** + * Helper Dto that contains User Message + Error Stack Trace to be written into + * {@link de.sovity.authorityportal.db.jooq.tables.CrawlerEventLog}. + *
+ * This class exists so that logging exceptions has a consistent format. + * + * @param message message + * @param stackTraceOrNull stack trace + */ +public record CrawlerEventErrorMessage(String message, String stackTraceOrNull) { + + public static CrawlerEventErrorMessage ofMessage(@NonNull String message) { + return new CrawlerEventErrorMessage(message, null); + } + + public static CrawlerEventErrorMessage ofStackTrace(@NonNull String baseMessage, @NonNull Throwable cause) { + var message = baseMessage; + message = StringUtils2.removeSuffix(message, "."); + message = StringUtils2.removeSuffix(message, ":"); + message = "%s: %s".formatted(message, cause.getClass().getName()); + return new CrawlerEventErrorMessage(message, ExceptionUtils.getStackTrace(cause)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLogger.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLogger.java new file mode 100644 index 000000000..80f32f864 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLogger.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.logging; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.CrawlerEventStatus; +import de.sovity.authorityportal.db.jooq.enums.CrawlerEventType; +import 
de.sovity.authorityportal.db.jooq.tables.records.CrawlerEventLogRecord; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.time.OffsetDateTime; +import java.util.Collection; +import java.util.UUID; + +/** + * Updates a single connector. + */ +@RequiredArgsConstructor +public class CrawlerEventLogger { + + public void logConnectorUpdated(DSLContext dsl, ConnectorRef connectorRef, ConnectorChangeTracker changes) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_UPDATED); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage(changes.toString()); + logEntry.insert(); + } + + public void logConnectorOffline(DSLContext dsl, ConnectorRef connectorRef, CrawlerEventErrorMessage errorMessage) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_STATUS_CHANGE_OFFLINE); + logEntry.setEventStatus(CrawlerEventStatus.ERROR); + logEntry.setUserMessage("Connector is offline."); + logEntry.setErrorStack(errorMessage.stackTraceOrNull()); + logEntry.insert(); + } + + public void logConnectorOnline(DSLContext dsl, ConnectorRef connectorRef) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_STATUS_CHANGE_ONLINE); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage("Connector is online."); + logEntry.insert(); + } + + public void logConnectorUpdateDataOfferLimitExceeded( + DSLContext dsl, + ConnectorRef connectorRef, + Integer maxDataOffersPerConnector + ) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_DATA_OFFER_LIMIT_EXCEEDED); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage( + "Connector has more than %d data offers. 
Exceeding data offers will be ignored.".formatted(maxDataOffersPerConnector)); + logEntry.insert(); + } + + public void logConnectorUpdateDataOfferLimitOk(DSLContext dsl, ConnectorRef connectorRef) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_DATA_OFFER_LIMIT_OK); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage("Connector is not exceeding the maximum number of data offers limit anymore."); + logEntry.insert(); + } + + public void logConnectorUpdateContractOfferLimitExceeded( + DSLContext dsl, + ConnectorRef connectorRef, + Integer maxContractOffersPerConnector + ) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_CONTRACT_OFFER_LIMIT_EXCEEDED); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage(String.format( + "Some data offers have more than %d contract offers. Exceeding contract offers will be ignored.: ", + maxContractOffersPerConnector + )); + logEntry.insert(); + } + + public void logConnectorUpdateContractOfferLimitOk(DSLContext dsl, ConnectorRef connectorRef) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_CONTRACT_OFFER_LIMIT_OK); + logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage("Connector is not exceeding the maximum number of contract offers per data offer limit anymore."); + logEntry.insert(); + } + + public void addKilledDueToOfflineTooLongMessages(DSLContext dsl, Collection connectorRefs) { + var logEntries = connectorRefs.stream() + .map(connectorRef -> buildKilledDueToOfflineTooLongMessage(dsl, connectorRef)) + .toList(); + dsl.batchInsert(logEntries).execute(); + } + + private CrawlerEventLogRecord buildKilledDueToOfflineTooLongMessage(DSLContext dsl, ConnectorRef connectorRef) { + var logEntry = newLogEntry(dsl, connectorRef); + logEntry.setEvent(CrawlerEventType.CONNECTOR_KILLED_DUE_TO_OFFLINE_FOR_TOO_LONG); + 
logEntry.setEventStatus(CrawlerEventStatus.OK); + logEntry.setUserMessage("Connector was marked as dead for being offline too long."); + return logEntry; + } + + private CrawlerEventLogRecord newLogEntry(DSLContext dsl, ConnectorRef connectorRef) { + var logEntry = dsl.newRecord(Tables.CRAWLER_EVENT_LOG); + logEntry.setId(UUID.randomUUID()); + logEntry.setEnvironment(connectorRef.getEnvironmentId()); + logEntry.setConnectorId(connectorRef.getConnectorId()); + logEntry.setCreatedAt(OffsetDateTime.now()); + return logEntry; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerExecutionTimeLogger.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerExecutionTimeLogger.java new file mode 100644 index 000000000..cacf23daf --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerExecutionTimeLogger.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.logging; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.MeasurementErrorStatus; +import de.sovity.authorityportal.db.jooq.enums.MeasurementType; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.time.OffsetDateTime; +import java.util.UUID; + +/** + * Updates a single connector. 
+ */ +@RequiredArgsConstructor +public class CrawlerExecutionTimeLogger { + public void logExecutionTime(DSLContext dsl, ConnectorRef connectorRef, long executionTimeInMs, MeasurementErrorStatus errorStatus) { + var logEntry = dsl.newRecord(Tables.CRAWLER_EXECUTION_TIME_MEASUREMENT); + logEntry.setId(UUID.randomUUID()); + logEntry.setEnvironment(connectorRef.getEnvironmentId()); + logEntry.setConnectorId(connectorRef.getConnectorId()); + logEntry.setDurationInMs(executionTimeInMs); + logEntry.setType(MeasurementType.CONNECTOR_REFRESH); + logEntry.setErrorStatus(errorStatus); + logEntry.setCreatedAt(OffsetDateTime.now()); + logEntry.insert(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/CatalogPatchBuilder.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/CatalogPatchBuilder.java new file mode 100644 index 000000000..e5b726c27 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/CatalogPatchBuilder.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import 
de.sovity.edc.ext.catalog.crawler.crawling.writing.utils.DiffUtils; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogPatch; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferRecordUpdater; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +import static java.util.stream.Collectors.groupingBy; + +@RequiredArgsConstructor +public class CatalogPatchBuilder { + private final ContractOfferQueries contractOfferQueries; + private final DataOfferQueries dataOfferQueries; + private final DataOfferRecordUpdater dataOfferRecordUpdater; + private final ContractOfferRecordUpdater contractOfferRecordUpdater; + + /** + * Fetches existing data offers of given connector endpoint and compares them with fetched data offers. 
+ * + * @param dsl dsl + * @param connectorRef connector + * @param fetchedDataOffers fetched data offers + * @return change list / patch + */ + public CatalogPatch buildDataOfferPatch( + DSLContext dsl, + ConnectorRef connectorRef, + Collection fetchedDataOffers + ) { + var patch = new CatalogPatch(); + var dataOffers = dataOfferQueries.findByConnectorId(dsl, connectorRef.getConnectorId()); + var contractOffersByAssetId = contractOfferQueries.findByConnectorId(dsl, connectorRef.getConnectorId()) + .stream() + .collect(groupingBy(ContractOfferRecord::getAssetId)); + + var diff = DiffUtils.compareLists( + dataOffers, + DataOfferRecord::getAssetId, + fetchedDataOffers, + FetchedDataOffer::getAssetId + ); + + diff.added().forEach(fetched -> { + var newRecord = dataOfferRecordUpdater.newDataOffer(connectorRef, fetched); + patch.dataOffers().insert(newRecord); + patchContractOffers(patch, newRecord, List.of(), fetched.getContractOffers()); + }); + + diff.updated().forEach(match -> { + var existing = match.existing(); + var fetched = match.fetched(); + + // Update Contract Offers + var contractOffers = contractOffersByAssetId.getOrDefault(existing.getAssetId(), List.of()); + var changed = patchContractOffers(patch, existing, contractOffers, fetched.getContractOffers()); + + // Update Data Offer (and update updatedAt if contractOffers changed) + changed = dataOfferRecordUpdater.updateDataOffer(existing, fetched, changed); + + if (changed) { + patch.dataOffers().update(existing); + } + }); + + diff.removed().forEach(dataOffer -> { + patch.dataOffers().delete(dataOffer); + var contractOffers = contractOffersByAssetId.getOrDefault(dataOffer.getAssetId(), List.of()); + contractOffers.forEach(it -> patch.contractOffers().delete(it)); + }); + + return patch; + } + + private boolean patchContractOffers( + CatalogPatch patch, + DataOfferRecord dataOffer, + Collection contractOffers, + Collection fetchedContractOffers + ) { + var hasUpdates = new AtomicBoolean(false); + + var diff 
= DiffUtils.compareLists( + contractOffers, + ContractOfferRecord::getContractOfferId, + fetchedContractOffers, + FetchedContractOffer::getContractOfferId + ); + + diff.added().forEach(fetched -> { + var newRecord = contractOfferRecordUpdater.newContractOffer(dataOffer, fetched); + patch.contractOffers().insert(newRecord); + hasUpdates.set(true); + }); + + diff.updated().forEach(match -> { + var existing = match.existing(); + var fetched = match.fetched(); + + if (contractOfferRecordUpdater.updateContractOffer(existing, fetched)) { + patch.contractOffers().update(existing); + hasUpdates.set(true); + } + }); + + diff.removed().forEach(existing -> { + patch.contractOffers().delete(existing); + hasUpdates.set(true); + }); + + return hasUpdates.get(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriter.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriter.java new file mode 100644 index 000000000..ec67dd2a2 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriter.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.ConnectorChangeTracker; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogPatchApplier; +import 
de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import org.jooq.DSLContext; + +import java.util.Collection; + +@RequiredArgsConstructor +public class ConnectorUpdateCatalogWriter { + private final CatalogPatchBuilder catalogPatchBuilder; + private final CatalogPatchApplier catalogPatchApplier; + + /** + * Updates a connector's data offers with given {@link FetchedDataOffer}s. + * + * @param dsl dsl + * @param connectorRef connector + * @param fetchedDataOffers fetched data offers + * @param changes change tracker for log message + */ + @SneakyThrows + public void updateDataOffers( + DSLContext dsl, + ConnectorRef connectorRef, + Collection fetchedDataOffers, + ConnectorChangeTracker changes + ) { + var patch = catalogPatchBuilder.buildDataOfferPatch(dsl, connectorRef, fetchedDataOffers); + changes.setNumOffersAdded(patch.dataOffers().getInsertions().size()); + changes.setNumOffersUpdated(patch.dataOffers().getUpdates().size()); + changes.setNumOffersDeleted(patch.dataOffers().getDeletions().size()); + catalogPatchApplier.applyDbUpdatesBatched(dsl, patch); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateFailureWriter.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateFailureWriter.java new file mode 100644 index 000000000..efa4614af --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateFailureWriter.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: 
Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventErrorMessage; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import org.eclipse.edc.spi.monitor.Monitor; +import org.jooq.DSLContext; + +import java.time.OffsetDateTime; + +@RequiredArgsConstructor +public class ConnectorUpdateFailureWriter { + private final CrawlerEventLogger crawlerEventLogger; + private final Monitor monitor; + + public void handleConnectorOffline( + DSLContext dsl, + ConnectorRef connectorRef, + ConnectorRecord connector, + Throwable e + ) { + // Log Status Change and set status to offline if necessary + if (connector.getOnlineStatus() == ConnectorOnlineStatus.ONLINE || connector.getLastRefreshAttemptAt() == null) { + monitor.info("Connector is offline: " + connector.getEndpointUrl(), e); + crawlerEventLogger.logConnectorOffline(dsl, connectorRef, getFailureMessage(e)); + connector.setOnlineStatus(ConnectorOnlineStatus.OFFLINE); + } + + connector.setLastRefreshAttemptAt(OffsetDateTime.now()); + connector.update(); + } + + public CrawlerEventErrorMessage getFailureMessage(Throwable e) { + return CrawlerEventErrorMessage.ofStackTrace("Unexpected exception during connector update.", e); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateSuccessWriter.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateSuccessWriter.java new file mode 100644 index 
000000000..ce2ba322b --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateSuccessWriter.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedCatalog; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.ConnectorChangeTracker; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.time.OffsetDateTime; + +@RequiredArgsConstructor +public class ConnectorUpdateSuccessWriter { + private final CrawlerEventLogger crawlerEventLogger; + private final ConnectorUpdateCatalogWriter connectorUpdateCatalogWriter; + private final DataOfferLimitsEnforcer dataOfferLimitsEnforcer; + + public void handleConnectorOnline( + DSLContext dsl, + ConnectorRef connectorRef, + ConnectorRecord connector, + FetchedCatalog catalog + ) { + // Limit data offers and log limitation if necessary + var limitedDataOffers = dataOfferLimitsEnforcer.enforceLimits(catalog.getDataOffers()); + dataOfferLimitsEnforcer.logEnforcedLimitsIfChanged(dsl, connectorRef, connector, limitedDataOffers); + + // Log Status Change and set status to online if necessary + if (connector.getOnlineStatus() != ConnectorOnlineStatus.ONLINE || 
connector.getLastRefreshAttemptAt() == null) { + crawlerEventLogger.logConnectorOnline(dsl, connectorRef); + connector.setOnlineStatus(ConnectorOnlineStatus.ONLINE); + } + + // Track changes for final log message + var changes = new ConnectorChangeTracker(); + var now = OffsetDateTime.now(); + connector.setLastSuccessfulRefreshAt(now); + connector.setLastRefreshAttemptAt(now); + connector.update(); + + // Update data offers + connectorUpdateCatalogWriter.updateDataOffers( + dsl, + connectorRef, + limitedDataOffers.abbreviatedDataOffers(), + changes + ); + + // Log event if changes are present + if (!changes.isEmpty()) { + crawlerEventLogger.logConnectorUpdated(dsl, connectorRef, changes); + } + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcer.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcer.java new file mode 100644 index 000000000..1997b0b79 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcer.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorContractOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorDataOffersExceeded; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; 
+import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +@RequiredArgsConstructor +public class DataOfferLimitsEnforcer { + private final CrawlerConfig crawlerConfig; + private final CrawlerEventLogger crawlerEventLogger; + + public record DataOfferLimitsEnforced( + Collection abbreviatedDataOffers, + boolean dataOfferLimitsExceeded, + boolean contractOfferLimitsExceeded + ) { + } + + public DataOfferLimitsEnforced enforceLimits(Collection dataOffers) { + // Get limits from config + var maxDataOffers = crawlerConfig.getMaxDataOffersPerConnector(); + var maxContractOffers = crawlerConfig.getMaxContractOffersPerDataOffer(); + List offerList = new ArrayList<>(dataOffers); + + // No limits set + if (maxDataOffers == -1 && maxContractOffers == -1) { + return new DataOfferLimitsEnforced(dataOffers, false, false); + } + + // Validate if limits exceeded + var dataOfferLimitsExceeded = false; + if (maxDataOffers != -1 && offerList.size() > maxDataOffers) { + offerList = offerList.subList(0, maxDataOffers); + dataOfferLimitsExceeded = true; + } + + var contractOfferLimitsExceeded = false; + for (var dataOffer : offerList) { + var contractOffers = dataOffer.getContractOffers(); + if (contractOffers != null && maxContractOffers != -1 && contractOffers.size() > maxContractOffers) { + dataOffer.setContractOffers(contractOffers.subList(0, maxContractOffers)); + contractOfferLimitsExceeded = true; + } + } + + // Create new list with limited offers + return new DataOfferLimitsEnforced(offerList, dataOfferLimitsExceeded, contractOfferLimitsExceeded); + } + + public void logEnforcedLimitsIfChanged( + DSLContext dsl, + ConnectorRef connectorRef, + ConnectorRecord 
connector, + DataOfferLimitsEnforced enforcedLimits + ) { + + // DataOffer + if (enforcedLimits.dataOfferLimitsExceeded() && connector.getDataOffersExceeded() == ConnectorDataOffersExceeded.OK) { + var maxDataOffers = crawlerConfig.getMaxDataOffersPerConnector(); + crawlerEventLogger.logConnectorUpdateDataOfferLimitExceeded(dsl, connectorRef, maxDataOffers); + connector.setDataOffersExceeded(ConnectorDataOffersExceeded.EXCEEDED); + } else if (!enforcedLimits.dataOfferLimitsExceeded() && connector.getDataOffersExceeded() == ConnectorDataOffersExceeded.EXCEEDED) { + crawlerEventLogger.logConnectorUpdateDataOfferLimitOk(dsl, connectorRef); + connector.setDataOffersExceeded(ConnectorDataOffersExceeded.OK); + } + + // ContractOffer + if (enforcedLimits.contractOfferLimitsExceeded() && connector.getContractOffersExceeded() == ConnectorContractOffersExceeded.OK) { + var maxContractOffers = crawlerConfig.getMaxContractOffersPerDataOffer(); + crawlerEventLogger.logConnectorUpdateContractOfferLimitExceeded(dsl, connectorRef, maxContractOffers); + connector.setContractOffersExceeded(ConnectorContractOffersExceeded.EXCEEDED); + } else if (!enforcedLimits.contractOfferLimitsExceeded() && + connector.getContractOffersExceeded() == ConnectorContractOffersExceeded.EXCEEDED) { + crawlerEventLogger.logConnectorUpdateContractOfferLimitOk(dsl, connectorRef); + connector.setContractOffersExceeded(ConnectorContractOffersExceeded.OK); + } + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/ChangeTracker.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/ChangeTracker.java new file mode 100644 index 000000000..dad7f9cae --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/ChangeTracker.java @@ -0,0 +1,36 @@ +package 
de.sovity.edc.ext.catalog.crawler.crawling.writing.utils; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; + +import java.util.Objects; +import java.util.function.BiPredicate; +import java.util.function.Consumer; + +@Getter +@NoArgsConstructor +@AllArgsConstructor +public class ChangeTracker { + private boolean changed = false; + + public void setIfChanged( + T existing, + T fetched, + Consumer setter + ) { + setIfChanged(existing, fetched, setter, Objects::equals); + } + + public void setIfChanged( + T existing, + T fetched, + Consumer setter, + BiPredicate equalityChecker + ) { + if (!equalityChecker.test(existing, fetched)) { + setter.accept(fetched); + changed = true; + } + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/DiffUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/DiffUtils.java new file mode 100644 index 000000000..7db2dac4f --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/utils/DiffUtils.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing.utils; + +import de.sovity.edc.ext.catalog.crawler.utils.MapUtils; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.function.Function; + + +@NoArgsConstructor(access = 
AccessLevel.PRIVATE) +public class DiffUtils { + /** + * Tries to match two collections by a key, then collects planned change sets as {@link DiffResult}. + * + * @param existing list of existing elements + * @param existingKeyFn existing elements key extractor + * @param fetched list of fetched elements + * @param fetchedKeyFn fetched elements key extractor + * @param first collection type + * @param second collection type + * @param key type + */ + public static DiffResult compareLists( + Collection existing, + Function existingKeyFn, + Collection fetched, + Function fetchedKeyFn + ) { + var existingByKey = MapUtils.associateBy(existing, existingKeyFn); + var fetchedByKey = MapUtils.associateBy(fetched, fetchedKeyFn); + + var keys = new HashSet<>(existingByKey.keySet()); + keys.addAll(fetchedByKey.keySet()); + + var result = new DiffResult(); + + keys.forEach(key -> { + var existingItem = existingByKey.get(key); + var fetchedItem = fetchedByKey.get(key); + + if (existingItem == null) { + result.added.add(fetchedItem); + } else if (fetchedItem == null) { + result.removed.add(existingItem); + } else { + result.updated.add(new DiffResultMatch<>(existingItem, fetchedItem)); + } + }); + + return result; + } + + /** + * Result of comparing two collections by keys. + * + * @param added elements that are present in fetched collection but not in existing + * @param updated elements that are present in both collections + * @param removed elements that are present in existing collection but not in fetched + * @param existing item type + * @param fetched item type + */ + public record DiffResult(List added, List> updated, List removed) { + DiffResult() { + this(new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); + } + } + + /** + * Pair of elements that are present in both collections. 
+ * + * @param existing existing item + * @param fetched fetched item + * @param existing item type + * @param fetched item type + */ + public record DiffResultMatch(A existing, B fetched) { + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogCleaner.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogCleaner.java new file mode 100644 index 000000000..3be0e8809 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogCleaner.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.dao.utils.PostgresqlUtils; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.util.Collection; + +import static java.util.stream.Collectors.toSet; + +@RequiredArgsConstructor +public class CatalogCleaner { + + public void removeCatalogByConnectors(DSLContext dsl, Collection connectorRefs) { + var co = Tables.CONTRACT_OFFER; + var d = Tables.DATA_OFFER; + + var connectorIds = connectorRefs.stream().map(ConnectorRef::getConnectorId).collect(toSet()); + + dsl.deleteFrom(co).where(PostgresqlUtils.in(co.CONNECTOR_ID, connectorIds)).execute(); + dsl.deleteFrom(d).where(PostgresqlUtils.in(d.CONNECTOR_ID, connectorIds)).execute(); + } +} diff --git 
a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatch.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatch.java new file mode 100644 index 000000000..2a3df4ab4 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatch.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao; + +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.dao.utils.RecordPatch; +import lombok.AccessLevel; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.experimental.FieldDefaults; + +import java.util.List; + +/** + * Contains planned DB Row changes to be applied as batch. 
+ */ +@Getter +@Setter +@Accessors(fluent = true) +@FieldDefaults(level = AccessLevel.PRIVATE) +public class CatalogPatch { + RecordPatch dataOffers = new RecordPatch<>(); + RecordPatch contractOffers = new RecordPatch<>(); + + public List> insertionOrder() { + return List.of(dataOffers, contractOffers); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatchApplier.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatchApplier.java new file mode 100644 index 000000000..9dce2fc9d --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/CatalogPatchApplier.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao; + +import de.sovity.edc.ext.catalog.crawler.dao.utils.RecordPatch; +import de.sovity.edc.ext.catalog.crawler.utils.CollectionUtils2; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import org.jooq.DSLContext; + +@RequiredArgsConstructor +public class CatalogPatchApplier { + + @SneakyThrows + public void applyDbUpdatesBatched(DSLContext dsl, CatalogPatch catalogPatch) { + var insertionOrder = catalogPatch.insertionOrder(); + var deletionOrder = CollectionUtils2.reverse(insertionOrder); + + insertionOrder.stream() + .map(RecordPatch::getInsertions) + .filter(CollectionUtils2::isNotEmpty) + .forEach(it -> dsl.batchInsert(it).execute()); + + insertionOrder.stream() + .map(RecordPatch::getUpdates) + .filter(CollectionUtils2::isNotEmpty) + 
.forEach(it -> dsl.batchUpdate(it).execute()); + + deletionOrder.stream() + .map(RecordPatch::getDeletions) + .filter(CollectionUtils2::isNotEmpty) + .forEach(it -> dsl.batchDelete(it).execute()); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DataSourceFactory.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DataSourceFactory.java new file mode 100644 index 000000000..4322be3ba --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DataSourceFactory.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.config; + +import com.zaxxer.hikari.HikariDataSource; +import de.sovity.edc.ext.catalog.crawler.CrawlerConfigProps; +import de.sovity.edc.extension.postgresql.HikariDataSourceFactory; +import de.sovity.edc.extension.postgresql.JdbcCredentials; +import lombok.RequiredArgsConstructor; +import org.eclipse.edc.spi.system.configuration.Config; + +import javax.sql.DataSource; + +@RequiredArgsConstructor +public class DataSourceFactory { + private final Config config; + + + /** + * Create a new {@link DataSource} from EDC Config. + * + * @return {@link DataSource}. 
+ */ + public HikariDataSource newDataSource() { + var jdbcCredentials = getJdbcCredentials(); + int maxPoolSize = CrawlerConfigProps.CRAWLER_DB_CONNECTION_POOL_SIZE.getInt(config); + int connectionTimeoutInMs = CrawlerConfigProps.CRAWLER_DB_CONNECTION_TIMEOUT_IN_MS.getInt(config); + return HikariDataSourceFactory.newDataSource( + jdbcCredentials, + maxPoolSize, + connectionTimeoutInMs + ); + } + + + public JdbcCredentials getJdbcCredentials() { + return new JdbcCredentials( + CrawlerConfigProps.CRAWLER_DB_JDBC_URL.getStringOrThrow(config), + CrawlerConfigProps.CRAWLER_DB_JDBC_USER.getStringOrThrow(config), + CrawlerConfigProps.CRAWLER_DB_JDBC_PASSWORD.getStringOrThrow(config) + ); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DslContextFactory.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DslContextFactory.java new file mode 100644 index 000000000..3d4f10bfd --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/config/DslContextFactory.java @@ -0,0 +1,67 @@ +package de.sovity.edc.ext.catalog.crawler.dao.config; + +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.jooq.impl.DSL; + +import javax.sql.DataSource; +import java.util.function.Consumer; +import java.util.function.Function; + +/** + * Quickly launch {@link org.jooq.DSLContext}s from EDC configuration. + */ +@RequiredArgsConstructor +public class DslContextFactory { + private final DataSource dataSource; + + /** + * Create new {@link DSLContext} for querying DB. + * + * @return new {@link DSLContext} + */ + public DSLContext newDslContext() { + return DSL.using(dataSource, SQLDialect.POSTGRES); + } + + /** + * Utility method for when the {@link DSLContext} will be used only for a single transaction. + *
+ * An example would be a REST request. + * + * @param return type + * @return new {@link DSLContext} + opened transaction + */ + public R transactionResult(Function function) { + return newDslContext().transactionResult(transaction -> function.apply(transaction.dsl())); + } + + /** + * Utility method for when the {@link DSLContext} will be used only for a single transaction. + *
+ * An example would be a REST request. + */ + public void transaction(Consumer function) { + newDslContext().transaction(transaction -> function.accept(transaction.dsl())); + } + + /** + * Runs given code within a test transaction. + * + * @param code code to run within the test transaction + */ + public void testTransaction(Consumer code) { + try { + transaction(dsl -> { + code.accept(dsl); + throw new TestTransactionNoopException(); + }); + } catch (TestTransactionNoopException e) { + // Ignore + } + } + + private static class TestTransactionNoopException extends RuntimeException { + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorQueries.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorQueries.java new file mode 100644 index 000000000..75ff26274 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorQueries.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.connectors; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.authorityportal.db.jooq.tables.Connector; +import de.sovity.authorityportal.db.jooq.tables.Organization; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import lombok.RequiredArgsConstructor; +import 
org.jetbrains.annotations.NotNull; +import org.jooq.Condition; +import org.jooq.DSLContext; + +import java.time.Duration; +import java.time.OffsetDateTime; +import java.util.HashSet; +import java.util.Set; +import java.util.function.BiFunction; + +@RequiredArgsConstructor +public class ConnectorQueries { + private final CrawlerConfig crawlerConfig; + + public ConnectorRecord findByConnectorId(DSLContext dsl, String connectorId) { + var c = Tables.CONNECTOR; + return dsl.fetchOne(c, c.CONNECTOR_ID.eq(connectorId)); + } + + public Set findConnectorsForScheduledRefresh(DSLContext dsl, ConnectorOnlineStatus onlineStatus) { + return queryConnectorRefs(dsl, (c, o) -> c.ONLINE_STATUS.eq(onlineStatus)); + } + + public Set findAllConnectorsForKilling(DSLContext dsl, Duration deleteOfflineConnectorsAfter) { + var minLastRefresh = OffsetDateTime.now().minus(deleteOfflineConnectorsAfter); + return queryConnectorRefs(dsl, (c, o) -> c.LAST_SUCCESSFUL_REFRESH_AT.lt(minLastRefresh)); + } + + @NotNull + private Set queryConnectorRefs( + DSLContext dsl, + BiFunction condition + ) { + var c = Tables.CONNECTOR; + var o = Tables.ORGANIZATION; + var query = dsl.select( + c.CONNECTOR_ID.as("connectorId"), + c.ENVIRONMENT.as("environmentId"), + o.NAME.as("organizationLegalName"), + o.ID.as("organizationId"), + c.ENDPOINT_URL.as("endpoint") + ) + .from(c) + .leftJoin(o).on(c.ORGANIZATION_ID.eq(o.ID)) + .where(condition.apply(c, o), c.ENVIRONMENT.eq(crawlerConfig.getEnvironmentId()), c.ENDPOINT_URL.isNotNull()) + .fetchInto(ConnectorRef.class); + + return new HashSet<>(query); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRef.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRef.java new file mode 100644 index 000000000..3d94fca72 --- /dev/null +++ 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRef.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.connectors; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(of = "connectorId", callSuper = false) +@ToString(of = "connectorId") +public class ConnectorRef { + private final String connectorId; + private final String environmentId; + private final String organizationLegalName; + private final String organizationId; + private final String endpoint; +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorStatusUpdater.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorStatusUpdater.java new file mode 100644 index 000000000..f806c2f03 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorStatusUpdater.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package 
de.sovity.edc.ext.catalog.crawler.dao.connectors; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.dao.utils.PostgresqlUtils; +import org.jooq.DSLContext; + +import java.util.Collection; +import java.util.stream.Collectors; + +public class ConnectorStatusUpdater { + public void markAsDead(DSLContext dsl, Collection connectorRefs) { + var connectorIds = connectorRefs.stream() + .map(ConnectorRef::getConnectorId) + .collect(Collectors.toSet()); + var c = Tables.CONNECTOR; + dsl.update(c).set(c.ONLINE_STATUS, ConnectorOnlineStatus.DEAD) + .where(PostgresqlUtils.in(c.CONNECTOR_ID, connectorIds)).execute(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferQueries.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferQueries.java new file mode 100644 index 000000000..8b35d791f --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferQueries.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.contract_offers; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; +import org.jooq.DSLContext; + +import java.util.List; + +public class ContractOfferQueries { + + public List findByConnectorId(DSLContext dsl, String 
connectorId) { + var co = Tables.CONTRACT_OFFER; + return dsl.selectFrom(co).where(co.CONNECTOR_ID.eq(connectorId)).stream().toList(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferRecordUpdater.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferRecordUpdater.java new file mode 100644 index 000000000..b0f5da153 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/contract_offers/ContractOfferRecordUpdater.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.contract_offers; + +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.utils.ChangeTracker; +import de.sovity.edc.ext.catalog.crawler.dao.utils.JsonbUtils; +import de.sovity.edc.ext.catalog.crawler.utils.JsonUtils2; +import lombok.RequiredArgsConstructor; +import org.jooq.JSONB; + +import java.time.OffsetDateTime; + +/** + * Creates or updates {@link ContractOfferRecord} DB Rows. + *

+ * (Or at least prepares them for batch inserts / updates) + */ +@RequiredArgsConstructor +public class ContractOfferRecordUpdater { + + /** + * Create new {@link ContractOfferRecord} from {@link FetchedContractOffer}. + * + * @param dataOffer parent data offer db row + * @param fetchedContractOffer fetched contract offer + * @return new db row + */ + public ContractOfferRecord newContractOffer( + DataOfferRecord dataOffer, + FetchedContractOffer fetchedContractOffer + ) { + var contractOffer = new ContractOfferRecord(); + + contractOffer.setConnectorId(dataOffer.getConnectorId()); + contractOffer.setContractOfferId(fetchedContractOffer.getContractOfferId()); + contractOffer.setAssetId(dataOffer.getAssetId()); + contractOffer.setCreatedAt(OffsetDateTime.now()); + updateContractOffer(contractOffer, fetchedContractOffer); + return contractOffer; + } + + /** + * Update existing {@link ContractOfferRecord} with changes from {@link FetchedContractOffer}. + * + * @param contractOffer existing row + * @param fetchedContractOffer changes to be integrated + * @return if anything was changed + */ + public boolean updateContractOffer( + ContractOfferRecord contractOffer, + FetchedContractOffer fetchedContractOffer + ) { + var changes = new ChangeTracker(); + + changes.setIfChanged( + JsonbUtils.getDataOrNull(contractOffer.getUiPolicyJson()), + fetchedContractOffer.getUiPolicyJson(), + it -> contractOffer.setUiPolicyJson(JSONB.jsonb(it)), + JsonUtils2::isEqualJson + ); + + if (changes.isChanged()) { + contractOffer.setUpdatedAt(OffsetDateTime.now()); + } + + return changes.isChanged(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferQueries.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferQueries.java new file mode 100644 index 000000000..e9b9f5ee9 --- /dev/null +++ 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferQueries.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.data_offers; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +import java.util.List; + +@RequiredArgsConstructor +public class DataOfferQueries { + + public List findByConnectorId(DSLContext dsl, String connectorId) { + var d = Tables.DATA_OFFER; + return dsl.selectFrom(d).where(d.CONNECTOR_ID.eq(connectorId)).stream().toList(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java new file mode 100644 index 000000000..9f1c3e676 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/data_offers/DataOfferRecordUpdater.java @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package 
de.sovity.edc.ext.catalog.crawler.dao.data_offers; + +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.writing.utils.ChangeTracker; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.dao.utils.JsonbUtils; +import de.sovity.edc.ext.catalog.crawler.utils.JsonUtils2; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.ShortDescriptionBuilder; +import lombok.RequiredArgsConstructor; +import org.jooq.JSONB; + +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * Creates or updates {@link DataOfferRecord} DB Rows. + *

+ * (Or at least prepares them for batch inserts / updates) + */ +@RequiredArgsConstructor +public class DataOfferRecordUpdater { + private final ShortDescriptionBuilder shortDescriptionBuilder; + + /** + * Create a new {@link DataOfferRecord}. + * + * @param connectorRef connector + * @param fetchedDataOffer new db row data + * @return new db row + */ + public DataOfferRecord newDataOffer( + ConnectorRef connectorRef, + FetchedDataOffer fetchedDataOffer + ) { + var dataOffer = new DataOfferRecord(); + var connectorId = connectorRef.getConnectorId(); + + dataOffer.setConnectorId(connectorId); + dataOffer.setAssetId(fetchedDataOffer.getAssetId()); + dataOffer.setCreatedAt(OffsetDateTime.now()); + updateDataOffer(dataOffer, fetchedDataOffer, true); + return dataOffer; + } + + + /** + * Update existing {@link DataOfferRecord}. + * + * @param record existing row + * @param fetchedDataOffer changes to be incorporated + * @param changed whether the data offer should be marked as updated simply because the contract offers changed + * @return whether any fields were updated + */ + public boolean updateDataOffer( + DataOfferRecord record, + FetchedDataOffer fetchedDataOffer, + boolean changed + ) { + var asset = fetchedDataOffer.getUiAsset(); + var changes = new ChangeTracker(changed); + + changes.setIfChanged( + blankIfNull(record.getAssetTitle()), + blankIfNull(asset.getTitle()), + record::setAssetTitle + ); + + changes.setIfChanged( + blankIfNull(record.getDescriptionNoMarkdown()), + shortDescriptionBuilder.extractMarkdownText(blankIfNull(asset.getDescription())), + record::setDescriptionNoMarkdown + ); + + changes.setIfChanged( + blankIfNull(record.getShortDescriptionNoMarkdown()), + blankIfNull(asset.getDescriptionShortText()), + record::setShortDescriptionNoMarkdown + ); + + changes.setIfChanged( + blankIfNull(record.getDataCategory()), + blankIfNull(asset.getDataCategory()), + record::setDataCategory + ); + + changes.setIfChanged( + 
blankIfNull(record.getDataSubcategory()), + blankIfNull(asset.getDataSubcategory()), + record::setDataSubcategory + ); + + changes.setIfChanged( + blankIfNull(record.getDataModel()), + blankIfNull(asset.getDataModel()), + record::setDataModel + ); + + changes.setIfChanged( + blankIfNull(record.getTransportMode()), + blankIfNull(asset.getTransportMode()), + record::setTransportMode + ); + + changes.setIfChanged( + blankIfNull(record.getGeoReferenceMethod()), + blankIfNull(asset.getGeoReferenceMethod()), + record::setGeoReferenceMethod + ); + + changes.setIfChanged( + emptyIfNull(record.getKeywords()), + emptyIfNull(asset.getKeywords()), + it -> { + record.setKeywords(new ArrayList<>(it)); + record.setKeywordsCommaJoined(String.join(", ", it)); + } + ); + + changes.setIfChanged( + JsonbUtils.getDataOrNull(record.getUiAssetJson()), + fetchedDataOffer.getUiAssetJson(), + it -> record.setUiAssetJson(JSONB.jsonb(it)), + JsonUtils2::isEqualJson + ); + + if (changes.isChanged()) { + record.setUpdatedAt(OffsetDateTime.now()); + } + + return changes.isChanged(); + } + + private String blankIfNull(String string) { + return string == null ? "" : string; + } + + private Collection emptyIfNull(Collection collection) { + return collection == null ? List.of() : collection; + } + + private Collection emptyIfNull(T[] array) { + return array == null ? 
List.of() : Arrays.asList(array); + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/JsonbUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/JsonbUtils.java new file mode 100644 index 000000000..c3afdbabd --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/JsonbUtils.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.utils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.jooq.JSONB; + +/** + * Utilities for dealing with {@link org.jooq.JSONB} fields. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class JsonbUtils { + + /** + * Returns the data of the given {@link JSONB} or null. + * + * @param jsonb {@link org.jooq.JSON} + * @return data or null + */ + public static String getDataOrNull(JSONB jsonb) { + return jsonb == null ? 
null : jsonb.data(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/PostgresqlUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/PostgresqlUtils.java new file mode 100644 index 000000000..4c7cd3e41 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/PostgresqlUtils.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.utils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.jooq.Condition; +import org.jooq.Field; +import org.jooq.impl.DSL; + +import java.util.Collection; + +/** + * PostgreSQL + JooQ Utils + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class PostgresqlUtils { + + /** + * Replaces the IN operation with "field = ANY(...)" + * + * @param field field + * @param values values + * @return condition + */ + public static Condition in(Field field, Collection values) { + return field.eq(DSL.any(values.toArray(String[]::new))); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/RecordPatch.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/RecordPatch.java new file mode 100644 index 000000000..99c6a025a --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/dao/utils/RecordPatch.java @@ 
-0,0 +1,46 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.utils; + +import lombok.AccessLevel; +import lombok.Getter; +import lombok.experimental.FieldDefaults; +import org.jooq.UpdatableRecord; + +import java.util.ArrayList; +import java.util.List; + +/** + * Contains planned DB Row changes to be applied as batch. + */ +@Getter +@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) +public class RecordPatch> { + List insertions = new ArrayList<>(); + List updates = new ArrayList<>(); + List deletions = new ArrayList<>(); + + public void insert(T record) { + insertions.add(record); + } + + public void update(T record) { + updates.add(record); + } + + public void delete(T record) { + deletions.add(record); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfig.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfig.java new file mode 100644 index 000000000..af3e2a0f4 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfig.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package 
de.sovity.edc.ext.catalog.crawler.orchestration.config; + +import lombok.Builder; +import lombok.Value; + +import java.time.Duration; + +@Value +@Builder +public class CrawlerConfig { + String environmentId; + int numThreads; + + Duration killOfflineConnectorsAfter; + + int maxDataOffersPerConnector; + int maxContractOffersPerDataOffer; +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfigFactory.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfigFactory.java new file mode 100644 index 000000000..c208c6052 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/CrawlerConfigFactory.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.config; + +import de.sovity.edc.ext.catalog.crawler.CrawlerConfigProps; +import lombok.RequiredArgsConstructor; +import org.eclipse.edc.spi.system.configuration.Config; + +import java.time.Duration; + +@RequiredArgsConstructor +public class CrawlerConfigFactory { + private final Config config; + + public CrawlerConfig buildCrawlerConfig() { + var environmentId = CrawlerConfigProps.CRAWLER_ENVIRONMENT_ID.getStringOrThrow(config); + var numThreads = CrawlerConfigProps.CRAWLER_NUM_THREADS.getInt(config); + var killOfflineConnectorsAfter = Duration.parse(CrawlerConfigProps.CRAWLER_KILL_OFFLINE_CONNECTORS_AFTER.getStringOrThrow(config)); + var maxDataOffers = 
CrawlerConfigProps.CRAWLER_MAX_DATA_OFFERS_PER_CONNECTOR.getInt(config); + var maxContractOffers = CrawlerConfigProps.CRAWLER_MAX_CONTRACT_OFFERS_PER_DATA_OFFER.getInt(config); + + return CrawlerConfig.builder() + .environmentId(environmentId) + .numThreads(numThreads) + .killOfflineConnectorsAfter(killOfflineConnectorsAfter) + .maxDataOffersPerConnector(maxDataOffers) + .maxContractOffersPerDataOffer(maxContractOffers) + .build(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/EdcConfigPropertyUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/EdcConfigPropertyUtils.java new file mode 100644 index 000000000..c0d9bdb5f --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/config/EdcConfigPropertyUtils.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.config; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +import java.util.Arrays; + +import static java.util.stream.Collectors.joining; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class EdcConfigPropertyUtils { + /** + * For better refactoring it is better if the string constant is + * found in the code as it is used and documented. + * + * @param envVarName e.g. "MY_EDC_PROP" + * @return e.g. 
"my.edc.prop" + */ + public static String toEdcProp(String envVarName) { + return Arrays.stream(envVarName.split("_")) + .map(String::toLowerCase) + .collect(joining(".")); + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueue.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueue.java new file mode 100644 index 000000000..e46808a3a --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueue.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.edc.ext.catalog.crawler.crawling.ConnectorCrawler; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.RequiredArgsConstructor; + +import java.util.ArrayList; +import java.util.Collection; + +@RequiredArgsConstructor +public class ConnectorQueue { + private final ConnectorCrawler connectorCrawler; + private final ThreadPool threadPool; + + /** + * Enqueues connectors for update. 
+ * + * @param connectorRefs connectors + * @param priority priority from {@link ConnectorRefreshPriority} + */ + public void addAll(Collection connectorRefs, int priority) { + var queued = threadPool.getQueuedConnectorRefs(); + connectorRefs = new ArrayList<>(connectorRefs); + connectorRefs.removeIf(queued::contains); + + for (var connectorRef : connectorRefs) { + threadPool.enqueueConnectorRefreshTask( + priority, + () -> connectorCrawler.crawlConnector(connectorRef), + connectorRef + ); + } + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueueFiller.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueueFiller.java new file mode 100644 index 000000000..350b61545 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorQueueFiller.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import lombok.RequiredArgsConstructor; +import org.jooq.DSLContext; + +@RequiredArgsConstructor +public class ConnectorQueueFiller { + private final ConnectorQueue connectorQueue; + private final ConnectorQueries connectorQueries; + + public void enqueueConnectors(DSLContext dsl, ConnectorOnlineStatus status, int priority) { + var connectorRefs = 
connectorQueries.findConnectorsForScheduledRefresh(dsl, status); + connectorQueue.addAll(connectorRefs, priority); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorRefreshPriority.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorRefreshPriority.java new file mode 100644 index 000000000..541c8528b --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ConnectorRefreshPriority.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = lombok.AccessLevel.PRIVATE) +public class ConnectorRefreshPriority { + public static final int SCHEDULED_ONLINE_REFRESH = 100; + public static final int SCHEDULED_OFFLINE_REFRESH = 200; + public static final int SCHEDULED_DEAD_REFRESH = 300; +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPool.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPool.java new file mode 100644 index 000000000..857bf32ed --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPool.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This 
program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import org.eclipse.edc.spi.monitor.Monitor; + +import java.util.Set; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +public class ThreadPool { + private final ThreadPoolTaskQueue queue; + + private final boolean enabled; + private final ThreadPoolExecutor threadPoolExecutor; + + public ThreadPool(ThreadPoolTaskQueue queue, CrawlerConfig crawlerConfig, Monitor monitor) { + this.queue = queue; + int numThreads = crawlerConfig.getNumThreads(); + enabled = numThreads > 0; + + if (enabled) { + monitor.info("Initializing ThreadPoolExecutor with %d threads.".formatted(numThreads)); + threadPoolExecutor = new ThreadPoolExecutor( + numThreads, + numThreads, + 60, + TimeUnit.SECONDS, + queue.getAsRunnableQueue() + ); + threadPoolExecutor.prestartAllCoreThreads(); + } else { + monitor.info("Skipped ThreadPoolExecutor initialization."); + threadPoolExecutor = null; + } + } + + public void enqueueConnectorRefreshTask(int priority, Runnable runnable, ConnectorRef connectorRef) { + enqueueTask(new ThreadPoolTask(priority, runnable, connectorRef)); + } + + public Set getQueuedConnectorRefs() { + return queue.getConnectorRefs(); + } + + private void enqueueTask(ThreadPoolTask task) { + if (enabled) { + threadPoolExecutor.execute(task); + } else { + // Only relevant for test environment, where execution is disabled + queue.add(task); + } + } +} diff --git 
a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTask.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTask.java new file mode 100644 index 000000000..66712b2d7 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTask.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +import java.util.Comparator; +import java.util.concurrent.atomic.AtomicLong; + + +@Getter +@RequiredArgsConstructor +public class ThreadPoolTask implements Runnable { + + public static final Comparator COMPARATOR = Comparator.comparing(ThreadPoolTask::getPriority) + .thenComparing(ThreadPoolTask::getSequence); + + /** + * {@link java.util.concurrent.PriorityBlockingQueue} does not guarantee sequential execution, so we need to add this. 
+ */ + private static final AtomicLong SEQ = new AtomicLong(0); + private final long sequence = SEQ.incrementAndGet(); + private final int priority; + private final Runnable task; + private final ConnectorRef connectorRef; + + @Override + public void run() { + this.task.run(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTaskQueue.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTaskQueue.java new file mode 100644 index 000000000..dc2b5598b --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolTaskQueue.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +import java.util.ArrayList; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.PriorityBlockingQueue; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +public class ThreadPoolTaskQueue { + + @Getter + private final PriorityBlockingQueue queue = new PriorityBlockingQueue<>(50, ThreadPoolTask.COMPARATOR); + + @SuppressWarnings("unchecked") + public PriorityBlockingQueue getAsRunnableQueue() { + return (PriorityBlockingQueue) (PriorityBlockingQueue) queue; + } + + public void add(ThreadPoolTask task) { + queue.add(task); + } + + public Set 
getConnectorRefs() { + var queuedRunnables = new ArrayList<>(queue); + + return queuedRunnables.stream() + .map(ThreadPoolTask::getConnectorRef) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/DeadConnectorRefreshJob.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/DeadConnectorRefreshJob.java new file mode 100644 index 000000000..df86a21b1 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/DeadConnectorRefreshJob.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorQueueFiller; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorRefreshPriority; +import lombok.RequiredArgsConstructor; +import org.quartz.Job; +import org.quartz.JobExecutionContext; + +@RequiredArgsConstructor +public class DeadConnectorRefreshJob implements Job { + private final DslContextFactory dslContextFactory; + private final ConnectorQueueFiller connectorQueueFiller; + + @Override + public void execute(JobExecutionContext context) { + dslContextFactory.transaction(dsl -> connectorQueueFiller.enqueueConnectors(dsl, + 
ConnectorOnlineStatus.DEAD, ConnectorRefreshPriority.SCHEDULED_DEAD_REFRESH)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorCleanerJob.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorCleanerJob.java new file mode 100644 index 000000000..2c2e71853 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorCleanerJob.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.edc.ext.catalog.crawler.crawling.OfflineConnectorCleaner; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import lombok.RequiredArgsConstructor; +import org.quartz.Job; +import org.quartz.JobExecutionContext; + +@RequiredArgsConstructor +public class OfflineConnectorCleanerJob implements Job { + private final DslContextFactory dslContextFactory; + private final OfflineConnectorCleaner offlineConnectorCleaner; + + @Override + public void execute(JobExecutionContext context) { + dslContextFactory.transaction(offlineConnectorCleaner::cleanConnectorsIfOfflineTooLong); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRefreshJob.java 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRefreshJob.java new file mode 100644 index 000000000..e9fc1a5f8 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRefreshJob.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorQueueFiller; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorRefreshPriority; +import lombok.RequiredArgsConstructor; +import org.quartz.Job; +import org.quartz.JobExecutionContext; +/** + * Scheduled Quartz {@link Job} for connectors in status {@link ConnectorOnlineStatus#OFFLINE}: within one + * {@link DslContextFactory} transaction it hands that status and the priority + * {@link ConnectorRefreshPriority#SCHEDULED_OFFLINE_REFRESH} to {@link ConnectorQueueFiller#enqueueConnectors}. + */ +@RequiredArgsConstructor +public class OfflineConnectorRefreshJob implements Job { + private final DslContextFactory dslContextFactory; + private final ConnectorQueueFiller connectorQueueFiller; + + @Override + public void execute(JobExecutionContext context) { + var status = ConnectorOnlineStatus.OFFLINE; + var priority = ConnectorRefreshPriority.SCHEDULED_OFFLINE_REFRESH; + dslContextFactory.transaction(dsl -> connectorQueueFiller.enqueueConnectors(dsl, status, priority)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OnlineConnectorRefreshJob.java 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OnlineConnectorRefreshJob.java new file mode 100644 index 000000000..0b39d1bd9 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OnlineConnectorRefreshJob.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorQueueFiller; +import de.sovity.edc.ext.catalog.crawler.orchestration.queue.ConnectorRefreshPriority; +import lombok.RequiredArgsConstructor; +import org.quartz.Job; +import org.quartz.JobExecutionContext; +/** + * Scheduled Quartz {@link Job} for connectors in status {@link ConnectorOnlineStatus#ONLINE}: within one + * {@link DslContextFactory} transaction it hands that status and the priority + * {@link ConnectorRefreshPriority#SCHEDULED_ONLINE_REFRESH} to {@link ConnectorQueueFiller#enqueueConnectors}. + */ +@RequiredArgsConstructor +public class OnlineConnectorRefreshJob implements Job { + private final DslContextFactory dslContextFactory; + private final ConnectorQueueFiller connectorQueueFiller; + + @Override + public void execute(JobExecutionContext context) { + var status = ConnectorOnlineStatus.ONLINE; + var priority = ConnectorRefreshPriority.SCHEDULED_ONLINE_REFRESH; + dslContextFactory.transaction(dsl -> connectorQueueFiller.enqueueConnectors(dsl, status, priority)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/QuartzScheduleInitializer.java 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/QuartzScheduleInitializer.java new file mode 100644 index 000000000..d4f4597e7 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/QuartzScheduleInitializer.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.utils.CronJobRef; +import de.sovity.edc.ext.catalog.crawler.orchestration.schedules.utils.JobFactoryImpl; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import org.apache.commons.lang3.StringUtils; +import org.eclipse.edc.spi.monitor.Monitor; +import org.eclipse.edc.spi.system.configuration.Config; +import org.quartz.CronScheduleBuilder; +import org.quartz.JobBuilder; +import org.quartz.Scheduler; +import org.quartz.TriggerBuilder; +import org.quartz.impl.StdSchedulerFactory; + +import java.util.Collection; +/** + * Registers the given {@link CronJobRef}s with the default Quartz scheduler. + * + * <p>The cron expression of each job is read from the EDC {@link Config} property named by + * {@link CronJobRef#configPropertyName()}; jobs whose property is blank are logged and not scheduled. + */ +@RequiredArgsConstructor +public class QuartzScheduleInitializer { + private final Config config; + private final Monitor monitor; + private final Collection> jobs; + /** + * Installs a {@link JobFactoryImpl} built from {@code jobs} on the default scheduler, schedules every job + * and then starts the scheduler. Checked exceptions are rethrown unchecked via {@code @SneakyThrows}. + */ + @SneakyThrows + public void startSchedules() { + var jobFactory = new JobFactoryImpl(jobs); + var scheduler = StdSchedulerFactory.getDefaultScheduler(); + scheduler.setJobFactory(jobFactory); + + jobs.forEach(job -> scheduleCronJob(scheduler, job)); + scheduler.start(); + } + /** + * Schedules a single job; the config property name is used as identity of both the job and its trigger. + * Returns without scheduling when the configured cron expression is blank. + * + * @param scheduler scheduler to register the job and trigger with + * @param cronJobRef job class plus the config property holding its cron expression + */ + @SneakyThrows + private void scheduleCronJob(Scheduler scheduler, CronJobRef cronJobRef) { + // CRON 
property name doubles as job name + var jobName = cronJobRef.configPropertyName(); + + // Skip scheduling if property not provided to ensure tests have no schedules running + var cronTrigger = config.getString(jobName, ""); + if (StringUtils.isBlank(cronTrigger)) { + monitor.info("No cron trigger configured for %s. Skipping.".formatted(jobName)); + return; + } + + monitor.info("Starting schedule %s=%s.".formatted(jobName, cronTrigger)); + var job = JobBuilder.newJob(cronJobRef.clazz()) + .withIdentity(jobName) + .build(); + var trigger = TriggerBuilder.newTrigger() + .withIdentity(jobName) + .withSchedule(CronScheduleBuilder.cronSchedule(cronTrigger)) + .build(); + + scheduler.scheduleJob(job, trigger); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/CronJobRef.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/CronJobRef.java new file mode 100644 index 000000000..8f435fd23 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/CronJobRef.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules.utils; + +import org.quartz.Job; + +import java.util.function.Supplier; + +/** + * CRON Job. 
+ * + * @param configPropertyName EDC Config property that decides cron expression + * @param clazz class of the job + * @param factory factory that initializes the task class + * @param job type + */ +public record CronJobRef( + String configPropertyName, + Class clazz, + Supplier factory +) { + /** + * Returns {@link #factory} cast to a supplier of the base {@link Job} type (unchecked cast). + */ + @SuppressWarnings("unchecked") + public Supplier asJobSupplier() { + return (Supplier) factory; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/JobFactoryImpl.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/JobFactoryImpl.java new file mode 100644 index 000000000..003f0161d --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/utils/JobFactoryImpl.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules.utils; + +import lombok.NonNull; +import org.quartz.Job; +import org.quartz.Scheduler; +import org.quartz.spi.JobFactory; +import org.quartz.spi.TriggerFiredBundle; + +import java.util.Collection; +import java.util.Map; +import java.util.function.Supplier; +import java.util.stream.Collectors; +/** + * Quartz {@link JobFactory} that creates job instances by invoking the supplier of the {@link CronJobRef} + * registered for the requested job class. + */ +public class JobFactoryImpl implements JobFactory { + private final Map, Supplier> factories; + /** + * Indexes the given job refs by {@link CronJobRef#clazz()}. + * + * <p>NOTE(review): {@code Collectors.toMap} is used without a merge function, so two refs with the same + * job class make this constructor throw {@link IllegalStateException}. + * + * @param jobs job refs to create jobs for, must not be null + */ + public JobFactoryImpl(@NonNull Collection> jobs) { + factories = jobs.stream().collect(Collectors.toMap( + CronJobRef::clazz, + CronJobRef::asJobSupplier + )); + } + /** + * Creates the job for a fired trigger by calling the supplier registered for its job class. + * + * @throws IllegalArgumentException if no supplier is registered for the job class + */ + @Override + public Job 
newJob(TriggerFiredBundle bundle, Scheduler scheduler) { + Class jobClazz = bundle.getJobDetail().getJobClass(); + Supplier factory = factories.get(jobClazz); + if (factory == null) { + throw new IllegalArgumentException("No factory for Job class %s. Supported Job classes are: %s.".formatted( + jobClazz.getName(), + factories.keySet().stream().map(Class::getName).collect(Collectors.joining(", ")) + )); + } + return factory.get(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2.java new file mode 100644 index 000000000..f113d8601 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.NonNull; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +/** + * Static collection helpers. The private Lombok-generated constructor prevents instantiation. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class CollectionUtils2 { + /** + * Set difference: copies {@code a} into a new {@link HashSet} and removes every element of {@code b} from + * that copy. Neither argument is modified. + * + * @param a base set + * @param b remove these items + * @param set item type + * @return a difference b + */ + public static Set difference(@NonNull Collection a, @NonNull Collection b) { + var result = new HashSet<>(a); + result.removeAll(b); + return result; + } + /** + * Null-safe emptiness check. + * + * @return {@code true} only if {@code collection} is non-null and contains at least one element + */ + public static boolean 
isNotEmpty(Collection collection) { + return collection != null && !collection.isEmpty(); + } + /** + * Returns a reversed copy of {@code source} as a new {@link ArrayList}; {@code source} itself is not modified. + */ + public static List reverse(List source) { + var result = new ArrayList<>(source); + java.util.Collections.reverse(result); + return result; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2.java new file mode 100644 index 000000000..a1d2d57bb --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.SneakyThrows; +/** + * Static JSON helpers backed by one shared Jackson {@link ObjectMapper}. Not instantiable. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class JsonUtils2 { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + /** + * Compares two JSON strings by their parsed Jackson trees instead of by text. + * + * <p>Two {@code null} arguments are equal; {@code null} against non-null is not. Parse failures from + * {@code readTree} propagate unchecked via {@code @SneakyThrows}. + */ + @SneakyThrows + public static boolean isEqualJson(String json, String otherJson) { + return (json == null && otherJson == null) || + (json != null && otherJson != null && + OBJECT_MAPPER.readTree(json).equals(OBJECT_MAPPER.readTree(otherJson))); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/MapUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/MapUtils.java new file mode 100644 index 
000000000..8c2cd3a1d --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/MapUtils.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +import java.util.Collection; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +/** + * Static map helpers. The private Lombok-generated constructor prevents instantiation. + * + * <p>{@code associateBy} builds a map from the elements of {@code collection}, keyed by + * {@code keyExtractor}, with the element itself as value. If two elements produce the same key, it throws + * {@link IllegalStateException} with the message {@code "Duplicate key <key>."}. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class MapUtils { + public static Map associateBy(Collection collection, Function keyExtractor) { + return collection.stream().collect(Collectors.toMap(keyExtractor, Function.identity(), (a, b) -> { + throw new IllegalStateException("Duplicate key %s.".formatted(keyExtractor.apply(a))); + })); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2.java new file mode 100644 index 000000000..190f16ede --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + 
+package de.sovity.edc.ext.catalog.crawler.utils; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.NonNull; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class StringUtils2 { + + /** + * Strips a trailing {@code suffix} from {@code string}. + * + * <p>A string that does not end with the suffix is returned as is. + * + * @param string input string, must not be null + * @param suffix suffix to strip, must not be null + * @return {@code string} without the trailing {@code suffix} + */ + public static String removeSuffix(@NonNull String string, @NonNull String suffix) { + if (!string.endsWith(suffix)) { + return string; + } + var keptLength = string.length() - suffix.length(); + return string.substring(0, keptLength); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/resources/META-INF/services/org.eclipse.edc.spi.system.ServiceExtension b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/resources/META-INF/services/org.eclipse.edc.spi.system.ServiceExtension new file mode 100644 index 000000000..5a369119d --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/main/resources/META-INF/services/org.eclipse.edc.spi.system.ServiceExtension @@ -0,0 +1 @@ +de.sovity.edc.ext.catalog.crawler.CrawlerExtension diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/AssertionUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/AssertionUtils.java new file mode 100644 index 000000000..aef8012b7 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/AssertionUtils.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + 
+package de.sovity.edc.ext.catalog.crawler; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.SneakyThrows; +import org.skyscreamer.jsonassert.JSONAssert; +import org.skyscreamer.jsonassert.JSONCompareMode; +/** + * Static JSON assertions for tests. Not instantiable. + * + * <p>{@code assertEqualJson} compares two JSON strings with JSONAssert in {@link JSONCompareMode#STRICT} mode; + * checked exceptions are rethrown unchecked via {@code @SneakyThrows}. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class AssertionUtils { + @SneakyThrows + public static void assertEqualJson(String expected, String actual) { + JSONAssert.assertEquals(expected, actual, JSONCompareMode.STRICT); + } + /** + * Serializes both objects with {@code JsonTestUtils.serialize} and compares the resulting JSON strings via + * {@link #assertEqualJson}. + */ + public static void assertEqualUsingJson(Object expected, Object actual) { + assertEqualJson(JsonTestUtils.serialize(expected), JsonTestUtils.serialize(actual)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java new file mode 100644 index 000000000..801f3d311 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/CrawlerTestDb.java @@ -0,0 +1,72 @@ +package de.sovity.edc.ext.catalog.crawler; + +import com.zaxxer.hikari.HikariDataSource; +import de.sovity.edc.ext.catalog.crawler.dao.config.DslContextFactory; +import de.sovity.edc.extension.e2e.db.TestDatabaseViaTestcontainers; +import de.sovity.edc.extension.postgresql.FlywayExecutionParams; +import de.sovity.edc.extension.postgresql.FlywayUtils; +import de.sovity.edc.extension.postgresql.HikariDataSourceFactory; +import de.sovity.edc.extension.postgresql.JdbcCredentials; +import org.jooq.DSLContext; +import org.junit.jupiter.api.extension.AfterAllCallback; +import org.junit.jupiter.api.extension.BeforeAllCallback; +import org.junit.jupiter.api.extension.ExtensionContext; + +import java.util.function.Consumer; +/** + * JUnit 5 extension for crawler tests: before all tests it starts a {@link TestDatabaseViaTestcontainers}, + * opens a {@link HikariDataSource} against it and runs the Flyway migrations; after all tests it closes the + * data source and the database. + */ +public class CrawlerTestDb implements BeforeAllCallback, AfterAllCallback { + private final TestDatabaseViaTestcontainers db = new 
TestDatabaseViaTestcontainers(); + + private HikariDataSource dataSource = null; + private DslContextFactory dslContextFactory = null; + /** + * Delegates to {@code DslContextFactory.testTransaction}. The factory is only set in {@link #beforeAll}, + * so calling this earlier fails with a {@link NullPointerException}. + */ + public void testTransaction(Consumer code) { + dslContextFactory.testTransaction(code); + } + /** + * Starts the database, creates the data source and {@link DslContextFactory}, then migrates the schema. + * + * <p>NOTE(review): the meaning of the arguments {@code 10} and {@code 1000} passed to + * {@code HikariDataSourceFactory.newDataSource} is not visible here (presumably pool size and a timeout) + * and should be confirmed against that factory. + */ + @Override + public void beforeAll(ExtensionContext extensionContext) throws Exception { + // Init DB + db.beforeAll(extensionContext); + + // Init Data Source + var credentials = new JdbcCredentials( + db.getJdbcCredentials().jdbcUrl(), + db.getJdbcCredentials().jdbcUser(), + db.getJdbcCredentials().jdbcPassword() + ); + dataSource = HikariDataSourceFactory.newDataSource(credentials, 10, 1000); + dslContextFactory = new DslContextFactory(dataSource); + + // Migrate DB + var params = baseConfig("classpath:/migration-test-utils") + .migrate(true) + .build(); + try { /* first attempt uses the params as built; any failure triggers one retry with clean enabled */ + FlywayUtils.cleanAndMigrate(params, dataSource); + } catch (Exception e) { + var paramsWithClean = params.withClean(true).withCleanEnabled(true); + FlywayUtils.cleanAndMigrate(paramsWithClean, dataSource); + } + } + /** + * Closes the data source if one was created, then shuts down the test database. + */ + @Override + public void afterAll(ExtensionContext extensionContext) throws Exception { + if (dataSource != null) { + dataSource.close(); + } + + // Close DB + db.afterAll(extensionContext); + } + /** + * Returns a Flyway params builder preconfigured with the migration locations {@code classpath:/db/migration} + * plus {@code additionalMigrationLocations}, and the history table {@code flyway_schema_history}. + * + * @param additionalMigrationLocations locations appended after a comma to the default location + */ + public static FlywayExecutionParams.FlywayExecutionParamsBuilder baseConfig(String additionalMigrationLocations) { + var migrationLocations = FlywayUtils.parseFlywayLocations( + "classpath:/db/migration,%s".formatted(additionalMigrationLocations) + ); + + return FlywayExecutionParams.builder() + .migrationLocations(migrationLocations) + .table("flyway_schema_history"); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/JsonTestUtils.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/JsonTestUtils.java new file mode 100644 index 000000000..ec4f26b5e --- /dev/null +++ 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/JsonTestUtils.java @@ -0,0 +1,23 @@ +package de.sovity.edc.ext.catalog.crawler; + +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.SneakyThrows; +/** + * JSON helpers for tests, backed by one shared Jackson {@link ObjectMapper}. + * + * <p>Uses the regular {@code com.fasterxml.jackson} mapper rather than the copy shaded into Testcontainers: + * the shaded copy is an internal detail of that library and does not recognise the regular Jackson annotations. + */ +public class JsonTestUtils { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + /** + * Serializes {@code obj} to a JSON string; serialization errors propagate unchecked via {@code @SneakyThrows}. + */ + @SneakyThrows + public static String serialize(Object obj) { + return OBJECT_MAPPER.writeValueAsString(obj); + } + /** + * Parses {@code json} into an instance of {@code clazz}; parse errors propagate unchecked via {@code @SneakyThrows}. + */ + @SneakyThrows + public static T deserialize(String json, Class clazz) { + return OBJECT_MAPPER.readValue(json, clazz); + } + /** + * Converts {@code obj} to {@code clazz} by serializing it to JSON and deserializing the result. + */ + public static T jsonCast(Object obj, Class clazz) { + return deserialize(serialize(obj), clazz); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/TestData.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/TestData.java new file mode 100644 index 000000000..fdab3b1b2 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/TestData.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.ConnectorContractOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorDataOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import 
de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import lombok.experimental.UtilityClass; +import org.jooq.DSLContext; + +import java.time.OffsetDateTime; +import java.util.function.Consumer; +/** + * Shared fixtures for crawler tests: a fixed "old" timestamp, a sample {@link ConnectorRef} and a helper that + * inserts a connector together with its organization. + */ +@UtilityClass +public class TestData { + public static OffsetDateTime old = OffsetDateTime.now().withNano(0).withSecond(0).withMinute(0).withHour(0).minusDays(100); /* start of day, 100 days before this class was initialized */ + /** + * Sample connector reference used by tests. + */ + public static ConnectorRef connectorRef = new ConnectorRef( + "MDSL1234XX.C1234XX", + "test", + "My Org", + "MDSL1234XX", + "https://example.com/api/dsp" + ); + /** + * Inserts an organization row (id and name taken from {@code connectorRef}) and a connector row with these + * defaults: status {@code OFFLINE}, no refresh timestamps, {@code createdAt = old}, both limits {@code OK}. + * + * <p>NOTE(review): the organization is inserted unconditionally, so a second call for the same organization + * id presumably violates a key constraint; confirm against the schema. + * + * @param dsl context to insert with + * @param connectorRef ids, organization name and endpoint for the new rows + * @param applier invoked on the connector record right before insert, to override the defaults + */ + public static void insertConnector( + DSLContext dsl, + ConnectorRef connectorRef, + Consumer applier + ) { + var organization = dsl.newRecord(Tables.ORGANIZATION); + organization.setId(connectorRef.getOrganizationId()); + organization.setName(connectorRef.getOrganizationLegalName()); + organization.insert(); + + var connector = dsl.newRecord(Tables.CONNECTOR); + connector.setEnvironment(connectorRef.getEnvironmentId()); + connector.setOrganizationId(connectorRef.getOrganizationId()); + connector.setConnectorId(connectorRef.getConnectorId()); + connector.setName(connectorRef.getConnectorId() + " Name"); + connector.setEndpointUrl(connectorRef.getEndpoint()); + connector.setOnlineStatus(ConnectorOnlineStatus.OFFLINE); + connector.setLastRefreshAttemptAt(null); + connector.setLastSuccessfulRefreshAt(null); + connector.setCreatedAt(old); + connector.setDataOffersExceeded(ConnectorDataOffersExceeded.OK); + connector.setContractOffersExceeded(ConnectorContractOffersExceeded.OK); + applier.accept(connector); + connector.insert(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLoggerTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLoggerTest.java new file mode 100644 index 000000000..f1fed1954 --- /dev/null +++ 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/logging/CrawlerEventLoggerTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.logging; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.edc.ext.catalog.crawler.CrawlerTestDb; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import org.jooq.DSLContext; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import static org.assertj.core.api.Assertions.assertThat; +/** + * Database-backed test for {@code CrawlerEventLogger}, run against the {@link CrawlerTestDb} extension. + */ +class CrawlerEventLoggerTest { + @RegisterExtension + private static final CrawlerTestDb TEST_DATABASE = new CrawlerTestDb(); + /** + * Calls each of the seven log methods once inside a test transaction and expects seven rows in the + * crawler event log table afterwards. + * + * <p>NOTE(review): the method name mentions "DataOfferWriter" although it exercises the event logger; + * this looks like a copy/paste leftover. + */ + @Test + void testDataOfferWriter_allSortsOfUpdates() { + TEST_DATABASE.testTransaction(dsl -> { + var crawlerEventLogger = new CrawlerEventLogger(); + + // Test that insertions insert required fields and don't cause DB errors + var connectorRef = new ConnectorRef( + "MDSL1234XX.C1234XX", + "test", + "My Org", + "MDSL1234XX", + "https://example.com/api/dsp" + ); + crawlerEventLogger.logConnectorUpdated(dsl, connectorRef, new ConnectorChangeTracker()); + crawlerEventLogger.logConnectorOnline(dsl, connectorRef); + crawlerEventLogger.logConnectorOffline(dsl, connectorRef, new CrawlerEventErrorMessage("Message", "Stacktrace")); + crawlerEventLogger.logConnectorUpdateContractOfferLimitExceeded(dsl, connectorRef, 10); + crawlerEventLogger.logConnectorUpdateContractOfferLimitOk(dsl, connectorRef); + crawlerEventLogger.logConnectorUpdateDataOfferLimitExceeded(dsl, connectorRef, 10); + 
crawlerEventLogger.logConnectorUpdateDataOfferLimitOk(dsl, connectorRef); + + assertThat(numLogEntries(dsl)).isEqualTo(7); + }); + } + /** + * Returns the number of rows in {@code Tables.CRAWLER_EVENT_LOG}. + */ + private Integer numLogEntries(DSLContext dsl) { + return dsl.selectCount().from(Tables.CRAWLER_EVENT_LOG).fetchOne().component1(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java new file mode 100644 index 000000000..e7e2c3952 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorSuccessWriterTest.java @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.enums.ConnectorContractOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorDataOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.CrawlerTestDb; +import de.sovity.edc.ext.catalog.crawler.TestData; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedCatalog; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.wrapper.api.common.model.UiAsset; +import 
org.assertj.core.data.TemporalUnitLessThanOffset; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; +/** + * Database-backed test for {@code ConnectorUpdateSuccessWriter.handleConnectorOnline}, run against the + * {@link CrawlerTestDb} extension. + */ +class ConnectorSuccessWriterTest { + @RegisterExtension + private static final CrawlerTestDb TEST_DATABASE = new CrawlerTestDb(); + + ConnectorUpdateSuccessWriter connectorUpdateSuccessWriter; + /** + * Builds a fresh {@code DataOfferWriterTestDydi} per test and stubs the crawler config limits to one + * contract offer per data offer and one data offer per connector. + */ + @BeforeEach + void setup() { + var container = new DataOfferWriterTestDydi(); + connectorUpdateSuccessWriter = container.getConnectorUpdateSuccessWriter(); + when(container.getCrawlerConfig().getMaxContractOffersPerDataOffer()).thenReturn(1); + when(container.getCrawlerConfig().getMaxDataOffersPerConnector()).thenReturn(1); + } + /** + * Inserts a connector, passes {@code handleConnectorOnline} a catalog with one data offer holding one + * contract offer, then checks the connector row (status {@code ONLINE}, refresh timestamps within a minute + * of now, both limits {@code OK}) and the persisted data offer and contract offer fields. + */ + @Test + void testDataOfferWriter_fullSingleUpdate() { + TEST_DATABASE.testTransaction(dsl -> { + // arrange + var connectorRef = TestData.connectorRef; + TestData.insertConnector(dsl, connectorRef, unused -> { + }); + var uiAsset = UiAsset.builder() + .assetId("assetId") + .title("title") + .description("# Description\n\n**with Markdown**") + .descriptionShortText("descriptionShortText") + .dataCategory("dataCategory") + .dataSubcategory("dataSubCategory") + .dataModel("dataModel") + .transportMode("transportMode") + .geoReferenceMethod("geoReferenceMethod") + .keywords(List.of("a", "b")) + .build(); + var fetchedContractOffer = FetchedContractOffer.builder() + .contractOfferId("contractOfferId") + .uiPolicyJson("\"test-policy\"") + .build(); + var fetchedDataOffer = FetchedDataOffer.builder() + .assetId("assetId") + .uiAsset(uiAsset) + .uiAssetJson("\"test\"") + .contractOffers(List.of(fetchedContractOffer)) + .build(); + var fetchedCatalog = FetchedCatalog.builder() + .connectorRef(connectorRef) + 
.dataOffers(List.of(fetchedDataOffer)) + .build(); + + // act + connectorUpdateSuccessWriter.handleConnectorOnline( + dsl, + connectorRef, + dsl.fetchOne( + Tables.CONNECTOR, + Tables.CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId()) + ), + fetchedCatalog + ); + + // assert + var connector = dsl.fetchOne( + Tables.CONNECTOR, + Tables.CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId()) + ); + var dataOffer = dsl.fetchOne( + Tables.DATA_OFFER, + DSL.and( + Tables.DATA_OFFER.CONNECTOR_ID.eq(connectorRef.getConnectorId()), + Tables.DATA_OFFER.ASSET_ID.eq("assetId") + ) + ); + var contractOffer = dsl.fetchOne( + Tables.CONTRACT_OFFER, + DSL.and( + Tables.CONTRACT_OFFER.CONNECTOR_ID.eq(connectorRef.getConnectorId()), + Tables.CONTRACT_OFFER.ASSET_ID.eq("assetId"), + Tables.CONTRACT_OFFER.CONTRACT_OFFER_ID.eq("contractOfferId") + ) + ); + + var now = OffsetDateTime.now(); + var minuteAccuracy = new TemporalUnitLessThanOffset(1, ChronoUnit.MINUTES); + assertThat(connector).isNotNull(); + assertThat(connector.getOnlineStatus()).isEqualTo(ConnectorOnlineStatus.ONLINE); + assertThat(connector.getLastRefreshAttemptAt()).isCloseTo(now, minuteAccuracy); + assertThat(connector.getLastSuccessfulRefreshAt()).isCloseTo(now, minuteAccuracy); + assertThat(connector.getDataOffersExceeded()).isEqualTo(ConnectorDataOffersExceeded.OK); + assertThat(connector.getContractOffersExceeded()).isEqualTo(ConnectorContractOffersExceeded.OK); + + assertThat(dataOffer).isNotNull(); + assertThat(dataOffer.getAssetTitle()).isEqualTo("title"); + assertThat(dataOffer.getDescriptionNoMarkdown()).isEqualTo("Description with Markdown"); + assertThat(dataOffer.getShortDescriptionNoMarkdown()).isEqualTo("descriptionShortText"); + assertThat(dataOffer.getDataCategory()).isEqualTo("dataCategory"); + assertThat(dataOffer.getDataSubcategory()).isEqualTo("dataSubCategory"); + assertThat(dataOffer.getDataModel()).isEqualTo("dataModel"); + 
assertThat(dataOffer.getTransportMode()).isEqualTo("transportMode"); + assertThat(dataOffer.getGeoReferenceMethod()).isEqualTo("geoReferenceMethod"); + assertThat(dataOffer.getKeywords()).containsExactly("a", "b"); + assertThat(dataOffer.getKeywordsCommaJoined()).isEqualTo("a, b"); + assertThat(dataOffer.getUiAssetJson().data()).isEqualTo("\"test\""); + + assertThat(contractOffer).isNotNull(); + assertThat(contractOffer.getUiPolicyJson().data()).isEqualTo("\"test-policy\""); + }); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriterTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriterTest.java new file mode 100644 index 000000000..d5ac0b883 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/ConnectorUpdateCatalogWriterTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.AssertionUtils; +import de.sovity.edc.ext.catalog.crawler.CrawlerTestDb; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.ConnectorChangeTracker; +import org.assertj.core.data.TemporalUnitLessThanOffset; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; +import 
java.util.List; + +import static de.sovity.edc.ext.catalog.crawler.crawling.writing.DataOfferWriterTestDataModels.Co; +import static de.sovity.edc.ext.catalog.crawler.crawling.writing.DataOfferWriterTestDataModels.Do; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +class ConnectorUpdateCatalogWriterTest { + @RegisterExtension + private static final CrawlerTestDb TEST_DATABASE = new CrawlerTestDb(); + + @Test + void testDataOfferWriter_allSortsOfUpdates() { + TEST_DATABASE.testTransaction(dsl -> { + var testDydi = new DataOfferWriterTestDydi(); + var testData = new DataOfferWriterTestDataHelper(); + var changes = new ConnectorChangeTracker(); + var dataOfferWriter = testDydi.getConnectorUpdateCatalogWriter(); + when(testDydi.getCrawlerConfig().getEnvironmentId()).thenReturn("test"); + + // arrange + var unchanged = Do.forName("unchanged"); + testData.existing(unchanged); + testData.fetched(unchanged); + + var fieldChangedExisting = Do.forName("fieldChanged"); + var fieldChangedFetched = fieldChangedExisting.withAssetTitle("changed"); + testData.existing(fieldChangedExisting); + testData.fetched(fieldChangedFetched); + + var added = Do.forName("added"); + testData.fetched(added); + + var removed = Do.forName("removed"); + testData.existing(removed); + + var changedCoExisting = Do.forName("contractOffer"); + var changedCoFetched = changedCoExisting.withContractOffers(List.of( + changedCoExisting.getContractOffers().get(0).withPolicyValue("changed") + )); + testData.existing(changedCoExisting); + testData.fetched(changedCoFetched); + + var addedCoExisting = Do.forName("contractOfferAdded"); + var addedCoFetched = addedCoExisting.withContractOffer(new Co("added co", "added co")); + testData.existing(addedCoExisting); + testData.fetched(addedCoFetched); + + var removedCoExisting = Do.forName("contractOfferRemoved") + .withContractOffer(new Co("removed co", "removed co")); + var removedCoFetched = 
Do.forName("contractOfferRemoved"); + testData.existing(removedCoExisting); + testData.fetched(removedCoFetched); + + // act + dsl.transaction(it -> testData.initialize(it.dsl())); + dsl.transaction(it -> dataOfferWriter.updateDataOffers( + it.dsl(), + testData.connectorRef, + testData.fetchedDataOffers, + changes + )); + var actual = dsl.transactionResult(it -> new DataOfferWriterTestResultHelper(it.dsl())); + + // assert + assertThat(actual.numDataOffers()).isEqualTo(6); + assertThat(changes.getNumOffersAdded()).isEqualTo(1); + assertThat(changes.getNumOffersUpdated()).isEqualTo(4); + assertThat(changes.getNumOffersDeleted()).isEqualTo(1); + + var now = OffsetDateTime.now(); + var minuteAccuracy = new TemporalUnitLessThanOffset(1, ChronoUnit.MINUTES); + var addedActual = actual.getDataOffer(added.getAssetId()); + assertAssetPropertiesEqual(testData, addedActual, added); + assertThat(addedActual.getCreatedAt()).isCloseTo(now, minuteAccuracy); + assertThat(addedActual.getUpdatedAt()).isCloseTo(now, minuteAccuracy); + assertThat(actual.numContractOffers(added.getAssetId())).isEqualTo(1); + assertPolicyEquals(actual, testData, added, added.getContractOffers().get(0)); + + var unchangedActual = actual.getDataOffer(unchanged.getAssetId()); + assertThat(unchangedActual.getUpdatedAt()).isEqualTo(testData.old); + assertThat(unchangedActual.getCreatedAt()).isEqualTo(testData.old); + + var fieldChangedActual = actual.getDataOffer(fieldChangedExisting.getAssetId()); + assertAssetPropertiesEqual(testData, fieldChangedActual, fieldChangedFetched); + assertThat(fieldChangedActual.getCreatedAt()).isEqualTo(testData.old); + assertThat(fieldChangedActual.getUpdatedAt()).isCloseTo(now, minuteAccuracy); + + var removedActual = actual.getDataOffer(removed.getAssetId()); + assertThat(removedActual).isNull(); + + var changedCoActual = actual.getDataOffer(changedCoExisting.getAssetId()); + assertThat(changedCoActual.getCreatedAt()).isEqualTo(testData.old); + 
assertThat(changedCoActual.getUpdatedAt()).isCloseTo(now, minuteAccuracy); + assertThat(actual.numContractOffers(changedCoExisting.getAssetId())).isEqualTo(1); + assertPolicyEquals(actual, testData, changedCoFetched, changedCoFetched.getContractOffers().get(0)); + + var addedCoActual = actual.getDataOffer(addedCoExisting.getAssetId()); + assertThat(addedCoActual.getCreatedAt()).isEqualTo(testData.old); + assertThat(addedCoActual.getUpdatedAt()).isCloseTo(now, minuteAccuracy); + assertThat(actual.numContractOffers(addedCoActual.getAssetId())).isEqualTo(2); + + var removedCoActual = actual.getDataOffer(removedCoExisting.getAssetId()); + assertThat(removedCoActual.getCreatedAt()).isEqualTo(testData.old); + assertThat(removedCoActual.getUpdatedAt()).isCloseTo(now, minuteAccuracy); + assertThat(actual.numContractOffers(removedCoActual.getAssetId())).isEqualTo(1); + }); + } + + private void assertAssetPropertiesEqual(DataOfferWriterTestDataHelper testData, DataOfferRecord actual, + Do expected) { + var actualUiAssetJson = actual.getUiAssetJson().data(); + var expectedUiAssetJson = testData.dummyAssetJson(expected); + AssertionUtils.assertEqualJson(actualUiAssetJson, expectedUiAssetJson); + } + + private void assertPolicyEquals( + DataOfferWriterTestResultHelper actual, + DataOfferWriterTestDataHelper scenario, + Do expectedDo, + Co expectedCo + ) { + var actualContractOffer = actual.getContractOffer(expectedDo.getAssetId(), expectedCo.getId()); + var actualUiPolicyJson = actualContractOffer.getUiPolicyJson().data(); + var expectedUiPolicyJson = scenario.dummyPolicyJson(expectedCo.getPolicyValue()); + assertThat(actualUiPolicyJson).isEqualTo(expectedUiPolicyJson); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcerTest.java 
b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcerTest.java new file mode 100644 index 000000000..9d486bffc --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferLimitsEnforcerTest.java @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorContractOffersExceeded; +import de.sovity.authorityportal.db.jooq.enums.ConnectorDataOffersExceeded; +import de.sovity.authorityportal.db.jooq.tables.records.ConnectorRecord; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import org.assertj.core.api.Assertions; +import org.jooq.DSLContext; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +class 
DataOfferLimitsEnforcerTest { + DataOfferLimitsEnforcer dataOfferLimitsEnforcer; + CrawlerConfig settings; + CrawlerEventLogger crawlerEventLogger; + DSLContext dsl; + + ConnectorRef connectorRef = new DataOfferWriterTestDataHelper().connectorRef; + + @BeforeEach + void setup() { + settings = mock(CrawlerConfig.class); + crawlerEventLogger = mock(CrawlerEventLogger.class); + dataOfferLimitsEnforcer = new DataOfferLimitsEnforcer(settings, crawlerEventLogger); + dsl = mock(DSLContext.class); + } + + @Test + void no_limit_and_two_dataofffers_and_contractoffer_should_not_limit() { + // arrange + int maxDataOffers = -1; + int maxContractOffers = -1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + var actual = enforcedLimits.abbreviatedDataOffers(); + var contractOffersLimitExceeded = enforcedLimits.contractOfferLimitsExceeded(); + var dataOffersLimitExceeded = enforcedLimits.dataOfferLimitsExceeded(); + + // assert + Assertions.assertThat(actual).hasSize(2); + assertFalse(contractOffersLimitExceeded); + assertFalse(dataOffersLimitExceeded); + } + + @Test + void limit_zero_and_one_dataoffers_should_result_to_none() { + // arrange + int maxDataOffers = 0; + int maxContractOffers = 0; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var dataOffers = List.of(new FetchedDataOffer()); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + var actual = new ArrayList<>(enforcedLimits.abbreviatedDataOffers()); + var contractOffersLimitExceeded = 
enforcedLimits.contractOfferLimitsExceeded(); + var dataOffersLimitExceeded = enforcedLimits.dataOfferLimitsExceeded(); + + // assert + assertThat(actual).isEmpty(); + assertFalse(contractOffersLimitExceeded); + assertTrue(dataOffersLimitExceeded); + } + + @Test + void limit_one_and_two_dataoffers_should_result_to_one() { + // arrange + int maxDataOffers = 1; + int maxContractOffers = 1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + var actual = new ArrayList<>(enforcedLimits.abbreviatedDataOffers()); + var contractOffersLimitExceeded = enforcedLimits.contractOfferLimitsExceeded(); + var dataOffersLimitExceeded = enforcedLimits.dataOfferLimitsExceeded(); + + // assert + assertThat(actual).hasSize(1); + Assertions.assertThat(actual.get(0).getContractOffers()).hasSize(1); + assertTrue(contractOffersLimitExceeded); + assertTrue(dataOffersLimitExceeded); + } + + @Test + void verify_logConnectorUpdateDataOfferLimitExceeded() { + // arrange + var connector = new ConnectorRecord(); + connector.setDataOffersExceeded(ConnectorDataOffersExceeded.OK); + + int maxDataOffers = 1; + int maxContractOffers = 1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + 
dataOfferLimitsEnforcer.logEnforcedLimitsIfChanged(dsl, connectorRef, connector, enforcedLimits); + + // assert + verify(crawlerEventLogger).logConnectorUpdateDataOfferLimitExceeded(dsl, connectorRef, 1); + } + + @Test + void verify_logConnectorUpdateDataOfferLimitOk() { + // arrange + var connector = new ConnectorRecord(); + connector.setDataOffersExceeded(ConnectorDataOffersExceeded.EXCEEDED); + + int maxDataOffers = -1; + int maxContractOffers = -1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + dataOfferLimitsEnforcer.logEnforcedLimitsIfChanged(dsl, connectorRef, connector, enforcedLimits); + + // assert + verify(crawlerEventLogger).logConnectorUpdateDataOfferLimitOk(dsl, connectorRef); + } + + @Test + void verify_logConnectorUpdateContractOfferLimitExceeded() { + // arrange + var connector = new ConnectorRecord(); + connector.setContractOffersExceeded(ConnectorContractOffersExceeded.OK); + + int maxDataOffers = 1; + int maxContractOffers = 1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + dataOfferLimitsEnforcer.logEnforcedLimitsIfChanged(dsl, connectorRef, connector, enforcedLimits); + + // assert + verify(crawlerEventLogger).logConnectorUpdateContractOfferLimitExceeded(dsl, 
connectorRef, 1); + } + + @Test + void verify_logConnectorUpdateContractOfferLimitOk() { + // arrange + var connector = new ConnectorRecord(); + connector.setContractOffersExceeded(ConnectorContractOffersExceeded.EXCEEDED); + + int maxDataOffers = -1; + int maxContractOffers = -1; + when(settings.getMaxDataOffersPerConnector()).thenReturn(maxDataOffers); + when(settings.getMaxContractOffersPerDataOffer()).thenReturn(maxContractOffers); + + var myDataOffer = new FetchedDataOffer(); + myDataOffer.setContractOffers(List.of(new FetchedContractOffer(), new FetchedContractOffer())); + var dataOffers = List.of(myDataOffer, myDataOffer); + + // act + var enforcedLimits = dataOfferLimitsEnforcer.enforceLimits(dataOffers); + dataOfferLimitsEnforcer.logEnforcedLimitsIfChanged(dsl, connectorRef, connector, enforcedLimits); + + // assert + verify(crawlerEventLogger).logConnectorUpdateContractOfferLimitOk(dsl, connectorRef); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataHelper.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataHelper.java new file mode 100644 index 000000000..fd5501ab2 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataHelper.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; 
+import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import de.sovity.edc.ext.catalog.crawler.TestData; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedContractOffer; +import de.sovity.edc.ext.catalog.crawler.crawling.fetching.model.FetchedDataOffer; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.wrapper.api.common.model.UiAsset; +import de.sovity.edc.utils.JsonUtils; +import jakarta.json.Json; +import org.apache.commons.lang3.Validate; +import org.jetbrains.annotations.NotNull; +import org.jooq.DSLContext; +import org.jooq.JSONB; + +import java.time.OffsetDateTime; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +class DataOfferWriterTestDataHelper { + OffsetDateTime old = TestData.old; + ConnectorRef connectorRef = TestData.connectorRef; + List existingContractOffers = new ArrayList<>(); + List existingDataOffers = new ArrayList<>(); + List fetchedDataOffers = new ArrayList<>(); + + + /** + * Adds fetched data offer + * + * @param dataOffer fetched data offer + */ + public void fetched(DataOfferWriterTestDataModels.Do dataOffer) { + Validate.notEmpty(dataOffer.getContractOffers()); + fetchedDataOffers.add(dummyFetchedDataOffer(dataOffer)); + } + + + /** + * Adds data offer directly to DB. 
+ * + * @param dataOffer data offer + */ + public void existing(DataOfferWriterTestDataModels.Do dataOffer) { + Validate.notEmpty(dataOffer.getContractOffers()); + existingDataOffers.add(dummyDataOffer(dataOffer)); + dataOffer.getContractOffers().stream() + .map(contractOffer -> dummyContractOffer(dataOffer, contractOffer)) + .forEach(existingContractOffers::add); + } + + public void initialize(DSLContext dsl) { + TestData.insertConnector(dsl, connectorRef, record -> { + }); + dsl.batchInsert(existingDataOffers).execute(); + dsl.batchInsert(existingContractOffers).execute(); + } + + private ContractOfferRecord dummyContractOffer( + DataOfferWriterTestDataModels.Do dataOffer, + DataOfferWriterTestDataModels.Co contractOffer + ) { + var contractOfferRecord = new ContractOfferRecord(); + contractOfferRecord.setConnectorId(connectorRef.getConnectorId()); + contractOfferRecord.setAssetId(dataOffer.getAssetId()); + contractOfferRecord.setContractOfferId(contractOffer.getId()); + contractOfferRecord.setUiPolicyJson(JSONB.valueOf(dummyPolicyJson(contractOffer.getPolicyValue()))); + contractOfferRecord.setCreatedAt(old); + contractOfferRecord.setUpdatedAt(old); + return contractOfferRecord; + } + + private DataOfferRecord dummyDataOffer(DataOfferWriterTestDataModels.Do dataOffer) { + var assetName = Optional.of(dataOffer.getAssetTitle()).orElse(dataOffer.getAssetId()); + + var dataOfferRecord = new DataOfferRecord(); + dataOfferRecord.setConnectorId(connectorRef.getConnectorId()); + dataOfferRecord.setAssetId(dataOffer.getAssetId()); + dataOfferRecord.setAssetTitle(assetName); + dataOfferRecord.setUiAssetJson(JSONB.valueOf(dummyAssetJson(dataOffer))); + dataOfferRecord.setCreatedAt(old); + dataOfferRecord.setUpdatedAt(old); + return dataOfferRecord; + } + + private FetchedDataOffer dummyFetchedDataOffer(DataOfferWriterTestDataModels.Do dataOffer) { + var fetchedDataOffer = new FetchedDataOffer(); + fetchedDataOffer.setAssetId(dataOffer.getAssetId()); + 
fetchedDataOffer.setUiAsset( + UiAsset.builder() + .assetId(dataOffer.getAssetId()) + .title(dataOffer.getAssetTitle()) + .build() + ); + fetchedDataOffer.setUiAssetJson(dummyAssetJson(dataOffer)); + + var contractOffersMapped = dataOffer.getContractOffers().stream().map(this::dummyFetchedContractOffer).collect(Collectors.toList()); + fetchedDataOffer.setContractOffers(contractOffersMapped); + + return fetchedDataOffer; + } + + public String dummyAssetJson(DataOfferWriterTestDataModels.Do dataOffer) { + var dummyUiAssetJson = Json.createObjectBuilder() + .add("assetId", dataOffer.getAssetId()) + .add("title", dataOffer.getAssetTitle()) + .add("assetJsonLd", "{}") + .build(); + return JsonUtils.toJson(dummyUiAssetJson); + } + + public String dummyPolicyJson(String policyValue) { + return "{\"%s\": \"%s\"}".formatted( + "SomePolicyField", policyValue + ); + } + + @NotNull + private FetchedContractOffer dummyFetchedContractOffer(DataOfferWriterTestDataModels.Co it) { + var contractOffer = new FetchedContractOffer(); + contractOffer.setContractOfferId(it.getId()); + contractOffer.setUiPolicyJson(dummyPolicyJson(it.getPolicyValue())); + return contractOffer; + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataModels.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataModels.java new file mode 100644 index 000000000..7dd824d99 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDataModels.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * 
SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import lombok.Value; +import lombok.With; + +import java.util.ArrayList; +import java.util.List; + +class DataOfferWriterTestDataModels { + /** + * Dummy Data Offer + */ + @Value + static class Do { + @With + String assetId; + @With + String assetTitle; + @With + List contractOffers; + + public Do withContractOffer(Co co) { + var list = new ArrayList<>(contractOffers); + list.add(co); + return this.withContractOffers(list); + } + + public static Do forName(String name) { + return new Do(name, name + " Title", List.of(new Co(name + " CO", name + " Policy"))); + } + } + + /** + * Dummy Contract Offer + */ + @Value + static class Co { + @With + String id; + @With + String policyValue; + } + + public static Co forName(String name) { + return new Co(name + " CO", name + " Policy"); + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java new file mode 100644 index 000000000..a46cf1dc4 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestDydi.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import 
de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogPatchApplier; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.contract_offers.ContractOfferRecordUpdater; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferQueries; +import de.sovity.edc.ext.catalog.crawler.dao.data_offers.DataOfferRecordUpdater; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import de.sovity.edc.ext.wrapper.api.common.mappers.asset.utils.ShortDescriptionBuilder; +import lombok.Value; +import org.eclipse.edc.spi.system.configuration.Config; + +import static org.mockito.Mockito.mock; + +@Value +class DataOfferWriterTestDydi { + Config config = mock(Config.class); + CrawlerConfig crawlerConfig = mock(CrawlerConfig.class); + DataOfferQueries dataOfferQueries = new DataOfferQueries(); + ContractOfferQueries contractOfferQueries = new ContractOfferQueries(); + ContractOfferRecordUpdater contractOfferRecordUpdater = new ContractOfferRecordUpdater(); + ConnectorQueries connectorQueries = new ConnectorQueries(crawlerConfig); + ShortDescriptionBuilder shortDescriptionBuilder = new ShortDescriptionBuilder(); + DataOfferRecordUpdater dataOfferRecordUpdater = new DataOfferRecordUpdater(shortDescriptionBuilder); + CatalogPatchBuilder catalogPatchBuilder = new CatalogPatchBuilder( + contractOfferQueries, + dataOfferQueries, + dataOfferRecordUpdater, + contractOfferRecordUpdater + ); + CatalogPatchApplier catalogPatchApplier = new CatalogPatchApplier(); + ConnectorUpdateCatalogWriter connectorUpdateCatalogWriter = new ConnectorUpdateCatalogWriter(catalogPatchBuilder, catalogPatchApplier); + + // for the ConnectorUpdateSuccessWriterTest + CrawlerEventLogger crawlerEventLogger = new CrawlerEventLogger(); + DataOfferLimitsEnforcer 
dataOfferLimitsEnforcer = new DataOfferLimitsEnforcer( + crawlerConfig, + crawlerEventLogger + ); + ConnectorUpdateSuccessWriter connectorUpdateSuccessWriter = new ConnectorUpdateSuccessWriter( + crawlerEventLogger, + connectorUpdateCatalogWriter, + dataOfferLimitsEnforcer + ); +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestResultHelper.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestResultHelper.java new file mode 100644 index 000000000..c982b2b55 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DataOfferWriterTestResultHelper.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.authorityportal.db.jooq.Tables; +import de.sovity.authorityportal.db.jooq.tables.records.ContractOfferRecord; +import de.sovity.authorityportal.db.jooq.tables.records.DataOfferRecord; +import org.jetbrains.annotations.NotNull; +import org.jooq.DSLContext; + +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static java.util.stream.Collectors.groupingBy; + +class DataOfferWriterTestResultHelper { + private final @NotNull Map dataOffers; + private final @NotNull Map> contractOffers; + + DataOfferWriterTestResultHelper(DSLContext dsl) { + this.dataOffers = dsl.selectFrom(Tables.DATA_OFFER).fetchMap(Tables.DATA_OFFER.ASSET_ID); + 
this.contractOffers = dsl.selectFrom(Tables.CONTRACT_OFFER).stream().collect(groupingBy( + ContractOfferRecord::getAssetId, + Collectors.toMap(ContractOfferRecord::getContractOfferId, Function.identity()) + )); + } + + public DataOfferRecord getDataOffer(String assetId) { + return dataOffers.get(assetId); + } + + public int numDataOffers() { + return dataOffers.size(); + } + + public int numContractOffers(String assetId) { + return contractOffers.getOrDefault(assetId, Map.of()).size(); + } + + public ContractOfferRecord getContractOffer(String assetId, String contractOfferId) { + return contractOffers.getOrDefault(assetId, Map.of()).get(contractOfferId); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DiffUtilsTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DiffUtilsTest.java new file mode 100644 index 000000000..79933dc2a --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/crawling/writing/DiffUtilsTest.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.crawling.writing; + +import de.sovity.edc.ext.catalog.crawler.crawling.writing.utils.DiffUtils; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.function.Function; + +import static org.assertj.core.api.Assertions.assertThat; + +class DiffUtilsTest { + + @Test + void testCompareLists() { + // arrange + List existing = List.of(1, 2); + List fetched = List.of("1", "3"); + + // 
act + var actual = DiffUtils.compareLists(existing, Function.identity(), fetched, Integer::parseInt); + + // assert + assertThat(actual.added()).containsExactly("3"); + assertThat(actual.updated()).containsExactly(new DiffUtils.DiffResultMatch<>(1, "1")); + assertThat(actual.removed()).containsExactly(2); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRefTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRefTest.java new file mode 100644 index 000000000..6593a5397 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/dao/connectors/ConnectorRefTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.dao.connectors; + + +import org.junit.jupiter.api.Test; + +import java.util.HashSet; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + + +class ConnectorRefTest { + + @Test + void testEqualsTrue() { + // arrange + var a = new ConnectorRef("a", "1", "1", "1", "1"); + var b = new ConnectorRef("a", "2", "2", "2", "2"); + + // act + var result = a.equals(b); + + // assert + assertThat(result).isTrue(); + } + + @Test + void testEqualsFalse() { + // arrange + var a = new ConnectorRef("a", "1", "1", "1", "1"); + var b = new ConnectorRef("b", "1", "1", "1", "1"); + + // act + var result = a.equals(b); + + // assert + assertThat(result).isFalse(); + } + + @Test + void testSet() { + // arrange + var a = new 
ConnectorRef("a", "1", "1", "1", "1"); + var a2 = new ConnectorRef("a", "2", "2", "2", "2"); + var b = new ConnectorRef("b", "1", "1", "1", "1"); + + // act + var result = new HashSet<>(List.of(a, a2, b)).stream().map(ConnectorRef::getConnectorId).toList(); + + // assert + assertThat(result).containsExactlyInAnyOrder("a", "b"); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolQueueTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolQueueTest.java new file mode 100644 index 000000000..e6b1ad053 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/queue/ThreadPoolQueueTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.queue; + +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; + +import static org.mockito.Mockito.mock; + +class ThreadPoolQueueTest { + + + /** + * Regression against bug where the queue did not act like a queue. 
+ */ + @Test + void testOrdering() { + Runnable noop = () -> { + }; + + var c10 = mock(ConnectorRef.class); + var c20 = mock(ConnectorRef.class); + var c11 = mock(ConnectorRef.class); + var c21 = mock(ConnectorRef.class); + var c00 = mock(ConnectorRef.class); + + var queue = new ThreadPoolTaskQueue(); + queue.add(new ThreadPoolTask(1, noop, c10)); + queue.add(new ThreadPoolTask(2, noop, c20)); + queue.add(new ThreadPoolTask(1, noop, c11)); + queue.add(new ThreadPoolTask(2, noop, c21)); + queue.add(new ThreadPoolTask(0, noop, c00)); + + var result = new ArrayList<ThreadPoolTask>(); + queue.getQueue().drainTo(result); + + Assertions.assertThat(result).extracting(ThreadPoolTask::getConnectorRef) + .containsExactly(c00, c10, c11, c20, c21); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRemovalJobTest.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRemovalJobTest.java new file mode 100644 index 000000000..c8282c369 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/orchestration/schedules/OfflineConnectorRemovalJobTest.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.orchestration.schedules; + +import de.sovity.authorityportal.db.jooq.enums.ConnectorOnlineStatus; +import de.sovity.edc.ext.catalog.crawler.CrawlerTestDb; +import de.sovity.edc.ext.catalog.crawler.TestData; +import 
de.sovity.edc.ext.catalog.crawler.crawling.OfflineConnectorCleaner; +import de.sovity.edc.ext.catalog.crawler.crawling.logging.CrawlerEventLogger; +import de.sovity.edc.ext.catalog.crawler.dao.CatalogCleaner; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorQueries; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorRef; +import de.sovity.edc.ext.catalog.crawler.dao.connectors.ConnectorStatusUpdater; +import de.sovity.edc.ext.catalog.crawler.orchestration.config.CrawlerConfig; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.time.Duration; +import java.time.OffsetDateTime; + +import static de.sovity.authorityportal.db.jooq.tables.Connector.CONNECTOR; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class OfflineConnectorRemovalJobTest { + @RegisterExtension + private static final CrawlerTestDb TEST_DATABASE = new CrawlerTestDb(); + + ConnectorRef connectorRef = TestData.connectorRef; + + CrawlerConfig crawlerConfig; + OfflineConnectorCleaner offlineConnectorCleaner; + ConnectorQueries connectorQueries; + + @BeforeEach + void beforeEach() { + crawlerConfig = mock(CrawlerConfig.class); + connectorQueries = new ConnectorQueries(crawlerConfig); + offlineConnectorCleaner = new OfflineConnectorCleaner( + crawlerConfig, + new ConnectorQueries(crawlerConfig), + new CrawlerEventLogger(), + new ConnectorStatusUpdater(), + new CatalogCleaner() + ); + when(crawlerConfig.getEnvironmentId()).thenReturn(connectorRef.getEnvironmentId()); + } + + @Test + void test_offlineConnectorCleaner_should_be_dead() { + TEST_DATABASE.testTransaction(dsl -> { + // arrange + when(crawlerConfig.getKillOfflineConnectorsAfter()).thenReturn(Duration.ofDays(5)); + TestData.insertConnector(dsl, connectorRef, record -> { + record.setOnlineStatus(ConnectorOnlineStatus.OFFLINE); 
+ record.setLastSuccessfulRefreshAt(OffsetDateTime.now().minusDays(6)); + }); + + // act + offlineConnectorCleaner.cleanConnectorsIfOfflineTooLong(dsl); + + // assert + var connector = dsl.fetchOne(CONNECTOR, CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId())); + assertThat(connector.getOnlineStatus()).isEqualTo(ConnectorOnlineStatus.DEAD); + }); + } + + @Test + void test_offlineConnectorCleaner_should_not_be_dead() { + TEST_DATABASE.testTransaction(dsl -> { + // arrange + when(crawlerConfig.getKillOfflineConnectorsAfter()).thenReturn(Duration.ofDays(5)); + TestData.insertConnector(dsl, connectorRef, record -> { + record.setOnlineStatus(ConnectorOnlineStatus.OFFLINE); + record.setLastSuccessfulRefreshAt(OffsetDateTime.now().minusDays(2)); + }); + + // act + offlineConnectorCleaner.cleanConnectorsIfOfflineTooLong(dsl); + + // assert + var connector = dsl.fetchOne(CONNECTOR, CONNECTOR.CONNECTOR_ID.eq(connectorRef.getConnectorId())); + assertThat(connector.getOnlineStatus()).isEqualTo(ConnectorOnlineStatus.OFFLINE); + }); + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2Test.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2Test.java new file mode 100644 index 000000000..87673188f --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/CollectionUtils2Test.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import 
org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + + +class CollectionUtils2Test { + @Test + void difference() { + assertThat(CollectionUtils2.difference(List.of(1, 2, 3), List.of(2, 3, 4))).containsExactly(1); + } + + @Test + void isNotEmpty_withEmptyList() { + assertThat(CollectionUtils2.isNotEmpty(List.of())).isFalse(); + } + + @Test + void isNotEmpty_withNull() { + assertThat(CollectionUtils2.isNotEmpty(null)).isFalse(); + } + + @Test + void isNotEmpty_withNonEmptyList() { + assertThat(CollectionUtils2.isNotEmpty(List.of(1))).isTrue(); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2Test.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2Test.java new file mode 100644 index 000000000..8843aebbb --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/JsonUtils2Test.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class JsonUtils2Test { + @Test + void equalityTests() { + assertTrue(JsonUtils2.isEqualJson(null, null)); + assertTrue(JsonUtils2.isEqualJson("null", "null")); + assertTrue(JsonUtils2.isEqualJson("{}", "{}")); + assertTrue(JsonUtils2.isEqualJson("{\"a\": true, \"b\": \"hello\"}", "{\"a\": true,\"b\": 
\"hello\"}")); + assertTrue(JsonUtils2.isEqualJson("{\"a\": true, \"b\": \"hello\"}", "{\"b\": \"hello\", \"a\": true}")); + + assertFalse(JsonUtils2.isEqualJson(null, "1")); + assertFalse(JsonUtils2.isEqualJson("1", null)); + } +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2Test.java b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2Test.java new file mode 100644 index 000000000..1cbd56dd5 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/java/de/sovity/edc/ext/catalog/crawler/utils/StringUtils2Test.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2023 sovity GmbH + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * SPDX-License-Identifier: Apache-2.0 + * + * Contributors: + * sovity GmbH - initial API and implementation + * + */ + +package de.sovity.edc.ext.catalog.crawler.utils; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class StringUtils2Test { + @Test + void removeSuffix_emptyStrings() { + assertThat(StringUtils2.removeSuffix("", "")).isEmpty(); + } + + @Test + void removeSuffix_emptySuffix() { + assertThat(StringUtils2.removeSuffix("test", "")).isEqualTo("test"); + } + + + @Test + void removeSuffix_withSuffix() { + assertThat(StringUtils2.removeSuffix("testabc", "abc")).isEqualTo("test"); + } + + + @Test + void removeSuffix_withoutSuffix() { + assertThat(StringUtils2.removeSuffix("test", "abc")).isEqualTo("test"); + } + +} diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/logging.properties b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/logging.properties new file mode 100644 index 
000000000..471bd20d6 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/logging.properties @@ -0,0 +1,6 @@ +.level=ALL +org.eclipse.edc.level=ALL +handlers=java.util.logging.ConsoleHandler +java.util.logging.ConsoleHandler.formatter=java.util.logging.SimpleFormatter +java.util.logging.ConsoleHandler.level=ALL +java.util.logging.SimpleFormatter.format=[%1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS] [%4$-7s] %5$s%6$s%n diff --git a/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/migration-test-utils/V9999__Make_Columns_Nullable_For_Easier_Tests.sql b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/migration-test-utils/V9999__Make_Columns_Nullable_For_Easier_Tests.sql new file mode 100644 index 000000000..bc4f9fd8c --- /dev/null +++ b/authority-portal-backend/catalog-crawler/catalog-crawler/src/test/resources/migration-test-utils/V9999__Make_Columns_Nullable_For_Easier_Tests.sql @@ -0,0 +1,25 @@ +do +$$ + declare + r record; + begin + for r in (select 'alter table "' || c.table_schema || '"."' || c.table_name || '" alter column "' || c.column_name || + '" drop not null;' as command + from information_schema.columns c + where c.table_schema not in ('pg_catalog', 'information_schema') -- exclude system schemas + and c.table_name in ('connector', 'organization', 'user') -- only selected AP tables + and c.is_nullable = 'NO' + and not exists (SELECT tc.constraint_type + FROM information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + WHERE tc.table_schema = c.table_schema + and tc.table_name = c.table_name + AND kcu.column_name = c.column_name + AND tc.constraint_type = 'PRIMARY KEY')) -- exclude primary keys + loop + execute r.command; + end loop; + end +$$; diff --git a/authority-portal-backend/catalog-crawler/docker-entrypoint.sh 
b/authority-portal-backend/catalog-crawler/docker-entrypoint.sh new file mode 100755 index 000000000..5daf083f5 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/docker-entrypoint.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Use bash instead of sh, +# because sh in this image is provided by dash (https://git.kernel.org/pub/scm/utils/dash/dash.git/), +# which seems to eat environment variables containing dashes, +# which are required for some EDC configuration values. + +# Do not set -u to permit unset variables in .env +set -eo pipefail + +if [[ "x${1:-}" == "xstart" ]]; then + cmd=(java ${JAVA_ARGS:-}) + + if [ "${REMOTE_DEBUG:-n}" = "y" ] || [ "${REMOTE_DEBUG:-false}" = "true" ]; then + cmd+=( + "-agentlib:jdwp=transport=dt_socket,server=y,suspend=${REMOTE_DEBUG_SUSPEND:-n},address=${REMOTE_DEBUG_BIND:-127.0.0.1:5005}" + ) + fi + + logging_config='/app/logging.properties' + if [ "${DEBUG_LOGGING:-n}" = "y" ] || [ "${DEBUG_LOGGING:-false}" = "true" ]; then + logging_config='/app/logging.dev.properties' + fi + + cmd+=( + -Djava.util.logging.config.file=${logging_config} + -jar /app/app.jar + ) +else + cmd=("$@") +fi + +if [ "${REMOTE_DEBUG:-n}" = "y" ] || [ "${REMOTE_DEBUG:-false}" = "true" ]; then + echo "Jar CMD (printing, because REMOTE_DEBUG=y|true): ${cmd[@]}" +fi + +# Use "exec" for termination signals to reach JVM +exec "${cmd[@]}" diff --git a/authority-portal-backend/catalog-crawler/logging.dev.properties b/authority-portal-backend/catalog-crawler/logging.dev.properties new file mode 100644 index 000000000..3db949d79 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/logging.dev.properties @@ -0,0 +1,7 @@ +handlers = java.util.logging.ConsoleHandler +.level = FINE +java.util.logging.ConsoleHandler.level = ALL +java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter +java.util.logging.SimpleFormatter.format = %1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS %5$s %6$s%n +org.eclipse.dataspaceconnector.level = FINE 
+org.eclipse.dataspaceconnector.handler = java.util.logging.ConsoleHandler diff --git a/authority-portal-backend/catalog-crawler/logging.properties b/authority-portal-backend/catalog-crawler/logging.properties new file mode 100644 index 000000000..17dfd8a75 --- /dev/null +++ b/authority-portal-backend/catalog-crawler/logging.properties @@ -0,0 +1,8 @@ +handlers = java.util.logging.ConsoleHandler +.level = INFO +java.util.logging.ConsoleHandler.level = ALL +java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter +java.util.logging.SimpleFormatter.format = %1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS %5$s %6$s%n +org.eclipse.dataspaceconnector.level = FINE +org.eclipse.dataspaceconnector.handler = java.util.logging.ConsoleHandler +org.eclipse.edc.api.observability.ObservabilityApiController.level = ERROR diff --git a/authority-portal-backend/gradle/libs.versions.toml b/authority-portal-backend/gradle/libs.versions.toml index fe22adff4..c6fc1553f 100644 --- a/authority-portal-backend/gradle/libs.versions.toml +++ b/authority-portal-backend/gradle/libs.versions.toml @@ -3,7 +3,18 @@ java = "17" kotlin = "1.9.22" npmVersion = "8.15.0" -sovity-edcCe = "10.4.1" +edcGroup = "org.eclipse.edc" +sovityCatalogCrawlerGroup = "de.sovity.edc.catalog.crawler" +sovityEdcExtensionGroup = "de.sovity.edc.ext" +sovityEdcGroup = "de.sovity.edc" + +sovity-edcCe = "0.0.1-SNAPSHOT" +edc = "0.2.1.4" +quartz = "2.3.2" +shadow = "7.1.2" +junit = "5.10.0" +flyway = "9.0.1" +jsonAssert = "1.5.1" quarkus = "3.9.2" quarkus-keycloakAdminClientReactive = "3.6.6" @@ -26,9 +37,16 @@ mockitoKotlin = "5.1.0" awaitility = "4.2.1" commons-lang = "3.14.0" +restAssured = "5.4.0" [libraries] sovity-edc-wrapperCommonApi = { module = "de.sovity.edc:wrapper-common-api", version.ref = "sovity-edcCe" } +sovity-edc-wrapperCommonMappers = { module = "de.sovity.edc:wrapper-common-mappers", version.ref = "sovity-edcCe" } +sovity-edc-jsonAndJsonLdUtils = { module = "de.sovity.edc:json-and-jsonld-utils", 
version.ref = "sovity-edcCe" } +sovity-edc-catalogParser = { module = "de.sovity.edc:catalog-parser", version.ref = "sovity-edcCe" } +sovity-edc-config = { module = "de.sovity.edc:config", version.ref = "sovity-edcCe" } +sovity-edc-ext-postgresFlywayCore = { module = "de.sovity.edc.ext:postgres-flyway-core", version.ref = "sovity-edcCe" } +sovity-edc-ext-testUtils = { module = "de.sovity.edc.ext:test-utils", version.ref = "sovity-edcCe" } quarkus-universeBom = { module = "io.quarkus.platform:quarkus-bom", version.ref = "quarkus" } quarkus-keycloakAdminClientReactive = { module = "io.quarkus:quarkus-keycloak-admin-client-reactive", version.ref = "quarkus-keycloakAdminClientReactive" } @@ -44,6 +62,8 @@ postgresql = { module = "org.postgresql:postgresql", version.ref = "postgresql" swaggerCore-annotations = { module = "io.swagger.core.v3:swagger-annotations-jakarta", version.ref = "swaggerCore" } swaggerCore-jaxrs2 = { module = "io.swagger.core.v3:swagger-jaxrs2-jakarta", version.ref = "swaggerCore" } testcontainers-postgresql = { module = "org.testcontainers:postgresql", version.ref = "testcontainers" } +testcontainers-testcontainers = { module = "org.testcontainers:testcontainers", version.ref = "testcontainers" } +testcontainers-junitJupiter = { module = "org.testcontainers:junit-jupiter", version.ref = "testcontainers" } assertj-core = { module = "org.assertj:assertj-core", version.ref = "assertj" } mockito-core = { module = "org.mockito:mockito-core", version.ref = "mockito" } @@ -54,6 +74,68 @@ awaitility = { module = "org.awaitility:awaitility", version.ref = "awaitility" commons-lang3 = { module = "org.apache.commons:commons-lang3", version.ref = "commons-lang" } +edc-apiCore = { module = "org.eclipse.edc:api-core", version.ref = "edc" } +edc-apiObservability = { module = "org.eclipse.edc:api-observability", version.ref = "edc" } +edc-authSpi = { module = "org.eclipse.edc:auth-spi", version.ref = "edc" } +edc-authTokenbased = { module = 
"org.eclipse.edc:auth-tokenbased", version.ref = "edc" } +edc-boot = { module = "org.eclipse.edc:boot", version.ref = "edc" } +edc-configurationFilesystem = { module = "org.eclipse.edc:configuration-filesystem", version.ref = "edc" } +edc-connectorCore = { module = "org.eclipse.edc:connector-core", version.ref = "edc" } +edc-contractDefinitionApi = { module = "org.eclipse.edc:contract-definition-api", version.ref = "edc" } +edc-contractNegotiationStoreSql = { module = "org.eclipse.edc:contract-negotiation-store-sql", version.ref = "edc" } +edc-contractSpi = { module = "org.eclipse.edc:contract-spi", version.ref = "edc" } +edc-controlPlaneAggregateServices = { module = "org.eclipse.edc:control-plane-aggregate-services", version.ref = "edc" } +edc-controlPlaneCore = { module = "org.eclipse.edc:control-plane-core", version.ref = "edc" } +edc-controlPlaneSpi = { module = "org.eclipse.edc:control-plane-spi", version.ref = "edc" } +edc-controlPlaneSql = { module = "org.eclipse.edc:control-plane-sql", version.ref = "edc" } +edc-coreSpi = { module = "org.eclipse.edc:core-spi", version.ref = "edc" } +edc-dataPlaneCore = { module = "org.eclipse.edc:data-plane-core", version.ref = "edc" } +edc-dataPlaneFramework = { module = "org.eclipse.edc:data-plane-framework", version.ref = "edc" } +edc-dataPlaneHttp = { module = "org.eclipse.edc:data-plane-http", version.ref = "edc" } +edc-dataPlaneSelectorClient = { module = "org.eclipse.edc:data-plane-selector-client", version.ref = "edc" } +edc-dataPlaneSelectorCore = { module = "org.eclipse.edc:data-plane-selector-core", version.ref = "edc" } +edc-dataPlaneUtil = { module = "org.eclipse.edc:data-plane-util", version.ref = "edc" } +edc-dsp = { module = "org.eclipse.edc:dsp", version.ref = "edc" } +edc-dspApiConfiguration = { module = "org.eclipse.edc:dsp-api-configuration", version.ref = "edc" } +edc-dspNegotiationTransform = { module = "org.eclipse.edc:dsp-negotiation-transform", version.ref = "edc" } +edc-dspHttpSpi = { module = 
"org.eclipse.edc:dsp-http-spi", version.ref = "edc" } +edc-dspHttpCore = { module = "org.eclipse.edc:dsp-http-core", version.ref = "edc" } +edc-http = { module = "org.eclipse.edc:http", version.ref = "edc" } +edc-httpSpi = { module = "org.eclipse.edc:http-spi", version.ref = "edc" } +edc-iamMock = { module = "org.eclipse.edc:iam-mock", version.ref = "edc" } +edc-jsonLd = { module = "org.eclipse.edc:json-ld", version.ref = "edc" } +edc-jsonLdSpi = { module = "org.eclipse.edc:json-ld-spi", version.ref = "edc" } +edc-junit = { module = "org.eclipse.edc:junit", version.ref = "edc" } +edc-managementApi = { module = "org.eclipse.edc:management-api", version.ref = "edc" } +edc-managementApiConfiguration = { module = "org.eclipse.edc:management-api-configuration", version.ref = "edc" } +edc-monitorJdkLogger = { module = "org.eclipse.edc:monitor-jdk-logger", version.ref = "edc" } +edc-oauth2Core = { module = "org.eclipse.edc:oauth2-core", version.ref = "edc" } +edc-policyDefinitionApi = { module = "org.eclipse.edc:policy-definition-api", version.ref = "edc" } +edc-policyEngineSpi = { module = "org.eclipse.edc:policy-engine-spi", version.ref = "edc" } +edc-policyModel = { module = "org.eclipse.edc:policy-model", version.ref = "edc" } +edc-policySpi = { module = "org.eclipse.edc:policy-spi", version.ref = "edc" } +edc-runtimeMetamodel = { module = "org.eclipse.edc:runtime-metamodel", version = "0.2.1" } +edc-sqlCore = { module = "org.eclipse.edc:sql-core", version.ref = "edc" } +edc-transactionLocal = { module = "org.eclipse.edc:transaction-local", version.ref = "edc" } +edc-transferDataPlane = { module = "org.eclipse.edc:transfer-data-plane", version.ref = "edc" } +edc-transferProcessApi = { module = "org.eclipse.edc:transfer-process-api", version.ref = "edc" } +edc-transferProcessStoreSql = { module = "org.eclipse.edc:transfer-process-store-sql", version.ref = "edc" } +edc-transferSpi = { module = "org.eclipse.edc:transfer-spi", version.ref = "edc" } +edc-transformCore = { 
module = "org.eclipse.edc:transform-core", version.ref = "edc" } +edc-transformSpi = { module = "org.eclipse.edc:transform-spi", version.ref = "edc" } +edc-vaultFilesystem = { module = "org.eclipse.edc:vault-filesystem", version.ref = "edc" } +quartz-quartz = { module = "org.quartz-scheduler:quartz", version.ref = "quartz" } + +junit-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } +junit-engine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" } +junit-params = { module = "org.junit.jupiter:junit-jupiter-params", version.ref = "junit" } + +jsonAssert = { module = "org.skyscreamer:jsonassert", version.ref = "jsonAssert" } + +flyway-core = { module = "org.flywaydb:flyway-core", version.ref = "flyway" } + +restAssured-restAssured = { module = "io.rest-assured:rest-assured", version.ref = "restAssured" } + [bundles] mockito = ["mockito-core", "mockito-kotlin", "mockito-junitJupiter"] assertj = ["assertj-core"] @@ -63,5 +145,7 @@ quarkus = { id = "io.quarkus", version.ref = "quarkus" } flyway = { id = "org.flywaydb.flyway", version = "9.20.1" } jooq = { id = "nu.studer.jooq", version = "7.1.1" } +shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadow" } + openapiYamlGen = { id = "io.swagger.core.v3.swagger-gradle-plugin", version = "2.2.21" } openapiCodegen = { id = "org.openapi.generator", version = "6.6.0" } diff --git a/authority-portal-backend/settings.gradle.kts b/authority-portal-backend/settings.gradle.kts index d613ba1f8..a8c09d6e2 100644 --- a/authority-portal-backend/settings.gradle.kts +++ b/authority-portal-backend/settings.gradle.kts @@ -11,3 +11,7 @@ pluginManagement { include(":authority-portal-api") include(":authority-portal-db") include(":authority-portal-quarkus") +include(":catalog-crawler") +include(":catalog-crawler:catalog-crawler") +include(":catalog-crawler:catalog-crawler-launcher-base") +include(":catalog-crawler:catalog-crawler-ce-launcher") diff --git 
a/docs/deployment-guide/goals/production/README.md b/docs/deployment-guide/goals/production/README.md index 4b93a2e80..d77e62741 100644 --- a/docs/deployment-guide/goals/production/README.md +++ b/docs/deployment-guide/goals/production/README.md @@ -42,16 +42,16 @@ The respective compatible versions can be found in the [CHANGELOG.md](../../../. ### Deployment Units -| Deployment Unit | Version / Details | -|---------------------------|----------------------------------------------------------------------------------------------------------| -| Reverse Proxy / Ingress | _Infrastructure dependent_ | -| Keycloak Deployment | Version 24.0.4 or compatible version | -| OAuth2 Proxy | quay.io/oauth2-proxy/oauth2-proxy:7.5.0 | -| Caddy behind OAuth2 Proxy | caddy:2.7 | -| Authority Portal Backend | authority-portal-backend, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. | -| Authority Portal Frontend | authority-portal-frontend, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. | -| Catalog Crawler | ghcr.io/sovity/catalog-crawler-ce, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. | -| Postgresql | Version 16 or compatible version | +| Deployment Unit | Version / Details | +|---------------------------------------|---------------------------------------------------------------------------------------------------| +| Reverse Proxy / Ingress | _Infrastructure dependent_ | +| Keycloak Deployment | Version 24.0.4 or compatible version | +| OAuth2 Proxy | quay.io/oauth2-proxy/oauth2-proxy:7.5.0 | +| Caddy behind OAuth2 Proxy | caddy:2.7 | +| Authority Portal Backend | authority-portal-backend, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. | +| Authority Portal Frontend | authority-portal-frontend, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. 
| +| Catalog Crawler (one per environment) | authority-portal-crawler, see [CHANGELOG.md](../../../../CHANGELOG.md) for compatible versions. | +| Postgresql | Version 16 or compatible version | ### Configuration @@ -324,7 +324,82 @@ AUTHORITY_PORTAL_FRONTEND_PORTAL_DISPLAY_NAME: "Authority Portal" # Portal name - Each deployment environment requires a Data Catalog Crawler. - A Data Catalog Crawler is based on the EDC Connector and crawls the catalogs of all connectors in the dataspace. - You will need an SKI/AKI client ID to register the crawler. Please refer to the [EDC documentation](https://github.com/sovity/edc-ce/tree/main/docs/getting-started#faq) on how to generate one. -- See the [Catalog Crawler Productive Deployment Guide](https://github.com/sovity/edc-ce/blob/v10.4.1/docs/deployment-guide/goals/catalog-crawler-production/README.md) + +#### Reverse Proxy Configuration + +- The catalog crawler is meant to be served via TLS/HTTPS. +- The catalog crawler is meant to be deployed with a reverse proxy terminating TLS / providing HTTPS. +- All requests are meant to be redirected to the deployment's `11003` port. + +#### Catalog Crawler Configuration + +A production configuration will require you to join a DAPS. + +For that you will need an SKI/AKI client ID. Please refer +to [edc-extension's Getting Started Guide](https://github.com/sovity/edc-ce/tree/main/docs/getting-started#faq) +on how to generate one. + +The DAPS needs to contain the claim `referringConnector=broker` for the crawler. +Although it is discouraged to do so, the expected value `broker` can be overridden by specifying a different value for `MY_EDC_PARTICIPANT_ID`. 
+ +```yaml +# Required: Fully Qualified Domain Name +MY_EDC_FQDN: "crawler.test.example.com" + +# Required: Authority Portal Environment ID +CRAWLER_ENVIRONMENT_ID: test + +# Required: Authority Portal Postgresql DB Access +CRAWLER_DB_JDBC_URL: jdbc:postgresql://authority-portal:5432/portal +CRAWLER_DB_JDBC_USER: portal +CRAWLER_DB_JDBC_PASSWORD: portal + +# Required: DAPS credentials +EDC_OAUTH_TOKEN_URL: 'https://daps.test.mobility-dataspace.eu/token' +EDC_OAUTH_PROVIDER_JWKS_URL: 'https://daps.test.mobility-dataspace.eu/jwks' +EDC_OAUTH_CLIENT_ID: '_your SKI/AKI_' +EDC_KEYSTORE: '_your keystore file_' # Needs to be available as file in the running container +EDC_KEYSTORE_PASSWORD: '_your keystore password_' +EDC_OAUTH_CERTIFICATE_ALIAS: '1' +EDC_OAUTH_PRIVATE_KEY_ALIAS: '1' +``` + +You can also optionally override the following defaults: + +```yaml +# Database Connection Pool Size +CRAWLER_DB_CONNECTION_POOL_SIZE: 30 + +# Database Connection Timeout (in ms) +CRAWLER_DB_CONNECTION_TIMEOUT_IN_MS: 30000 + +# CRON interval for crawling ONLINE connectors +CRAWLER_CRON_ONLINE_CONNECTOR_REFRESH: '*/20 * * ? * *' + +# CRON interval for crawling OFFLINE connectors +CRAWLER_CRON_OFFLINE_CONNECTOR_REFRESH: '0 */5 * ? * *' + +# CRON interval for crawling DEAD connectors +CRAWLER_CRON_DEAD_CONNECTOR_REFRESH: '0 0 * ? * *' + +# CRON interval for marking connectors as DEAD +CRAWLER_SCHEDULED_KILL_OFFLINE_CONNECTORS: '0 0 2 ? * *' + +# Delete data offers / mark as dead after connector has been offline for: +CRAWLER_KILL_OFFLINE_CONNECTORS_AFTER: P5D + +# Hide data offers after connector has been offline for: +CRAWLER_HIDE_OFFLINE_DATA_OFFERS_AFTER: P1D + +# Parallelization for Crawling +CRAWLER_NUM_THREADS: 32 + +# Maximum number of Data Offers per Connector +CRAWLER_MAX_DATA_OFFERS_PER_CONNECTOR: 50 + +# Maximum number of Contract Offers per Data Offer +CRAWLER_MAX_CONTRACT_OFFERS_PER_DATA_OFFER: 10 +``` ## Initial Setup