Skip to content

Commit

Permalink
feat: catalog crawler (#358, moved here from CE)
Browse files Browse the repository at this point in the history
  • Loading branch information
richardtreier authored Nov 25, 2024
2 parents c6709ef + ef4478d commit bb6b794
Show file tree
Hide file tree
Showing 100 changed files with 5,970 additions and 16 deletions.
75 changes: 75 additions & 0 deletions .github/actions/build-connector-image/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Composite action: logs in to a container registry, derives image tags/labels
# and builds + pushes the Docker image defined by
# authority-portal-backend/catalog-crawler/Dockerfile.
name: "Build EDC Connector Image"
description: "Builds an EDC Connector Docker image and pushes it to a container registry"
inputs:
  registry-url:
    required: true
    description: "Docker Registry"
  registry-user:
    required: true
    description: "Docker Registry Login Username"
  registry-password:
    required: true
    description: "Docker Registry Login Password"
  image-base-name:
    required: true
    description: "Docker Image Base Name (Company)"
  image-name:
    required: true
    description: "Docker Image Name (Artifact Name)"
  # Passed to the Dockerfile as build arg CONNECTOR_NAME, which selects the
  # launcher directory the app.jar is copied from.
  connector-name:
    required: true
    description: "EDC Connector Name in authority-portal-backend/catalog-crawler/{connector-name}-launcher"
  title:
    required: true
    description: "Docker Image Title"
  description:
    required: true
    description: "Docker Image Description"
runs:
  using: "composite"
  steps:
    - name: "Docker: Log in to the Container registry"
      uses: docker/login-action@v2
      with:
        registry: ${{ inputs.registry-url }}
        username: ${{ inputs.registry-user }}
        password: ${{ inputs.registry-password }}
    # Exports LAST_COMMIT_INFO (multi-line) and BUILD_DATE to later steps.
    - name: "Docker: Store last commit info and build date"
      id: last-commit-information
      shell: bash
      run: |
        # Random heredoc delimiter: with a fixed "EOF", a commit message that
        # contains a line "EOF" would end the value early and could inject
        # additional environment variables.
        delimiter="$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
        # Plain assignment (no `export`), so a failing `git log` fails the step
        # instead of silently producing an empty value.
        last_commit_info="$(git log -1)"
        {
          echo "LAST_COMMIT_INFO<<${delimiter}"
          echo "${last_commit_info}"
          echo "${delimiter}"
          echo "BUILD_DATE=$(date --utc +%FT%TZ)"
        } >> "$GITHUB_ENV"
    - name: "Docker: Extract metadata (tags, labels)"
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ inputs.registry-url }}/${{ inputs.image-base-name }}/${{ inputs.image-name }}
        labels: |
          org.opencontainers.image.title=${{ inputs.title }}
          org.opencontainers.image.description=${{ inputs.description }}
        tags: |
          type=schedule
          type=semver,pattern={{version}}
          type=semver,pattern={{major}}.{{minor}}
          type=semver,pattern={{major}}
          type=ref,event=branch
          type=ref,event=pr
          type=sha
          type=raw,value=latest,enable={{is_default_branch}}
          type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }}
    - name: "Docker: Build and Push"
      uses: docker/build-push-action@v5
      with:
        file: authority-portal-backend/catalog-crawler/Dockerfile
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        # The commit info spans multiple lines, so that entry is wrapped in
        # double quotes to keep it a single build arg.
        build-args: |
          CONNECTOR_NAME=${{ inputs.connector-name }}
          "EDC_LAST_COMMIT_INFO_ARG=${{ env.LAST_COMMIT_INFO }}"
          EDC_BUILD_DATE_ARG=${{ env.BUILD_DATE }}
14 changes: 12 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
type=sha
type=raw,value=latest,enable={{is_default_branch}}
type=raw,value=release,enable=${{ startsWith(github.ref, 'refs/tags/') }}
- name: "Docker: Build and Push Image"
- name: "Docker: Build and Push Image (authority-portal-backend)"
uses: docker/build-push-action@v4
with:
file: authority-portal-backend/authority-portal-quarkus/src/main/docker/Dockerfile.jvm
Expand All @@ -101,7 +101,17 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
network: host

- name: "Docker: Build and Push Image (authority-portal-crawler)"
uses: ./.github/actions/build-connector-image
with:
registry-url: ${{ env.REGISTRY }}
registry-user: ${{ github.actor }}
registry-password: ${{ secrets.GITHUB_TOKEN }}
image-base-name: ${{ env.IMAGE_NAME_BASE }}
image-name: "authority-portal-crawler"
connector-name: "catalog-crawler-ce"
title: "Catalog Crawler (Community Edition, DAPS)"
description: "sovity CE Catalog crawler for the sovity CE Authority Portal. Requires DAPS dataspace credentials to join an existing dataspace."

frontend:
name: Frontend
Expand Down
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md).

#### Major

- The Catalog Crawler has been moved to the AP repository.

#### Minor

#### Patch
Expand All @@ -25,11 +27,15 @@ please see [changelog_updates.md](docs/dev/changelog_updates.md).

### Deployment Migration Notes

- The Crawler image name and version changed because the crawler was moved into the AP repository and its version is now aligned with the Authority Portal version:
- Previously: `ghcr.io/sovity/catalog-crawler-ce`
- Now: `ghcr.io/sovity/authority-portal-crawler`

#### Compatible Versions

- Authority Portal Backend Docker Image: `ghcr.io/sovity/authority-portal-backend:{{ version }}`
- Authority Portal Frontend Docker Image: `ghcr.io/sovity/authority-portal-frontend:{{ version }}`
- Catalog Crawler CE: `ghcr.io/sovity/catalog-crawler-ce:{{ CE VERSION }}`
- Catalog Crawler CE: `ghcr.io/sovity/authority-portal-crawler:{{ version }}`
- Sovity EDC CE: {{ CE Release Link }}

## [v4.1.2] - 2024-09-26
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ import de.sovity.authorityportal.api.model.CentralComponentCreateRequest
import de.sovity.authorityportal.api.model.CentralComponentDto
import de.sovity.authorityportal.api.model.ComponentStatusOverview
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithCertificateRequest
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest
import de.sovity.authorityportal.api.model.ConnectorDetailsDto
import de.sovity.authorityportal.api.model.ConnectorOverviewResult
import de.sovity.authorityportal.api.model.CreateCaasRequest
import de.sovity.authorityportal.api.model.CreateConnectorRequest
import de.sovity.authorityportal.api.model.ConfigureProvidedConnectorWithJwksRequest
import de.sovity.authorityportal.api.model.CreateConnectorResponse
import de.sovity.authorityportal.api.model.DeploymentEnvironmentDto
import de.sovity.authorityportal.api.model.IdResponse
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package de.sovity.authorityportal.web.thirdparty.daps
import de.sovity.authorityportal.web.environment.DeploymentEnvironmentConfiguration.DeploymentEnvironment.DapsConfig
import de.sovity.authorityportal.web.thirdparty.daps.ext.CustomKeycloakResource
import de.sovity.authorityportal.web.thirdparty.daps.ext.instantiateResource
import io.quarkus.logging.Log
import org.keycloak.admin.client.KeycloakBuilder
import org.keycloak.representations.idm.ClientRepresentation
import org.keycloak.representations.idm.ProtocolMapperRepresentation
Expand Down Expand Up @@ -43,7 +44,9 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable {
}

/**
 * Creates a client with the given [clientId] in the realm [realmName].
 *
 * The Keycloak admin client returns the HTTP response of the create call
 * rather than a created object, so the status is inspected before success is
 * logged, and the response is closed afterwards to release its connection.
 * A non-2xx status is logged as a warning; no exception is thrown, which
 * keeps the behavior callers saw before.
 */
fun createClient(clientId: String) {
    Log.info("Creating client $clientId in realm $realmName")
    keycloak.realm(realmName).clients().create(buildClientRepresentation(clientId)).use { response ->
        if (response.status in 200..299) {
            Log.info("Client $clientId created in realm $realmName")
        } else {
            Log.warn("Creating client $clientId in realm $realmName failed with HTTP status ${response.status}")
        }
    }
}

fun deleteClient(clientId: String) {
Expand All @@ -66,6 +69,7 @@ class DapsClient(dapsConfig: DapsConfig): AutoCloseable {
}

fun addJwksUrl(clientId: String, jwksUrl: String) {
Log.info("Getting client $clientId in realm $realmName")
val client = getClientById(clientId) ?: error("Client not found")

client.attributes["jwks.url"] = jwksUrl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ quarkus.arc.exclude-types=io.swagger.v3.jaxrs2.**
%test.quarkus.log.console.json=false
%test.quarkus.otel.traces.exporter=none

quarkus.otel.sdk.disabled=true

# Rest Client
quarkus.rest-client.http2=true

Expand Down
4 changes: 4 additions & 0 deletions authority-portal-backend/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ subprojects {
password = project.findProperty("gpr.key") as String? ?: System.getenv("GPR_KEY")
}
}
// Additional Maven repository "AzureRepo": the "core-edc" feed on pkgs.dev.azure.com/sovity.
// No credentials are configured for it in this block.
maven {
    name = "AzureRepo"
    url = uri("https://pkgs.dev.azure.com/sovity/41799556-91c8-4df6-8ddb-4471d6f15953/_packaging/core-edc/maven/v1")
}
}

configurations.all {
Expand Down
34 changes: 34 additions & 0 deletions authority-portal-backend/catalog-crawler/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runtime image for the Catalog Crawler: JRE-only Alpine base that runs a pre-built app.jar
FROM eclipse-temurin:17-jre-alpine

# Install curl for the HEALTHCHECK below and bash for the entrypoint script.
# bash also becomes the shell used by the following RUN instructions.
RUN apk add --no-cache curl bash
SHELL ["/bin/bash", "-c"]

# Create and switch to a non-root user without home directory and login shell.
# Subsequent RUN instructions and the container process run as this user.
RUN adduser -D -H -s /sbin/nologin edc
USER edc:edc

# Selects which launcher's app.jar is copied into the image:
# authority-portal-backend/catalog-crawler/${CONNECTOR_NAME}-launcher/build/libs/app.jar
ARG CONNECTOR_NAME="catalog-crawler-ce"

# For last-commit-info extension. The defaults are explanatory placeholder texts
# that end up in the environment variables below when the build args are not provided.
ARG EDC_LAST_COMMIT_INFO_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_LAST_COMMIT_INFO_ARG, so there's no last commit info."
ARG EDC_BUILD_DATE_ARG="The docker container was built outside of github actions and you didn't provide the build arg EDC_BUILD_DATE_ARG, so there's no build date."

# Paths are relative to the repository root, which must be the build context.
WORKDIR /app
COPY ./authority-portal-backend/catalog-crawler/${CONNECTOR_NAME}-launcher/build/libs/app.jar /app
COPY ./authority-portal-backend/catalog-crawler/logging.properties /app
COPY ./authority-portal-backend/catalog-crawler/logging.dev.properties /app

# Provide an empty properties file inside the image.
# NOTE(review): presumably referenced by the entrypoint as a default config file - confirm.
RUN touch /app/empty-properties-file.properties

# Expose the build args to the running container; JVM_ARGS defaults to empty.
ENV EDC_LAST_COMMIT_INFO=$EDC_LAST_COMMIT_INFO_ARG
ENV EDC_BUILD_DATE=$EDC_BUILD_DATE_ARG
ENV JVM_ARGS=""

# The entrypoint script is invoked with "start" unless the command is overridden.
COPY ./authority-portal-backend/catalog-crawler/docker-entrypoint.sh /app/entrypoint.sh
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["start"]

# Health status is determined by the health endpoint on port 11001:
# /api/check/health is tried first, /backend/api/check/health is the fallback.
# The value of EDC_API_AUTH_KEY is sent as the x-api-key header.
HEALTHCHECK --interval=5s --timeout=5s --retries=10 CMD curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/api/check/health || curl -H "x-api-key: $EDC_API_AUTH_KEY" --fail http://localhost:11001/backend/api/check/health
41 changes: 41 additions & 0 deletions authority-portal-backend/catalog-crawler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!-- PROJECT LOGO -->
<br />
<div align="center">
<a href="https://github.com/sovity/authority-portal">
<img src="https://raw.githubusercontent.com/sovity/edc-ui/main/src/assets/images/sovity_logo.svg" alt="Logo" width="300">
</a>

<h3 align="center">EDC-Connector Extension:<br />Catalog Crawler</h3>

<p align="center">
<a href="https://github.com/sovity/authority-portal/issues/new?assignees=&labels=kind%2Fbug&projects=&template=bug_report.yaml">Report Bug</a>
·
<a href="https://github.com/sovity/edc-ce/issues/new?template=feature_request.md">Request Feature</a>
</p>
</div>

## About this Extension

The Catalog Crawler is an additional deployment unit needed to determine the online status of registered connectors and populate the Data Catalog:

- It is a modified EDC connector whose task is to crawl the public data offers of the other connectors.
- It periodically checks the Authority Portal's connector list for its environment.
- It crawls the given connectors in regular intervals.
- It writes the data offers and connector statuses into the Authority Portal DB.
- Each environment configured in the Authority Portal requires its own Catalog Crawler with credentials for that environment's DAPS.

## Why does this component exist?

The Authority Portal uses a non-EDC stack and thus it cannot read the catalogs of participating connectors directly.

## Deployment

Please see the [Production Deployment Guide](../../docs/deployment-guide/goals/production/README.md) for more information.

## License

Apache License 2.0 - see [LICENSE](../../LICENSE)

## Contact

sovity GmbH - [email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Launcher build: an application packaged as a single shadow jar named "app.jar".
plugins {
    id("java-library")
    application
    alias(libs.plugins.shadow)
}

group = libs.versions.sovityEdcGroup.get()

dependencies {
    // Shared launcher base of the Catalog Crawler
    implementation(project(":catalog-crawler:catalog-crawler-launcher-base"))

    // Logging monitor and observability API
    api(libs.edc.monitorJdkLogger)
    api(libs.edc.apiObservability)

    // OAuth2 core and file-system vault
    implementation(libs.edc.oauth2Core)
    implementation(libs.edc.vaultFilesystem)
}

application {
    mainClass.set("de.sovity.edc.ext.catalog.crawler.Main")
}

// Fixed archive name; NOTE(review): presumably the app.jar that the crawler Dockerfile copies - confirm.
tasks.withType<com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar> {
    archiveFileName.set("app.jar")
    mergeServiceFiles()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Launcher base: exposes the EDC modules and the crawler module as `api`
// dependencies, so projects depending on this one get them transitively.
plugins {
    id("java-library")
}

group = libs.versions.sovityEdcGroup.get()

dependencies {
    // A minimal EDC that can request catalogs
    api(libs.edc.controlPlaneCore)
    api(libs.edc.dataPlaneSelectorCore)
    api(libs.edc.configurationFilesystem)
    api(libs.edc.controlPlaneAggregateServices)
    api(libs.edc.http)
    api(libs.edc.dsp)
    api(libs.edc.jsonLd)

    // Data Catalog Crawler
    api(project(":catalog-crawler:catalog-crawler"))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Build script of the Catalog Crawler module.
plugins {
    `java-library`
}

dependencies {
    // Lombok is only needed at compile time
    annotationProcessor(libs.lombok)
    compileOnly(libs.lombok)

    // EDC SPIs used by the extension
    implementation(libs.edc.controlPlaneSpi)
    implementation(libs.edc.managementApiConfiguration)

    // Scheduling, utilities and jOOQ
    implementation(libs.quartz.quartz)
    implementation(libs.commons.lang3)
    implementation(libs.quarkus.jooq)

    // sovity EDC libraries and the Authority Portal DB module, exposed to consumers
    api(libs.sovity.edc.catalogParser)
    api(libs.sovity.edc.jsonAndJsonLdUtils)
    api(libs.sovity.edc.wrapperCommonMappers)
    api(libs.sovity.edc.ext.postgresFlywayCore)
    api(libs.sovity.edc.config)
    api(project(":authority-portal-db"))

    // Test dependencies
    testAnnotationProcessor(libs.lombok)
    testCompileOnly(libs.lombok)
    testImplementation(libs.sovity.edc.ext.testUtils)
    testImplementation(libs.assertj.core)
    testImplementation(libs.mockito.core)
    testImplementation(libs.restAssured.restAssured)
    testImplementation(libs.testcontainers.testcontainers)
    testImplementation(libs.flyway.core)
    testImplementation(libs.testcontainers.junitJupiter)
    testImplementation(libs.testcontainers.postgresql)
    testImplementation(libs.junit.api)
    testImplementation(libs.jsonAssert)
    testRuntimeOnly(libs.junit.engine)
}

// `named` configures the task lazily (task configuration avoidance) instead of
// realizing it during the configuration phase as `getByName` does.
tasks.named<Test>("test") {
    useJUnitPlatform()
    // Run the tests in a single forked JVM
    maxParallelForks = 1
}
Loading

0 comments on commit bb6b794

Please sign in to comment.