From 8cd088d53b37e0dde4ce1891776491e8d8a8c7da Mon Sep 17 00:00:00 2001 From: Chaoyi Sun Date: Tue, 22 Oct 2024 12:07:45 -0700 Subject: [PATCH] #421 data/model lineage java17 upgrade --- build-parent/pom.xml | 3 ++ .../pom.xml | 16 ++------ .../src/main/resources/docker/Dockerfile | 4 +- .../foundation-data-lineage-consumer/pom.xml | 14 +++---- .../foundation-data-lineage-java/pom.xml | 41 +++++++++++++++---- .../aissemble/data/lineage/DatasetFacet.java | 2 +- .../aissemble/data/lineage/JobFacet.java | 2 +- .../aissemble/data/lineage/RunEvent.java | 3 +- .../src/main/resources/profiles.json | 3 -- .../mlflow.lineage.properties.vm | 3 +- .../general-mlflow/training.base.py.vm | 21 +++++++--- 11 files changed, 66 insertions(+), 46 deletions(-) diff --git a/build-parent/pom.xml b/build-parent/pom.xml index 31da65155..c44856a56 100644 --- a/build-parent/pom.xml +++ b/build-parent/pom.xml @@ -158,6 +158,9 @@ 6.14.3 3.12.0 2.0 + + + 1.23.0 diff --git a/extensions/extensions-data-lineage/extensions-data-lineage-http-consumer-service/pom.xml b/extensions/extensions-data-lineage/extensions-data-lineage-http-consumer-service/pom.xml index 78b78e8c9..c978edde7 100644 --- a/extensions/extensions-data-lineage/extensions-data-lineage-http-consumer-service/pom.xml +++ b/extensions/extensions-data-lineage/extensions-data-lineage-http-consumer-service/pom.xml @@ -42,14 +42,6 @@ io.quarkus quarkus-smallrye-fault-tolerance - - io.quarkus - quarkus-resteasy-reactive-jackson - - - io.quarkus - quarkus-smallrye-openapi - io.quarkus quarkus-rest-client-reactive-jackson @@ -96,9 +88,9 @@ test - com.github.tomakehurst - wiremock-jre8 - 2.35.1 + org.wiremock + wiremock-standalone + 3.9.1 test @@ -109,7 +101,7 @@ io.openlineage openlineage-java - 0.22.0 + ${version.open.lineage.java} diff --git a/extensions/extensions-docker/aissemble-nvidia/src/main/resources/docker/Dockerfile b/extensions/extensions-docker/aissemble-nvidia/src/main/resources/docker/Dockerfile index 9f36255ad..d91cef258 100644 --- a/extensions/extensions-docker/aissemble-nvidia/src/main/resources/docker/Dockerfile +++ b/extensions/extensions-docker/aissemble-nvidia/src/main/resources/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/nvidia/cuda:12.1.1-base-ubuntu22.04 +FROM docker.io/nvidia/cuda:12.6.2-base-ubuntu22.04 LABEL org.opencontainers.image.source="https://github.com/boozallen/aissemble" @@ -11,7 +11,7 @@ RUN chmod 755 /bin/uname RUN apt-get update \ && apt-get upgrade -y \ - && apt-get install -y openjdk-11-jdk \ + && apt-get install -y openjdk-17-jdk \ && update-ca-certificates \ && rm -rf /var/lib/apt/lists/* \ && apt-get clean \ No newline at end of file diff --git a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-consumer/pom.xml b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-consumer/pom.xml index 4da934678..76869c708 100644 --- a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-consumer/pom.xml +++ b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-consumer/pom.xml @@ -55,20 +55,16 @@ foundation-data-lineage-java ${project.version} - - org.apache.commons - commons-lang3 - 3.12.0 - - - org.apache.logging.log4j - log4j-core - com.boozallen.aissemble foundation-messaging-java ${project.version} + + io.openlineage + openlineage-java + ${version.open.lineage.java} + diff --git a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/pom.xml b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/pom.xml index 92cf72376..0941c760e 100644 --- a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/pom.xml +++ b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/pom.xml @@ -49,20 +49,12 @@ io.openlineage openlineage-java - 0.22.0 + ${version.open.lineage.java} org.technologybrewery.krausening krausening - - org.apache.commons - commons-lang3 - - - org.apache.logging.log4j - log4j-core - com.boozallen.aissemble foundation-messaging-java @@ -73,6 +65,25 @@ foundation-common ${project.version} + + io.smallrye.reactive + smallrye-reactive-messaging-provider + ${version.smallrye.reactive.messaging} + + + io.smallrye.reactive + smallrye-reactive-messaging-api + ${version.smallrye.reactive.messaging} + + + com.boozallen.aissemble + foundation-core-java + ${project.version} + + + org.slf4j + slf4j-api + @@ -99,6 +110,18 @@ ${version.cucumber.reporting.plugin} test + + org.jboss.weld.se + weld-se-core + ${version.weld} + test + + + org.jboss.weld + weld-api + 5.0.SP3 + test + diff --git a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/DatasetFacet.java b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/DatasetFacet.java index 9b318c500..2fd3de8e3 100644 --- a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/DatasetFacet.java +++ b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/DatasetFacet.java @@ -57,7 +57,7 @@ public class DefaultDatasetFacet extends OpenLineage.DefaultDatasetFacet { private final URI schemaUrl; public DefaultDatasetFacet(String schemaUrl, URI producer) { - super(producer); + super(producer, null); this.schemaUrl = URI.create(schemaUrl); } diff --git a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/JobFacet.java b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/JobFacet.java index 69558f40f..d698995af 100644 --- a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/JobFacet.java +++ b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/JobFacet.java @@ -57,7 +57,7 @@ public class DefaultJobFacet extends OpenLineage.DefaultJobFacet { private final URI schemaUrl; public DefaultJobFacet(String schemaUrl, URI producer) { - super(producer); + super(producer, null); this.schemaUrl = URI.create(schemaUrl); } diff --git a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/RunEvent.java b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/RunEvent.java index 2366dd4dc..6747a3b5a 100644 --- a/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/RunEvent.java +++ b/foundation/foundation-lineage/foundation-data-lineage/foundation-data-lineage-java/src/main/java/com/boozallen/aissemble/data/lineage/RunEvent.java @@ -69,8 +69,7 @@ public OpenLineage.RunEvent getOpenLineageRunEvent() { }); } - return openLineage.newRunEvent(EventType.valueOf(this.eventType), - zTime, + return openLineage.newRunEvent(zTime, EventType.valueOf(this.eventType), run.getOpenLineageRun(), job.getOpenLineageJob(), !olInputs.isEmpty() ? olInputs : null, diff --git a/foundation/foundation-mda/src/main/resources/profiles.json b/foundation/foundation-mda/src/main/resources/profiles.json index 5cc37d35f..58b19795c 100644 --- a/foundation/foundation-mda/src/main/resources/profiles.json +++ b/foundation/foundation-mda/src/main/resources/profiles.json @@ -542,9 +542,6 @@ }, { "name": "mlflowLineageProperties" - }, - { - "name": "trainingGitkeepFile" } ] }, diff --git a/foundation/foundation-mda/src/main/resources/templates/general-docker/mlflow.lineage.properties.vm b/foundation/foundation-mda/src/main/resources/templates/general-docker/mlflow.lineage.properties.vm index 21968a7a7..ee74334e4 100644 --- a/foundation/foundation-mda/src/main/resources/templates/general-docker/mlflow.lineage.properties.vm +++ b/foundation/foundation-mda/src/main/resources/templates/general-docker/mlflow.lineage.properties.vm @@ -1,2 +1,3 @@ data.lineage.enabled=true -data.lineage.producer=${scmUrl} \ No newline at end of file +data.lineage.producer=${scmUrl} +data.lineage.namespace=default \ No newline at end of file diff --git a/foundation/foundation-mda/src/main/resources/templates/general-mlflow/training.base.py.vm b/foundation/foundation-mda/src/main/resources/templates/general-mlflow/training.base.py.vm index 46edc0861..b6efb827e 100644 --- a/foundation/foundation-mda/src/main/resources/templates/general-mlflow/training.base.py.vm +++ b/foundation/foundation-mda/src/main/resources/templates/general-mlflow/training.base.py.vm @@ -166,10 +166,11 @@ class ${pipeline.capitalizedName}Base(ABC): #if ($pipeline.trainingStep.isModelLineageEnabled()) # pylint: disable-next=assignment-from-none event_data = self.create_base_lineage_event_data() + default_namespace = self.get_default_namespace() #end start = datetime.utcnow() #if ($pipeline.trainingStep.isModelLineageEnabled()) - self.record_lineage(self.create_lineage_start_event(run_id=run_id, job_name=job_name, parent_run_facet=parent_run_facet, event_data=event_data, start_time=start)) + self.record_lineage(self.create_lineage_start_event(run_id=run_id, job_name=job_name, default_namespace=default_namespace, parent_run_facet=parent_run_facet, event_data=event_data, start_time=start)) #end loaded_dataset = self.load_dataset() prepped_dataset = self.prep_dataset(loaded_dataset) @@ -186,12 +187,12 @@ class ${pipeline.capitalizedName}Base(ABC): self.log_information(start, end, loaded_dataset, features) self.logger.info('Complete') #if ($pipeline.trainingStep.isModelLineageEnabled()) - self.record_lineage(self.create_lineage_complete_event(run_id=run_id, job_name=job_name, parent_run_facet=parent_run_facet, event_data=event_data, start_time=start, end_time=end)) + self.record_lineage(self.create_lineage_complete_event(run_id=run_id, job_name=job_name, default_namespace=default_namespace, parent_run_facet=parent_run_facet, event_data=event_data, start_time=start, end_time=end)) #end except Exception as error: #if ($pipeline.trainingStep.isModelLineageEnabled()) - self.record_lineage(self.create_lineage_fail_event(run_id=run_id, job_name=job_name, event_data=event_data, parent_run_facet=parent_run_facet, start_time=start, end_time=datetime.now(), error=error)) + self.record_lineage(self.create_lineage_fail_event(run_id=run_id, job_name=job_name, event_data=event_data, default_namespace=default_namespace, parent_run_facet=parent_run_facet, start_time=start, end_time=datetime.now(), error=error)) PipelineBase().record_pipeline_lineage_fail_event() #end raise Exception(error) @@ -237,7 +238,7 @@ class ${pipeline.capitalizedName}Base(ABC): return LineageEventData(job_facets=job_facets, run_facets=run_facets, event_inputs=[input_dataset]) - def create_lineage_start_event(self, run_id: str = None, job_name: str = "", parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: + def create_lineage_start_event(self, run_id: str = None, job_name: str = "", default_namespace:str = None, parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: """ Creates the Start RunEvent with given uuid, parent run facet, job name, lineage data event or any input parameters To customize the event, override the customize_lineage_start_event(...) function to include the job facets, run facets @@ -253,6 +254,7 @@ class ${pipeline.capitalizedName}Base(ABC): run_id=run_id, parent_run_facet=parent_run_facet, job_name=job_name, + default_namespace=default_namespace, event_data=event_data) event = self.customize_lineage_start_event(event, **kwargs) return self.customize_run_event(event) @@ -273,7 +275,7 @@ class ${pipeline.capitalizedName}Base(ABC): return event - def create_lineage_complete_event(self, run_id: str = None, job_name: str = "", parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: + def create_lineage_complete_event(self, run_id: str = None, job_name: str = "", default_namespace:str = None, parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: """ Creates the Complete RunEvent with given uuid, parent run facet, job name, lineage data event or any input parameters To customize the event, override the customize_lineage_complete_event(...) function to include the job facets, run facets @@ -289,6 +291,7 @@ class ${pipeline.capitalizedName}Base(ABC): run_id=run_id, parent_run_facet=parent_run_facet, job_name=job_name, + default_namespace=default_namespace, event_data=event_data) event = self.customize_lineage_complete_event(event, **kwargs) return self.customize_run_event(event) @@ -305,7 +308,7 @@ class ${pipeline.capitalizedName}Base(ABC): event.run.facets.update(self.record_run_end(kwargs["start_time"], kwargs["end_time"])) return event - def create_lineage_fail_event(self, run_id: str = None, job_name: str = "", parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: + def create_lineage_fail_event(self, run_id: str = None, job_name: str = "", default_namespace:str = None, parent_run_facet: ParentRunFacet = None, event_data: LineageEventData = None, **kwargs) -> RunEvent: """ Creates the Fail RunEvent with given uuid, parent run facet, job name, lineage data event or any input parameters To customize the event, override the customize_lineage_fail_event(...) function to include the job facets, run facets @@ -321,6 +324,7 @@ class ${pipeline.capitalizedName}Base(ABC): run_id=run_id, parent_run_facet=parent_run_facet, job_name=job_name, + default_namespace=default_namespace, event_data=event_data) event = self.customize_lineage_fail_event(event, **kwargs) return self.customize_run_event(event) @@ -377,6 +381,11 @@ class ${pipeline.capitalizedName}Base(ABC): """ return "${pipeline.capitalizedName}.${pipeline.trainingStep.name}" + def get_default_namespace(self) -> str: + """ + The default namespace is the Pipeline name. Override this function to change the default namespace. + """ + return "${pipeline.capitalizedName}" #end def set_dataset_origin(self, origin: str) -> None: