Skip to content

Commit

Permalink
Merge branch 'main' into helm-chart-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
perttus authored Jan 20, 2023
2 parents 68fc574 + 3492df2 commit 190b454
Show file tree
Hide file tree
Showing 516 changed files with 3,488 additions and 638 deletions.
113 changes: 113 additions & 0 deletions .circleci/api-load-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/bin/bash
#
# Copyright 2018-2023 contributors to the Marquez project
# SPDX-License-Identifier: Apache-2.0
#
# A script used in CI to load test HTTP API server by:
# (1) Starting HTTP API server
# (2) Generating random dataset, job, and run metadata
# (3) Running load test using k6
# (4) Writing load test results to 'k6/results' for analysis
#
# Usage: $ ./api-load-test.sh

set -e

# --- Marquez build under test ------------------------------------------------
# Build version of Marquez, and the jar that version produces (the path is
# relative, so it resolves against the current working directory).
MARQUEZ_VERSION="0.30.0-SNAPSHOT"
MARQUEZ_JAR="api/build/libs/marquez-api-${MARQUEZ_VERSION}.jar"

# --- Where the script reaches the server and the db --------------------------
# MARQUEZ_URL is assembled from the host and the admin port.
MARQUEZ_HOST="localhost"
MARQUEZ_ADMIN_PORT=8081
MARQUEZ_URL="http://${MARQUEZ_HOST}:${MARQUEZ_ADMIN_PORT}"
MARQUEZ_DB="marquez-db"

# --- Load test metadata ------------------------------------------------------
# File the generated metadata is written to, and the query used to count
# lineage events per run.
METADATA_FILE="api/load-testing/metadata.json"
METADATA_STATS_QUERY="SELECT run_uuid,COUNT(*)
FROM lineage_events
GROUP BY run_uuid;"

# Freeze every setting above
readonly MARQUEZ_VERSION MARQUEZ_JAR
readonly MARQUEZ_HOST MARQUEZ_ADMIN_PORT MARQUEZ_URL MARQUEZ_DB
readonly METADATA_FILE METADATA_STATS_QUERY

# marquez.yml
#
# Server configuration used for the load test: application traffic on 8080,
# admin traffic on 8081, and a PostgreSQL connection on localhost:5432.
#
# The file is written into the project root rather than the caller's working
# directory: the script changes into the project root before launching the
# server with 'marquez.yml' as a relative path, so writing it anywhere else
# leaves the server without a config when the script is invoked from another
# directory.
marquez_config_dir=$(git rev-parse --show-toplevel)
# Quoted delimiter: the config is literal text, no shell expansion is wanted.
cat > "${marquez_config_dir}/marquez.yml" <<'EOF'
server:
  applicationConnectors:
  - type: http
    port: 8080
    httpCompliance: RFC7230_LEGACY
  adminConnectors:
  - type: http
    port: 8081
db:
  driverClass: org.postgresql.Driver
  url: jdbc:postgresql://localhost:5432/marquez
  user: marquez
  password: marquez
migrateOnStartup: true
EOF

# Print a message prefixed with a bold '>>' marker.
# Arguments: $1 - message to print
# Outputs:   the decorated message on stdout
log() {
  # '%b' expands backslash escapes across the whole line, exactly as
  # 'echo -e' does (both for the color codes and for the message itself).
  printf '%b\n' "\033[1m>>\033[0m ${1}"
}

# Print the contents of /proc/cpuinfo and /proc/meminfo, each under its own
# heading, so the load test results can be read next to the hardware they
# were produced on.
# Outputs: headings and both /proc files on stdout
cpu_and_mem_info() {
  log "CPU info:"
  cat /proc/cpuinfo

  log "MEM info:"
  cat /proc/meminfo
}

# Print load test metadata stats by running METADATA_STATS_QUERY with psql
# (as user 'marquez') inside the MARQUEZ_DB container.
# Globals: MARQUEZ_DB (read), METADATA_STATS_QUERY (read)
# Outputs: a heading followed by psql's output on stdout
metadata_stats() {
  log "load test metadata stats:"
  docker exec "${MARQUEZ_DB}" \
    psql --username=marquez --command="${METADATA_STATS_QUERY}"
}

# Everything below uses paths relative to the project root, so move there
project_root=$(git rev-parse --show-toplevel)
cd "${project_root}"

# (1) Start db (in the background, using the db-only compose file)
log "start db:"
docker-compose --file docker-compose.db.yml up -d

# (2) Build HTTP API server (tests excluded; all build output is discarded)
log "build http API server..."
./gradlew --no-daemon :api:build --exclude-task test &> /dev/null

# (3) Start HTTP API server
log "start http API server..."
# Create the log directory in the foreground, and tolerate a leftover
# directory from a previous run ('-p'). Chaining a plain 'mkdir' to the server
# command in one backgrounded list would silently skip the server start
# whenever 'marquez/' already exists, leaving the wait loop below spinning.
mkdir -p marquez
java -jar "${MARQUEZ_JAR}" server marquez.yml > marquez/http.log 2>&1 &
marquez_pid=$!

# (4) Wait for HTTP API server
log "waiting for http API server (${MARQUEZ_URL})..."
# Poll every 5s, for at most 60 attempts (5 minutes), and stop early when the
# server process has already died; either way show the server log so the
# cause is visible instead of hanging until the CI job is killed.
attempts_left=60
until curl --output /dev/null --silent --head --fail "${MARQUEZ_URL}/ping"; do
  if ! kill -0 "${marquez_pid}" 2> /dev/null; then
    log "http API server exited before becoming ready, server log:"
    cat marquez/http.log
    exit 1
  fi
  attempts_left=$((attempts_left - 1))
  if (( attempts_left <= 0 )); then
    log "timed out waiting for http API server, server log:"
    cat marquez/http.log
    exit 1
  fi
  sleep 5
done
# When available, print status
log "http API server is ready!"

# (5) Generate random dataset, job, and run metadata with the 'metadata'
#     command of the Marquez jar (10 runs, 16384 bytes per event)
log "generate load test metadata (${METADATA_FILE}):"
java -jar "${MARQUEZ_JAR}" metadata \
  --runs 10 \
  --bytes-per-event 16384 \
  --output "${METADATA_FILE}"

# Show the hardware the numbers below were measured on
cpu_and_mem_info

# (6) Run load test: 25 virtual users for 30 seconds; the full event stream
#     and the summary are both written under 'k6/results'
log "start load test:"
mkdir -p k6/results &&
  k6 run --vus 25 --duration 30s api/load-testing/http.js \
    --out json=k6/results/full.json \
    --summary-export=k6/results/summary.json

# Show what the load test wrote to the db
metadata_stats

echo "DONE!"
31 changes: 30 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ only-on-release: &only-on-release
ignore: /.*/

orbs:
# https://circleci.com/orbs/registry/orb/codecov/codecov
codecov: codecov/[email protected]

jobs:
Expand Down Expand Up @@ -148,6 +147,30 @@ jobs:
- run: npm install --prefix=${HOME}/.local --global redoc-cli
- run: redoc-cli bundle spec/openapi.yml

# Job: load test the HTTP API.
# Runs on a machine executor (ubuntu-2004:current). After checkout it runs the
# docker-compose, JDK 17 and k6 helper scripts, then the load test script, and
# finally stores the 'marquez' and 'k6' directories as build artifacts.
load-test-api:
working_directory: ~/marquez
machine:
image: ubuntu-2004:current
steps:
- checkout
- run: ./.circleci/get-docker-compose.sh
- run: ./.circleci/get-jdk17.sh
- run: ./.circleci/get-k6.sh
- run: ./.circleci/api-load-test.sh
# Directory the load test script writes the server log into
- store_artifacts:
path: marquez
# Directory the load test script writes k6 results into
- store_artifacts:
path: k6

# Job: test database migrations.
# Runs on a machine executor (ubuntu-2004:current). After checkout it runs the
# docker-compose helper script, then the db migration script.
migrate-db:
working_directory: ~/marquez
machine:
image: ubuntu-2004:current
steps:
- checkout
- run: ./.circleci/get-docker-compose.sh
- run: ./.circleci/db-migration.sh

release-java:
working_directory: ~/marquez
machine:
Expand Down Expand Up @@ -195,6 +218,12 @@ workflows:
- unit-test-web
- unit-test-client-python
- lint-spec-api
- load-test-api:
requires:
- build-api
- migrate-db:
requires:
- build-api
release:
jobs:
- build-client-python:
Expand Down
89 changes: 89 additions & 0 deletions .circleci/db-migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash
#
# Copyright 2018-2023 contributors to the Marquez project
# SPDX-License-Identifier: Apache-2.0
#
# A script used in CI to test database migrations by:
# (1) Applying db migrations on latest Marquez release
# (2) Taking a backup of db from Step 1
# (3) Applying db migrations on latest Marquez build using backup
#
# Usage: $ ./db-migration.sh

# --- Versions ----------------------------------------------------------------
# PostgreSQL image tag, the released Marquez version, and the abbreviated
# SHA1 of the latest commit (identifies the Marquez build).
POSTGRES_VERSION="12.1"
MARQUEZ_VERSION="0.29.0"
MARQUEZ_BUILD_VERSION="$(git log --pretty=format:'%h' -n 1)" # SHA1

# --- Backup db ---------------------------------------------------------------
# Volume holding the db backup, and the name of the container started on it.
DB_MIGRATION_VOLUME="marquez_db-backup"
DB_MIGRATION_BACKUP="db-migration-backup"

# Most recently installed, versioned entry of the flyway schema history.
DB_MIGRATION_QUERY="SELECT version,installed_on,checksum
FROM flyway_schema_history
WHERE version IS NOT NULL
ORDER BY installed_on DESC LIMIT 1;"

# Freeze every setting above
readonly POSTGRES_VERSION MARQUEZ_VERSION MARQUEZ_BUILD_VERSION
readonly DB_MIGRATION_VOLUME DB_MIGRATION_BACKUP DB_MIGRATION_QUERY

# Print a message prefixed with a bold '>>' marker.
# Arguments: $1 - message to print
# Outputs:   the decorated message on stdout
log() {
  # '%b' expands backslash escapes across the whole line, exactly as
  # 'echo -e' does (both for the color codes and for the message itself).
  printf '%b\n' "\033[1m>>\033[0m ${1}"
}

# Print an error message in red, prefixed with 'error: '.
# Arguments: $1 - message to print
# Outputs:   the decorated message on stderr, so diagnostics stay separate
#            from regular output when stdout is redirected or captured
error() {
  echo -e "\033[0;31merror: ${1}\033[0m" >&2
}

# Dump the docker-compose container logs, then terminate the script.
# Outputs: a heading and the container logs on stdout
# Returns: never returns; exits the script with status 1
exit_with_cause() {
  log "please view container logs for more details on cause:"
  docker-compose logs

  exit 1
}

# Display the latest migration applied to the backup db.
#
# Starts a PostgreSQL container on the backup volume unless one with the
# expected name is already running, waits for the server to accept
# connections, then runs DB_MIGRATION_QUERY with psql as user 'marquez'.
#
# Globals: DB_MIGRATION_BACKUP, DB_MIGRATION_VOLUME, POSTGRES_VERSION,
#          DB_MIGRATION_QUERY (all read)
# Outputs: a heading followed by psql's output on stdout
# Returns: exit status of the final 'docker exec ... psql'
query_db_migration() {
  local running_name attempt

  # Start db using backup (skipped when the container is already running)
  running_name=$(docker ps -f "name=${DB_MIGRATION_BACKUP}" --format '{{.Names}}')
  if [[ "${running_name}" != "${DB_MIGRATION_BACKUP}" ]]; then
    docker run -d --name "${DB_MIGRATION_BACKUP}" \
      -v "${DB_MIGRATION_VOLUME}:/var/lib/postgresql/data" \
      "postgres:${POSTGRES_VERSION}"
  fi

  # 'docker run -d' returns as soon as the container is created, which is
  # before PostgreSQL accepts connections; querying right away races the
  # server start-up. Poll for readiness for up to ~30s. If the server never
  # becomes ready, fall through so psql reports the connection error itself.
  for (( attempt = 1; attempt <= 30; attempt++ )); do
    if docker exec "${DB_MIGRATION_BACKUP}" \
      pg_isready --quiet -U marquez > /dev/null 2>&1; then
      break
    fi
    sleep 1
  done

  # Query applied db migrations
  log "latest migration applied to db:"
  docker exec "${DB_MIGRATION_BACKUP}" \
    psql -U marquez -c "${DB_MIGRATION_QUERY}"
}

# Change working directory to project root.
# This script does not run under 'set -e', so both steps are checked
# explicitly: if 'git rev-parse' fails, 'project_root' would be empty and the
# 'cd' below would silently move to '/' and carry on from there.
project_root=$(git rev-parse --show-toplevel) || exit 1
cd "${project_root}/" || exit 1

# (1) Apply db migrations on latest Marquez release
#     ('up.sh' output is discarded; its exit status decides success)
log "start db with latest migrations (marquez=${MARQUEZ_VERSION}):"
if ! ./docker/up.sh \
  --args "--exit-code-from seed_marquez" \
  --tag "${MARQUEZ_VERSION}" \
  --no-web \
  --seed > /dev/null; then
  # This step starts the db with the released migrations, not from the
  # backup; say so, so a failure here is not mistaken for the backup step.
  error "failed to start db with latest migrations!"
  exit_with_cause
fi

# Query, then display schema migration applied
query_db_migration

# (2) Apply db migrations on latest Marquez build using backup
#     ('up.sh' output is discarded; on failure, report and exit)
log "start db using backup (marquez=${MARQUEZ_BUILD_VERSION}):"
./docker/up.sh \
  --args "--exit-code-from seed_marquez" \
  --no-web \
  --no-volumes \
  --build \
  --seed > /dev/null || {
  error "failed to start db using backup!"
  exit_with_cause
}

# Query, then display additional schema migration applied on backup (if any)
query_db_migration

log "DONE!"
26 changes: 10 additions & 16 deletions .circleci/get-docker-compose.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2018-2022 contributors to the Marquez project
# SPDX-License-Identifier: Apache-2.0
#
# Usage: $ ./get-docker-compose.sh

set -e

# Version of docker compose to install
readonly DOCKER_COMPOSE_VERSION="1.29.2"

# Download docker compose for this OS / architecture.
# '--fail' makes curl exit non-zero on an HTTP error instead of saving the
# error page as if it were the binary.
curl --fail -L \
  "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" \
  > ~/docker-compose

# Change permissions, relocate docker compose, then verify.
# Kept as separate statements: 'set -e' does not abort on a failing command
# that is a non-final member of an '&&' list, so a chained form would let the
# script reach "DONE!" after a failed step.
chmod +x ~/docker-compose
sudo mv ~/docker-compose /usr/local/bin/docker-compose
docker-compose --version

echo "DONE!"
15 changes: 4 additions & 11 deletions .circleci/get-jdk17.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2018-2023 contributors to the Marquez project
# SPDX-License-Identifier: Apache-2.0
#
# Usage: $ ./get-jdk17.sh

set -e

# Trust the Adoptium signing key, then register the Adoptium apt repository
wget -qO - https://adoptium.jfrog.io/adoptium/api/gpg/key/public | sudo apt-key add -
sudo add-apt-repository --yes https://adoptium.jfrog.io/adoptium/deb

# Refresh the package index, then install JDK 17.
# Kept as separate statements: 'set -e' does not abort on a failing command
# that is a non-final member of an '&&' list, so a failed 'apt-get update'
# would otherwise be skipped over silently.
sudo apt-get update --allow-releaseinfo-change
sudo apt-get install --yes temurin-17-jdk
Expand Down
21 changes: 21 additions & 0 deletions .circleci/get-k6.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# Copyright 2018-2023 contributors to the Marquez project
# SPDX-License-Identifier: Apache-2.0
#
# Usage: $ ./get-k6.sh

set -e

# Delete existing key (if present)
sudo apt-key del k6

# Add k6 key and update the repository.
# Each command is its own statement: 'set -e' does not abort on a failing
# command that is a non-final member of an '&&' list.
sudo gpg -k
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list

# Install k6, then verify.
# Separate statements so a failed install stops the script here instead of
# falling through to "DONE!".
# NOTE(review): k6 is installed with snap, which does not read the apt source
# list written above - confirm whether an apt-based install was intended.
sudo snap install k6
k6 version

echo "DONE!"
4 changes: 2 additions & 2 deletions .github/workflows/test-chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fetch-depth: 0

- name: Setup Helm
uses: azure/setup-helm@v3.4
uses: azure/setup-helm@v3.5

- name: Setup Python
uses: actions/setup-python@v4
Expand All @@ -38,7 +38,7 @@ jobs:
run: ct lint --config ct.yaml

- name: Create kind cluster
uses: helm/kind-action@v1.4.0
uses: helm/kind-action@v1.5.0

- name: Run chart-testing (install)
run: ct install --config ct.yaml
17 changes: 13 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@

### Added

* Add point-in-time requests support to column-lineage endpoints [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
* Column-lineage endpoints support point-in-time requests [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*Enable requesting `column-lineage` endpoint by a dataset version, job version or dataset field of a specific dataset version.*
* Present column lineage of a dataset [`#2293`](https://github.com/MarquezProject/marquez/pull/2293) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*Column lineage of a dataset with a single level of depth can
be displayed in the dataset details tab.*
* Add point-in-time requests support to column-lineage endpoints [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*Enables requesting `column-lineage` endpoint by a dataset version, job version or dataset field of a specific dataset version.*
* Add column lineage point-in-time Java client methods [`#2269`](https://github.com/MarquezProject/marquez/pull/2269) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
* Add column lineage point-in-time Java client methods [`#2269`](https://github.com/MarquezProject/marquez/pull/2269) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*Java client methods to retrieve point-in-time `column-lineage`. Please note that the existing methods `getColumnLineageByDataset`, `getColumnLineageByDataset` and `getColumnLineageByDatasetField` are replaced by a single `getColumnLineage` method taking `NodeId` as a parameter.*
* Add raw event viewer to UI [`#2249`](https://github.com/MarquezProject/marquez/pull/2249) [@tito12](https://github.com/tito12)
*A new events page enables filtering events by date and expanding the payload by clicking on each event.*
* Update events page with styling synchronization [`#2324`](https://github.com/MarquezProject/marquez/pull/2324) [@phixMe](https://github.com/phixMe)
*Makes some updates to the new page to make it conform better to the overall design system.*
* Update helm Ingress template to be cross-compatible with recent k8s versions [`#2275`](https://github.com/MarquezProject/marquez/pull/2275) [@jlukenoff](https://github.com/jlukenoff)
* Update helm Ingress template to be cross-compatible with recent k8s versions [`#2275`](https://github.com/MarquezProject/marquez/pull/2275) [@jlukenoff](https://github.com/jlukenoff)
*Certain components of the Ingress schema have changed in recent versions of Kubernetes. This change updates the Ingress helm template to render based on the semantic Kubernetes version.*
* Add delete namespace endpoint doc to OpenAPI docs [`#2295`](https://github.com/MarquezProject/marquez/pull/2295) [@mobuchowski](https://github.com/mobuchowski)
*Adds a doc about the delete namespace endpoint.*
Expand All @@ -29,7 +34,7 @@
*The column-lineage endpoint was throwing an exception when no data type of the field was provided. Includes a test.*
* Include error message for JSON processing exception [`#2271`](https://github.com/MarquezProject/marquez/pull/2271) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*In case of JSON processing exceptions, the Marquez API now returns an exception message to a client.*
* Fix column lineage when multiple jobs write to same dataset [`#2289`](https://github.com/MarquezProject/marquez/pull/2289) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
* Fix column lineage when multiple jobs write to same dataset [`#2289`](https://github.com/MarquezProject/marquez/pull/2289) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
*The fix deprecates the way the fields `transformationDescription` and `transformationType` are returned. The deprecated way of returning those fields will be removed in 0.30.0.*
* Use raw link for `iconSearchArrow.svg` [`#2280`](https://github.com/MarquezProject/marquez/pull/2280) [@wslulciuc](https://github.com/wslulciuc)
*Using a direct link to the events viewer icon fixes a loading issue.*
Expand Down Expand Up @@ -889,3 +894,7 @@
## [0.1.0](https://github.com/MarquezProject/marquez/releases/tag/0.1.0) - 2018-12-18

* Marquez initial public release.

----
SPDX-License-Identifier: Apache-2.0
Copyright 2018-2023 contributors to the Marquez project.
Loading

0 comments on commit 190b454

Please sign in to comment.