From d71aa850336cf643acd8b904d588d977740d49ad Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 16 Aug 2024 12:34:22 +0200 Subject: [PATCH 1/5] Kinesis: Adjust code for lorrystream version 0.0.3. Add tests. On the last occasion, dependencies on lorrystream were not verified correctly, so it was to be expected that things would go south. This patch fixes them, and adds rudimentary software tests to avoid tripping into the same situation as before. --- CHANGES.md | 1 + cratedb_toolkit/iac/aws.py | 5 +-- .../io/processor/kinesis_lambda.py | 2 +- pyproject.toml | 2 +- tests/io/test_iac.py | 10 ++++++ tests/io/test_processor.py | 31 +++++++++++++++++++ 6 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 tests/io/test_iac.py create mode 100644 tests/io/test_processor.py diff --git a/CHANGES.md b/CHANGES.md index a9589fa6..763de469 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,7 @@ - MongoDB: Make `migr8 extract` and `migr8 export` accept the `--limit` option - MongoDB: Fix indentation in prettified SQL output of `migr8 translate` - MongoDB: Add capability to give type hints and add transformations +- Dependencies: Adjust code for lorrystream version 0.0.3 ## 2024/07/25 v0.0.16 - `ctk load table`: Added support for MongoDB Change Streams diff --git a/cratedb_toolkit/iac/aws.py b/cratedb_toolkit/iac/aws.py index 904af12c..1aad2d73 100644 --- a/cratedb_toolkit/iac/aws.py +++ b/cratedb_toolkit/iac/aws.py @@ -1,9 +1,10 @@ +from lorrystream.carabas.aws import DynamoDBKinesisPipe, RDSPostgreSQLDMSKinesisPipe from lorrystream.carabas.aws.function.model import LambdaFactory from lorrystream.carabas.aws.function.oci import LambdaPythonImage -from lorrystream.carabas.aws.stack import DynamoDBKinesisPipe __all__ = [ + "DynamoDBKinesisPipe", "LambdaFactory", "LambdaPythonImage", - "DynamoDBKinesisPipe", + "RDSPostgreSQLDMSKinesisPipe", ] diff --git a/cratedb_toolkit/io/processor/kinesis_lambda.py b/cratedb_toolkit/io/processor/kinesis_lambda.py index 
c9c60a78..c2219beb 100644 --- a/cratedb_toolkit/io/processor/kinesis_lambda.py +++ b/cratedb_toolkit/io/processor/kinesis_lambda.py @@ -25,7 +25,7 @@ # /// script # requires-python = ">=3.9" # dependencies = [ -# "commons-codec==0.0.3", +# "commons-codec==0.0.4", # "sqlalchemy-cratedb==0.38.0", # ] # /// diff --git a/pyproject.toml b/pyproject.toml index 1771e5f4..85cc9d26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ io = [ "sqlalchemy>=2", ] kinesis = [ - "lorrystream @ git+https://github.com/daq-tools/lorrystream.git@kinesis", + "lorrystream[carabas]==0.0.3", ] mongodb = [ "commons-codec[mongodb,zyp]==0.0.4", diff --git a/tests/io/test_iac.py b/tests/io/test_iac.py new file mode 100644 index 00000000..8136bfd6 --- /dev/null +++ b/tests/io/test_iac.py @@ -0,0 +1,10 @@ +# ruff: noqa: F401 + + +def test_iac_imports(): + from cratedb_toolkit.iac.aws import ( + DynamoDBKinesisPipe, + LambdaFactory, + LambdaPythonImage, + RDSPostgreSQLDMSKinesisPipe, + ) diff --git a/tests/io/test_processor.py b/tests/io/test_processor.py new file mode 100644 index 00000000..db972ffc --- /dev/null +++ b/tests/io/test_processor.py @@ -0,0 +1,31 @@ +import os +import sys + +import pytest + + +@pytest.fixture +def reset_handler(): + try: + del sys.modules["cratedb_toolkit.io.processor.kinesis_lambda"] + except KeyError: + pass + + +def test_processor_invoke_no_records(reset_handler, mocker, caplog): + """ + Roughly verify that the unified Lambda handler works. + """ + + # Configure environment variables. 
+ handler_environment = { + "MESSAGE_FORMAT": "dms", + } + mocker.patch.dict(os.environ, handler_environment) + + from cratedb_toolkit.io.processor.kinesis_lambda import handler + + event = {"Records": []} + handler(event, None) + + assert "Successfully processed 0 records" in caplog.messages From 47c01368626a0e755c7819c5ee6b9b2e760e533a Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 16 Aug 2024 12:48:31 +0200 Subject: [PATCH 2/5] Kinesis: Add CI configuration for software tests --- .github/workflows/main.yml | 65 +++++++++++++++++++ .../io/processor/kinesis_lambda.py | 2 +- pyproject.toml | 3 +- tests/io/test_iac.py | 5 ++ tests/io/test_processor.py | 4 ++ 5 files changed, 77 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 620f512c..4a56c923 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -155,3 +155,68 @@ jobs: env_vars: OS,PYTHON name: codecov-umbrella fail_ci_if_error: true + + + tests-kinesis: + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + python-version: ["3.9", "3.12"] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + # Do not tear down Testcontainers + TC_KEEPALIVE: true + + # https://docs.github.com/en/actions/using-containerized-services/about-service-containers + services: + cratedb: + image: crate/crate:nightly + ports: + - 4200:4200 + - 5432:5432 + + name: " + Kinesis: + Python ${{ matrix.python-version }} on OS ${{ matrix.os }}" + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: 'pyproject.toml' + + - name: Set up project + run: | + + # `setuptools 64.0.0` adds support for editable install hooks (PEP 660). 
+ # https://github.com/pypa/setuptools/blob/main/CHANGES.rst#v6400 + pip install "setuptools>=64" --upgrade + + # Install package in editable mode. + pip install --use-pep517 --prefer-binary --editable=.[kinesis,test,develop] + + - name: Run linter and software tests + run: | + pytest -m kinesis + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + files: ./coverage.xml + flags: kinesis + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: true diff --git a/cratedb_toolkit/io/processor/kinesis_lambda.py b/cratedb_toolkit/io/processor/kinesis_lambda.py index c2219beb..c9c60a78 100644 --- a/cratedb_toolkit/io/processor/kinesis_lambda.py +++ b/cratedb_toolkit/io/processor/kinesis_lambda.py @@ -25,7 +25,7 @@ # /// script # requires-python = ">=3.9" # dependencies = [ -# "commons-codec==0.0.4", +# "commons-codec==0.0.3", # "sqlalchemy-cratedb==0.38.0", # ] # /// diff --git a/pyproject.toml b/pyproject.toml index 85cc9d26..63e61eff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,7 @@ dependencies = [ ] [project.optional-dependencies] all = [ - "cratedb-toolkit[full,influxdb,mongodb]", + "cratedb-toolkit[full,influxdb,kinesis,mongodb]", ] cfr = [ "pandas<2.2", @@ -255,6 +255,7 @@ xfail_strict = true markers = [ "examples", "influxdb", + "kinesis", "mongodb", "pymongo", "slow", diff --git a/tests/io/test_iac.py b/tests/io/test_iac.py index 8136bfd6..ef49b088 100644 --- a/tests/io/test_iac.py +++ b/tests/io/test_iac.py @@ -1,4 +1,9 @@ # ruff: noqa: F401 +import pytest + +pytestmark = pytest.mark.kinesis + +pytest.importorskip("lorrystream", reason="Only works with LorryStream installed") def test_iac_imports(): diff --git a/tests/io/test_processor.py b/tests/io/test_processor.py index db972ffc..49e5bc37 100644 --- a/tests/io/test_processor.py +++ b/tests/io/test_processor.py @@ -3,6 +3,10 @@ import pytest +pytestmark = pytest.mark.kinesis + 
+pytest.importorskip("commons_codec", reason="Only works with commons-codec installed") + @pytest.fixture def reset_handler(): From 48daeaa09f7b165df99b9d1dfc970d399665d06e Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 16 Aug 2024 15:28:20 +0200 Subject: [PATCH 3/5] Dependencies: Update to lorrystream 0.0.4 and commons-codec 0.0.6 --- CHANGES.md | 1 + cratedb_toolkit/io/processor/kinesis_lambda.py | 2 +- pyproject.toml | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 763de469..a8c93867 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,6 +16,7 @@ - MongoDB: Fix indentation in prettified SQL output of `migr8 translate` - MongoDB: Add capability to give type hints and add transformations - Dependencies: Adjust code for lorrystream version 0.0.3 +- Dependencies: Update to lorrystream 0.0.4 and commons-codec 0.0.6 ## 2024/07/25 v0.0.16 - `ctk load table`: Added support for MongoDB Change Streams diff --git a/cratedb_toolkit/io/processor/kinesis_lambda.py b/cratedb_toolkit/io/processor/kinesis_lambda.py index c9c60a78..0f37851c 100644 --- a/cratedb_toolkit/io/processor/kinesis_lambda.py +++ b/cratedb_toolkit/io/processor/kinesis_lambda.py @@ -25,7 +25,7 @@ # /// script # requires-python = ">=3.9" # dependencies = [ -# "commons-codec==0.0.3", +# "commons-codec==0.0.6", # "sqlalchemy-cratedb==0.38.0", # ] # /// diff --git a/pyproject.toml b/pyproject.toml index 63e61eff..75d62969 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,10 +151,10 @@ io = [ "sqlalchemy>=2", ] kinesis = [ - "lorrystream[carabas]==0.0.3", + "lorrystream[carabas]==0.0.4", ] mongodb = [ - "commons-codec[mongodb,zyp]==0.0.4", + "commons-codec[mongodb,zyp]==0.0.6", "cratedb-toolkit[io]", "orjson<4,>=3.3.1", "pymongo<5,>=3.10.1", From 329a731cd63d81c54f54cd31a80a6452542d7e97 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 16 Aug 2024 16:42:43 +0200 Subject: [PATCH 4/5] Dependencies: Don't include `kinesis` extra within `all` extra 
It causes some woes around the `multiprocess` package, which become apparent when building the OCI image. So, let's just not include it in the standard OCI release package for now. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 75d62969..db8dccc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,7 +107,7 @@ dependencies = [ ] [project.optional-dependencies] all = [ - "cratedb-toolkit[full,influxdb,kinesis,mongodb]", + "cratedb-toolkit[full,influxdb,mongodb]", ] cfr = [ "pandas<2.2", From f12d5bc04fd8566ed69d7b683c2db5469783c74a Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 16 Aug 2024 17:03:58 +0200 Subject: [PATCH 5/5] Chore: Update Dockerfile with new syntax about ENV directives The admonition raised by GHA check warnings is: > Legacy key/value format with whitespace separator should not be used > > LegacyKeyValueFormat: "ENV key=value" should be used instead of legacy > "ENV key value" format > More info: https://docs.docker.com/go/dockerfile/rule/legacy-key-value-format/ --- release/oci/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release/oci/Dockerfile b/release/oci/Dockerfile index e56e8b1f..38aedada 100644 --- a/release/oci/Dockerfile +++ b/release/oci/Dockerfile @@ -6,8 +6,8 @@ FROM python:3.11-slim-bookworm -ENV DEBIAN_FRONTEND noninteractive -ENV TERM linux +ENV DEBIAN_FRONTEND=noninteractive +ENV TERM=linux # Install Git, it is needed for `versioningit`. RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache