refactor(flink): port to sqlglot #18998
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Backends | |
on: | |
push: | |
# Skip the backend suite if all changes are docs | |
paths-ignore: | |
- "docs/**" | |
- "**/*.md" | |
- "**/*.qmd" | |
- "codecov.yml" | |
- ".envrc" | |
- ".codespellrc" | |
branches: | |
- main | |
- "*.x.x" | |
- the-epic-split | |
pull_request: | |
# Skip the backend suite if all changes are docs | |
paths-ignore: | |
- "docs/**" | |
- "**/*.md" | |
- "**/*.qmd" | |
- "codecov.yml" | |
- ".envrc" | |
- ".codespellrc" | |
branches: | |
- main | |
- "*.x.x" | |
- the-epic-split | |
merge_group: | |
permissions: | |
# this allows extractions/setup-just to list releases for `just` at a higher | |
# rate limit while restricting GITHUB_TOKEN permissions elsewhere | |
contents: read | |
concurrency: | |
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} | |
cancel-in-progress: true | |
env: | |
FORCE_COLOR: "1" | |
ODBCSYSINI: "${{ github.workspace }}/ci/odbc" | |
HYPOTHESIS_PROFILE: "ci" | |
jobs: | |
test_backends: | |
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} | |
runs-on: ${{ matrix.os }} | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
- windows-latest | |
python-version: | |
- "3.9" | |
- "3.11" | |
backend: | |
- name: duckdb | |
title: DuckDB | |
extras: | |
- duckdb | |
- deltalake | |
- geospatial | |
- examples | |
- decompiler | |
additional_deps: | |
- torch | |
- name: clickhouse | |
title: ClickHouse | |
services: | |
- clickhouse | |
extras: | |
- clickhouse | |
- examples | |
- name: dask | |
title: Dask | |
extras: | |
- dask | |
- name: pandas | |
title: Pandas | |
extras: | |
- pandas | |
- name: sqlite | |
title: SQLite | |
extras: | |
- sqlite | |
- name: datafusion | |
title: Datafusion | |
extras: | |
- datafusion | |
- name: polars | |
title: Polars | |
extras: | |
- polars | |
- deltalake | |
- name: mysql | |
title: MySQL | |
services: | |
- mysql | |
extras: | |
- mysql | |
- geospatial | |
sys-deps: | |
- libgeos-dev | |
- name: postgres | |
title: PostgreSQL | |
extras: | |
- postgres | |
- geospatial | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- name: postgres | |
title: PostgreSQL + Torch | |
extras: | |
- postgres | |
- geospatial | |
additional_deps: | |
- torch | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- name: risingwave | |
title: Risingwave | |
services: | |
- risingwave | |
extras: | |
- risingwave | |
- name: impala | |
title: Impala | |
serial: true | |
extras: | |
- impala | |
services: | |
- impala | |
- kudu | |
sys-deps: | |
- cmake | |
- ninja-build | |
- name: mssql | |
title: MS SQL Server | |
extras: | |
- mssql | |
services: | |
- mssql | |
sys-deps: | |
- freetds-dev | |
- unixodbc-dev | |
- tdsodbc | |
- name: trino | |
title: Trino | |
extras: | |
- trino | |
services: | |
- trino | |
- name: druid | |
title: Druid | |
extras: | |
- druid | |
services: | |
- druid | |
- name: exasol | |
title: Exasol | |
serial: true | |
extras: | |
- exasol | |
services: | |
- exasol | |
- name: oracle | |
title: Oracle | |
serial: true | |
extras: | |
- oracle | |
services: | |
- oracle | |
- name: flink | |
title: Flink | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
services: | |
- flink | |
include: | |
- os: ubuntu-latest | |
python-version: "3.10" | |
backend: | |
name: flink | |
title: Flink | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
services: | |
- flink | |
exclude: | |
- os: windows-latest | |
backend: | |
name: mysql | |
title: MySQL | |
extras: | |
- mysql | |
- geospatial | |
services: | |
- mysql | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: clickhouse | |
title: ClickHouse | |
extras: | |
- clickhouse | |
- examples | |
services: | |
- clickhouse | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL | |
extras: | |
- postgres | |
- geospatial | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: risingwave | |
title: Risingwave | |
services: | |
- risingwave | |
extras: | |
- risingwave | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL + Torch | |
extras: | |
- postgres | |
- geospatial | |
additional_deps: | |
- torch | |
services: | |
- postgres | |
sys-deps: | |
- libgeos-dev | |
- os: windows-latest | |
backend: | |
name: impala | |
title: Impala | |
serial: true | |
extras: | |
- impala | |
services: | |
- impala | |
- kudu | |
sys-deps: | |
- cmake | |
- ninja-build | |
- os: windows-latest | |
backend: | |
name: mssql | |
title: MS SQL Server | |
extras: | |
- mssql | |
services: | |
- mssql | |
sys-deps: | |
- freetds-dev | |
- unixodbc-dev | |
- tdsodbc | |
- os: windows-latest | |
backend: | |
name: trino | |
title: Trino | |
services: | |
- trino | |
extras: | |
- trino | |
- os: windows-latest | |
backend: | |
name: druid | |
title: Druid | |
extras: | |
- druid | |
services: | |
- druid | |
- os: windows-latest | |
backend: | |
name: oracle | |
title: Oracle | |
serial: true | |
extras: | |
- oracle | |
services: | |
- oracle | |
- os: windows-latest | |
backend: | |
name: flink | |
title: Flink | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
services: | |
- flink | |
- os: ubuntu-latest | |
python-version: "3.11" | |
backend: | |
name: flink | |
title: Flink | |
extras: | |
- flink | |
additional_deps: | |
- apache-flink | |
services: | |
- flink | |
- os: windows-latest | |
backend: | |
name: exasol | |
title: Exasol | |
serial: true | |
extras: | |
- exasol | |
services: | |
- exasol | |
steps: | |
- name: update and install system dependencies | |
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null | |
run: | | |
set -euo pipefail | |
sudo apt-get update -qq -y | |
sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} | |
- name: install sqlite | |
if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' | |
run: choco install sqlite | |
- name: checkout | |
uses: actions/checkout@v4 | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: start services | |
if: matrix.backend.services != null | |
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} | |
- name: install poetry | |
run: pipx install 'poetry==1.7.1' | |
- name: install python | |
uses: actions/setup-python@v5 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
cache: poetry | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" | |
- name: install deps for broken avro-python setup | |
if: matrix.backend.name == 'flink' | |
run: poetry run pip install wheel | |
- name: install other deps | |
if: matrix.backend.additional_deps != null | |
run: poetry run pip install ${{ join(matrix.backend.additional_deps, ' ') }} | |
- name: show installed deps | |
run: poetry run pip list | |
- name: "run parallel tests: ${{ matrix.backend.name }}" | |
if: ${{ !matrix.backend.serial }} | |
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
env: | |
IBIS_TEST_IMPALA_HOST: localhost | |
IBIS_TEST_IMPALA_PORT: 21050 | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: "run serial tests: ${{ matrix.backend.name }}" | |
if: matrix.backend.serial | |
run: just ci-check -m ${{ matrix.backend.name }} | |
env: | |
FLINK_REMOTE_CLUSTER_ADDR: localhost | |
FLINK_REMOTE_CLUSTER_PORT: "8081" | |
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} | |
- name: check that no untracked files were produced | |
shell: bash | |
run: | | |
! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
continue-on-error: true | |
uses: codecov/codecov-action@v4 | |
with: | |
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
- name: Show docker compose logs on fail | |
if: matrix.backend.services != null && failure() | |
run: docker compose logs | |
test_backends_min_version: | |
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} | |
runs-on: ${{ matrix.os }} | |
strategy: | |
fail-fast: false | |
matrix: | |
os: | |
- ubuntu-latest | |
- windows-latest | |
python-version: | |
- "3.9" | |
- "3.11" | |
backend: | |
- name: dask | |
title: Dask | |
deps: | |
- "dask[array,dataframe]@2022.9.1" | |
- "[email protected]" | |
extras: | |
- dask | |
- name: postgres | |
title: PostgreSQL | |
deps: | |
- "[email protected]" | |
- "[email protected]" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
exclude: | |
- os: windows-latest | |
backend: | |
name: postgres | |
title: PostgreSQL | |
deps: | |
- "[email protected]" | |
- "[email protected]" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
- python-version: "3.11" | |
backend: | |
name: postgres | |
title: PostgreSQL | |
deps: | |
- "[email protected]" | |
- "[email protected]" | |
- "Shapely@2" | |
services: | |
- postgres | |
extras: | |
- postgres | |
- geospatial | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- name: install libgeos for shapely | |
if: matrix.backend.name == 'postgres' | |
run: | | |
sudo apt-get update -y -qq | |
sudo apt-get install -qq -y build-essential libgeos-dev | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: start services | |
if: matrix.backend.services != null | |
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} | |
- name: install python | |
uses: actions/setup-python@v5 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.7.1' | |
- name: remove lonboard | |
# it requires a version of pandas that min versions are not compatible with | |
run: poetry remove lonboard | |
- name: install minimum versions | |
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} | |
- name: checkout the lock file | |
run: git checkout poetry.lock | |
- name: lock with no updates | |
# poetry add is aggressive and will update other dependencies like | |
# numpy and pandas so we keep the pyproject.toml edits and then relock | |
# without updating anything except the requested versions | |
run: poetry lock --no-update | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" | |
- name: run tests | |
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
if: success() | |
continue-on-error: true | |
uses: codecov/codecov-action@v4 | |
with: | |
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
- name: Show docker compose logs on fail | |
if: matrix.backend.services != null && failure() | |
run: docker compose logs | |
test_pyspark: | |
name: PySpark ${{ matrix.pyspark-version }} ubuntu-latest python-${{ matrix.python-version }} | |
runs-on: ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
include: | |
- name: pyspark-3.3 | |
python-version: "3.9" | |
pyspark-version: 3.3 | |
deps: | |
- "[email protected]" | |
- "'pandas@<2'" | |
- "'numpy@<1.24'" | |
- name: pyspark-3.5 | |
python-version: "3.11" | |
pyspark-version: 3.5 | |
deps: | |
- "[email protected]" | |
- "'pandas@>2'" | |
- "'numpy@>1.24'" | |
steps: | |
- name: checkout | |
uses: actions/checkout@v4 | |
- uses: actions/setup-java@v4 | |
with: | |
distribution: microsoft | |
java-version: 17 | |
- uses: extractions/setup-just@v1 | |
env: | |
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
- name: download backend data | |
run: just download-data | |
- name: install python | |
uses: actions/setup-python@v5 | |
id: install_python | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: install poetry | |
run: python -m pip install --upgrade pip 'poetry==1.7.1' | |
- name: remove lonboard | |
# it requires a version of pandas that pyspark is not compatible with | |
run: poetry remove lonboard | |
- name: install exact versions of pyspark, pandas and numpy | |
run: poetry add --lock ${{ join(matrix.deps, ' ') }} | |
- name: checkout the lock file | |
run: git checkout poetry.lock | |
- name: lock with no updates | |
# poetry add is aggressive and will update other dependencies like | |
# numpy and pandas so we keep the pyproject.toml edits and then relock | |
# without updating anything except the requested versions | |
run: poetry lock --no-update | |
- name: install ibis | |
run: poetry install --without dev --without docs --extras pyspark | |
- name: run tests | |
run: just ci-check -m pyspark | |
- name: check that no untracked files were produced | |
shell: bash | |
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . | |
- name: upload code coverage | |
# only upload coverage for jobs that aren't mostly xfails | |
if: success() && matrix.python-version != '3.11' | |
continue-on-error: true | |
uses: codecov/codecov-action@v4 | |
with: | |
flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} | |
backends: | |
# this job exists so that we can use a single job from this workflow to gate merging | |
runs-on: ubuntu-latest | |
needs: | |
# - test_backends_min_version | |
- test_backends | |
- test_pyspark | |
steps: | |
- run: exit 0 |