Skip to content

fix(backends): make string concat-with-null behavior consistent across backends #18991

fix(backends): make string concat-with-null behavior consistent across backends

fix(backends): make string concat-with-null behavior consistent across backends #18991

Workflow file for this run

name: Backends
on:
push:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "**/*.md"
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
- ".codespellrc"
branches:
- main
- "*.x.x"
- the-epic-split
pull_request:
# Skip the backend suite if all changes are docs
paths-ignore:
- "docs/**"
- "**/*.md"
- "**/*.qmd"
- "codecov.yml"
- ".envrc"
- ".codespellrc"
branches:
- main
- "*.x.x"
- the-epic-split
merge_group:
permissions:
# this allows extractions/setup-just to list releases for `just` at a higher
# rate limit while restricting GITHUB_TOKEN permissions elsewhere
contents: read
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
env:
FORCE_COLOR: "1"
ODBCSYSINI: "${{ github.workspace }}/ci/odbc"
HYPOTHESIS_PROFILE: "ci"
jobs:
test_backends:
name: ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
python-version:
- "3.9"
- "3.11"
backend:
- name: duckdb
title: DuckDB
extras:
- duckdb
- deltalake
- geospatial
- examples
- decompiler
additional_deps:
- torch
- name: clickhouse
title: ClickHouse
services:
- clickhouse
extras:
- clickhouse
- examples
- name: dask
title: Dask
extras:
- dask
- name: pandas
title: Pandas
extras:
- pandas
- name: sqlite
title: SQLite
extras:
- sqlite
- name: datafusion
title: Datafusion
extras:
- datafusion
- name: polars
title: Polars
extras:
- polars
- deltalake
- name: mysql
title: MySQL
services:
- mysql
extras:
- mysql
- geospatial
sys-deps:
- libgeos-dev
- name: postgres
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- name: postgres
title: PostgreSQL + Torch
extras:
- postgres
- geospatial
additional_deps:
- torch
services:
- postgres
sys-deps:
- libgeos-dev
- name: risingwave
title: Risingwave
services:
- risingwave
extras:
- risingwave
- name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- name: mssql
title: MS SQL Server
extras:
- mssql
services:
- mssql
sys-deps:
- freetds-dev
- unixodbc-dev
- tdsodbc
- name: trino
title: Trino
extras:
- trino
services:
- trino
- name: druid
title: Druid
extras:
- druid
services:
- druid
- name: exasol
title: Exasol
serial: true
extras:
- exasol
services:
- exasol
- name: oracle
title: Oracle
serial: true
extras:
- oracle
services:
- oracle
# - name: flink
# title: Flink
# serial: true
# extras:
# - flink
# additional_deps:
# - apache-flink
# - pytest-split
# services:
# - flink
# - name: risingwave
# title: Risingwave
# services:
# - risingwave
# extras:
# - risingwave
exclude:
- os: windows-latest
backend:
name: mysql
title: MySQL
extras:
- mysql
- geospatial
services:
- mysql
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: clickhouse
title: ClickHouse
extras:
- clickhouse
- examples
services:
- clickhouse
- os: windows-latest
backend:
name: postgres
title: PostgreSQL
extras:
- postgres
- geospatial
services:
- postgres
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: risingwave
title: Risingwave
services:
- risingwave
extras:
- risingwave
- os: windows-latest
backend:
name: postgres
title: PostgreSQL + Torch
extras:
- postgres
- geospatial
additional_deps:
- torch
services:
- postgres
sys-deps:
- libgeos-dev
- os: windows-latest
backend:
name: impala
title: Impala
serial: true
extras:
- impala
services:
- impala
- kudu
sys-deps:
- cmake
- ninja-build
- os: windows-latest
backend:
name: mssql
title: MS SQL Server
extras:
- mssql
services:
- mssql
sys-deps:
- freetds-dev
- unixodbc-dev
- tdsodbc
- os: windows-latest
backend:
name: trino
title: Trino
services:
- trino
extras:
- trino
- os: windows-latest
backend:
name: druid
title: Druid
extras:
- druid
services:
- druid
- os: windows-latest
backend:
name: oracle
title: Oracle
serial: true
extras:
- oracle
services:
- oracle
# - os: windows-latest
# backend:
# name: flink
# title: Flink
# serial: true
# extras:
# - flink
# services:
# - flink
# - python-version: "3.11"
# backend:
# name: flink
# title: Flink
# serial: true
# extras:
# - flink
# services:
# - flink
# - os: windows-latest
# backend:
# name: risingwave
# title: Risingwave
# services:
# - risingwave
# extras:
# - risingwave
- os: windows-latest
backend:
name: exasol
title: Exasol
serial: true
extras:
- exasol
services:
- exasol
steps:
- name: update and install system dependencies
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
run: |
set -euo pipefail
sudo apt-get update -qq -y
sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }}
- name: install sqlite
if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite'
run: choco install sqlite
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}
- name: install poetry
run: pipx install 'poetry==1.7.1'
- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
cache: poetry
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
- name: install deps for broken avro-python setup
if: matrix.backend.name == 'flink'
run: poetry run pip install wheel
- name: install other deps
if: matrix.backend.additional_deps != null
run: poetry run pip install ${{ join(matrix.backend.additional_deps, ' ') }}
- name: show installed deps
run: poetry run pip list
- name: "run parallel tests: ${{ matrix.backend.name }}"
if: ${{ !matrix.backend.serial }}
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
env:
IBIS_TEST_IMPALA_HOST: localhost
IBIS_TEST_IMPALA_PORT: 21050
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
# FIXME(deepyaman): If some backend-specific test, in test_ddl.py,
# executes before common tests, they will fail with:
# org.apache.flink.table.api.ValidationException: Table `default_catalog`.`default_database`.`functional_alltypes` was not found.
# Therefore, we run backend-specific tests second to avoid this.
# - name: "run serial tests: ${{ matrix.backend.name }}"
# if: matrix.backend.serial && matrix.backend.name == 'flink'
# run: |
# just ci-check -m ${{ matrix.backend.name }} ibis/backends/tests
# just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests
# env:
# IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
# FLINK_REMOTE_CLUSTER_ADDR: localhost
# FLINK_REMOTE_CLUSTER_PORT: "8081"
#
- name: "run serial tests: ${{ matrix.backend.name }}"
if: matrix.backend.serial # && matrix.backend.name != 'flink'
run: just ci-check -m ${{ matrix.backend.name }}
env:
IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }}
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
if: success()
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
- name: Show docker compose logs on fail
if: matrix.backend.services != null && failure()
run: docker compose logs
test_backends_min_version:
name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
- windows-latest
python-version:
- "3.9"
- "3.11"
backend:
- name: dask
title: Dask
deps:
- "dask[array,dataframe]@2022.9.1"
- "[email protected]"
extras:
- dask
- name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
exclude:
- os: windows-latest
backend:
name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
- python-version: "3.11"
backend:
name: postgres
title: PostgreSQL
deps:
- "[email protected]"
- "[email protected]"
- "Shapely@2"
services:
- postgres
extras:
- postgres
- geospatial
steps:
- name: checkout
uses: actions/checkout@v4
- name: install libgeos for shapely
if: matrix.backend.name == 'postgres'
run: |
sudo apt-get update -y -qq
sudo apt-get install -qq -y build-essential libgeos-dev
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: start services
if: matrix.backend.services != null
run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }}
- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry==1.7.1'
- name: remove lonboard
# it requires a version of pandas that min versions are not compatible with
run: poetry remove lonboard
- name: install minimum versions
run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }}
- name: checkout the lock file
run: git checkout poetry.lock
- name: lock with no updates
# poetry add is aggressive and will update other dependencies like
# numpy and pandas so we keep the pyproject.toml edits and then relock
# without updating anything except the requested versions
run: poetry lock --no-update
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}"
- name: run tests
run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
if: success()
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
- name: Show docker compose logs on fail
if: matrix.backend.services != null && failure()
run: docker compose logs
test_pyspark:
name: PySpark ${{ matrix.pyspark-version }} ubuntu-latest python-${{ matrix.python-version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- name: pyspark-3.3
python-version: "3.9"
pyspark-version: 3.3
deps:
- "[email protected]"
- "'pandas@<2'"
- "'numpy@<1.24'"
- name: pyspark-3.5
python-version: "3.11"
pyspark-version: 3.5
deps:
- "[email protected]"
- "'pandas@>2'"
- "'numpy@>1.24'"
steps:
- name: checkout
uses: actions/checkout@v4
- uses: actions/setup-java@v4
with:
distribution: microsoft
java-version: 17
- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: download backend data
run: just download-data
- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: ${{ matrix.python-version }}
- name: install poetry
run: python -m pip install --upgrade pip 'poetry==1.7.1'
- name: remove lonboard
# it requires a version of pandas that pyspark is not compatible with
run: poetry remove lonboard
- name: install exact versions of pyspark, pandas and numpy
run: poetry add --lock ${{ join(matrix.deps, ' ') }}
- name: checkout the lock file
run: git checkout poetry.lock
- name: lock with no updates
# poetry add is aggressive and will update other dependencies like
# numpy and pandas so we keep the pyproject.toml edits and then relock
# without updating anything except the requested versions
run: poetry lock --no-update
- name: install ibis
run: poetry install --without dev --without docs --extras pyspark
- name: run tests
run: just ci-check -m pyspark
- name: check that no untracked files were produced
shell: bash
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .
- name: upload code coverage
# only upload coverage for jobs that aren't mostly xfails
if: success() && matrix.python-version != '3.11'
continue-on-error: true
uses: codecov/codecov-action@v4
with:
flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }}
backends:
# this job exists so that we can use a single job from this workflow to gate merging
runs-on: ubuntu-latest
needs:
# - test_backends_min_version
- test_backends
- test_pyspark
steps:
- run: exit 0