diff --git a/.github/renovate.json b/.github/renovate.json index 3a3dbe6073de..a2e3a325fad7 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -46,7 +46,7 @@ "addLabels": ["bigquery"] }, { - "matchPackagePatterns": ["duckdb", "duckdb-engine"], + "matchPackagePatterns": ["duckdb"], "addLabels": ["duckdb"] }, { diff --git a/.github/workflows/ibis-backends-skip-helper.yml b/.github/workflows/ibis-backends-skip-helper.yml index 56969fc7c2fb..1432f14864d4 100644 --- a/.github/workflows/ibis-backends-skip-helper.yml +++ b/.github/workflows/ibis-backends-skip-helper.yml @@ -13,7 +13,7 @@ on: branches: - master - "*.x.x" - - "!the-epic-split" + - the-epic-split pull_request: paths: - "docs/**" @@ -24,7 +24,7 @@ on: branches: - master - "*.x.x" - - "!the-epic-split" + - the-epic-split merge_group: jobs: test_backends: diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 0f60b5289a0c..b5308e713fc9 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -12,7 +12,7 @@ on: branches: - master - "*.x.x" - - "!the-epic-split" + - the-epic-split pull_request: # Skip the backend suite if all changes are docs paths-ignore: @@ -24,7 +24,7 @@ on: branches: - master - "*.x.x" - - "!the-epic-split" + - the-epic-split merge_group: permissions: @@ -55,139 +55,140 @@ jobs: - "3.9" - "3.11" backend: - - name: dask - title: Dask - extras: - - dask - name: duckdb title: DuckDB extras: - duckdb - deltalake - geospatial + - decompiler additional_deps: - torch - - name: pandas - title: Pandas - extras: - - pandas - - name: sqlite - title: SQLite - extras: - - sqlite - - name: datafusion - title: Datafusion - extras: - - datafusion - - name: polars - title: Polars - extras: - - polars - - deltalake - - name: mysql - title: MySQL - services: - - mysql - extras: - - mysql - - geospatial - sys-deps: - - libgeos-dev - name: clickhouse title: ClickHouse services: - clickhouse extras: - clickhouse - - name: postgres - title: PostgreSQL - extras: - - postgres - - geospatial - services: - - postgres - sys-deps: - - libgeos-dev - - name: postgres - title: PostgreSQL + Torch - extras: - - postgres - - geospatial - additional_deps: - - torch - services: - - postgres - sys-deps: - - libgeos-dev - - name: impala - title: Impala - extras: - - impala - services: - - impala - - kudu - sys-deps: - - cmake - - ninja-build - - name: mssql - title: MS SQL Server - extras: - - mssql - services: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - name: trino - title: Trino - extras: - - trino - - postgres - services: - - trino - - name: druid - title: Druid - extras: - - druid - services: - - druid - - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - - name: exasol - title: Exasol - serial: true - extras: - - exasol - services: - - exasol - - name: flink - title: Flink - serial: true - extras: - - flink - additional_deps: - - apache-flink - - pytest-split - services: - - flink + # - name: dask + # title: Dask + # extras: + # - dask + # - name: pandas + # title: Pandas + # extras: + # - pandas + # - name: sqlite + # title: SQLite + # extras: + # - sqlite + # - name: datafusion + # title: Datafusion + # extras: + # - datafusion + # - name: polars + # title: Polars + # extras: + # - polars + # - deltalake + # - name: mysql + # title: MySQL + # services: + # - mysql + # extras: + # - mysql + # - geospatial + # sys-deps: + # - libgeos-dev + # - name: postgres + # title: PostgreSQL + # extras: + # 
- postgres + # - geospatial + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - name: postgres + # title: PostgreSQL + Torch + # extras: + # - postgres + # - geospatial + # additional_deps: + # - torch + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - name: impala + # title: Impala + # extras: + # - impala + # services: + # - impala + # - kudu + # sys-deps: + # - cmake + # - ninja-build + # - name: mssql + # title: MS SQL Server + # extras: + # - mssql + # services: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - name: trino + # title: Trino + # extras: + # - trino + # - postgres + # services: + # - trino + # - name: druid + # title: Druid + # extras: + # - druid + # services: + # - druid + # - name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # - name: exasol + # title: Exasol + # serial: true + # extras: + # - exasol + # services: + # - exasol + # - name: flink + # title: Flink + # serial: true + # extras: + # - flink + # additional_deps: + # - apache-flink + # - pytest-split + # services: + # - flink exclude: - - os: windows-latest - backend: - name: mysql - title: MySQL - extras: - - mysql - - geospatial - services: - - mysql - sys-deps: - - libgeos-dev + # - os: windows-latest + # backend: + # name: mysql + # title: MySQL + # extras: + # - mysql + # - geospatial + # services: + # - mysql + # sys-deps: + # - libgeos-dev - os: windows-latest backend: name: clickhouse @@ -196,115 +197,115 @@ jobs: - clickhouse services: - clickhouse - - os: windows-latest - backend: - name: postgres - title: PostgreSQL - extras: - - postgres - - geospatial - services: - - postgres - sys-deps: - - libgeos-dev - - os: windows-latest - backend: - name: postgres - title: PostgreSQL + Torch - extras: - - postgres - - geospatial - additional_deps: - - torch - services: - - postgres - sys-deps: - - libgeos-dev - - os: windows-latest - backend: - name: impala - title: Impala - extras: - - impala - services: - - impala - - kudu - sys-deps: - - cmake - - ninja-build - - os: windows-latest - backend: - name: mssql - title: MS SQL Server - extras: - - mssql - services: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - os: windows-latest - backend: - name: trino - title: Trino - services: - - trino - extras: - - trino - - postgres - - os: windows-latest - backend: - name: druid - title: Druid - extras: - - druid - services: - - druid - - os: windows-latest - backend: - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - - os: windows-latest - backend: - name: flink - title: Flink - serial: true - extras: - - flink - services: - - flink - - python-version: "3.11" - backend: - name: flink - title: Flink - serial: true - extras: - - flink - services: - - flink - - os: windows-latest - backend: - name: exasol - title: Exasol - serial: true - extras: - - exasol - services: - - exasol + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + # extras: + # - postgres + # - geospatial + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + Torch + # extras: + # - postgres + # - geospatial + # additional_deps: + # - torch + # services: + # - postgres + # sys-deps: + # - libgeos-dev + # - os: windows-latest + # backend: + # name: impala + # title: Impala + # extras: + # - impala + # services: + # - impala + # - kudu + # sys-deps: + # - cmake + # 
- ninja-build + # - os: windows-latest + # backend: + # name: mssql + # title: MS SQL Server + # extras: + # - mssql + # services: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - os: windows-latest + # backend: + # name: trino + # title: Trino + # services: + # - trino + # extras: + # - trino + # - postgres + # - os: windows-latest + # backend: + # name: druid + # title: Druid + # extras: + # - druid + # services: + # - druid + # - os: windows-latest + # backend: + # name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # - os: windows-latest + # backend: + # name: flink + # title: Flink + # serial: true + # extras: + # - flink + # services: + # - flink + # - python-version: "3.11" + # backend: + # name: flink + # title: Flink + # serial: true + # extras: + # - flink + # services: + # - flink + # - os: windows-latest + # backend: + # name: exasol + # title: Exasol + # serial: true + # extras: + # - exasol + # services: + # - exasol steps: - - name: update and install system dependencies - if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null - run: | - set -euo pipefail - - sudo apt-get update -qq -y - sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} + # - name: update and install system dependencies + # if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null + # run: | + # set -euo pipefail + # + # sudo apt-get update -qq -y + # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - name: install sqlite if: matrix.os == 'windows-latest' && matrix.backend.name == 'sqlite' @@ -358,7 +359,7 @@ jobs: run: poetry run pip list - name: "run parallel tests: ${{ matrix.backend.name }}" - if: ${{ !matrix.backend.serial }} + if: true # ${{ !matrix.backend.serial }} run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup env: IBIS_TEST_IMPALA_HOST: localhost @@ -369,21 +370,21 @@ jobs: # executes before common tests, they will fail with: # org.apache.flink.table.api.ValidationException: Table `default_catalog`.`default_database`.`functional_alltypes` was not found. # Therefore, we run backend-specific tests second to avoid this. 
- - name: "run serial tests: ${{ matrix.backend.name }}" - if: matrix.backend.serial && matrix.backend.name == 'flink' - run: | - just ci-check -m ${{ matrix.backend.name }} ibis/backends/tests - just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests - env: - IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - FLINK_REMOTE_CLUSTER_ADDR: localhost - FLINK_REMOTE_CLUSTER_PORT: "8081" - - - name: "run serial tests: ${{ matrix.backend.name }}" - if: matrix.backend.serial && matrix.backend.name != 'flink' - run: just ci-check -m ${{ matrix.backend.name }} - env: - IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} + # - name: "run serial tests: ${{ matrix.backend.name }}" + # if: matrix.backend.serial && matrix.backend.name == 'flink' + # run: | + # just ci-check -m ${{ matrix.backend.name }} ibis/backends/tests + # just ci-check -m ${{ matrix.backend.name }} ibis/backends/flink/tests + # env: + # IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} + # FLINK_REMOTE_CLUSTER_ADDR: localhost + # FLINK_REMOTE_CLUSTER_PORT: "8081" + # + # - name: "run serial tests: ${{ matrix.backend.name }}" + # if: matrix.backend.serial && matrix.backend.name != 'flink' + # run: just ci-check -m ${{ matrix.backend.name }} + # env: + # IBIS_EXAMPLES_DATA: ${{ runner.temp }}/examples-${{ matrix.backend.name }}-${{ matrix.os }}-${{ steps.install_python.outputs.python-version }} - name: check that no untracked files were produced shell: bash @@ -399,380 +400,390 @@ jobs: if: matrix.backend.services != null && failure() run: docker compose logs - test_backends_min_version: - name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - - windows-latest - python-version: - - "3.9" - - "3.11" - backend: - - name: dask - title: Dask - deps: - - "dask[array,dataframe]@2022.9.1" - - "pandas@1.5.3" - extras: - - dask - - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - exclude: - - os: windows-latest - backend: - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - - python-version: "3.11" - backend: - name: postgres - title: PostgreSQL - deps: - - "psycopg2@2.8.4" - - "GeoAlchemy2@0.6.3" - - "geopandas@0.6" - - "Shapely@2" - services: - - postgres - extras: - - postgres - - geospatial - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: install libgeos for shapely - if: matrix.backend.name == 'postgres' - run: | - sudo apt-get update -y -qq - sudo apt-get install -qq -y build-essential libgeos-dev - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: start services - if: matrix.backend.services != null - run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: 
install poetry - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: remove lonboard - # it requires a version of pandas that min versions are not compatible with - run: poetry remove lonboard - - - name: install minimum versions - run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} - - - name: checkout the lock file - run: git checkout poetry.lock - - - name: lock with no updates - # poetry add is aggressive and will update other dependencies like - # numpy and pandas so we keep the pyproject.toml edits and then relock - # without updating anything except the requested versions - run: poetry lock --no-update - - - name: install ibis - run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - - - name: run tests - run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . - - - name: upload code coverage - if: success() - uses: codecov/codecov-action@v3 - with: - flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - - - name: Show docker compose logs on fail - if: matrix.backend.services != null && failure() - run: docker compose logs - - test_pyspark: - name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - python-version: - - "3.10" - steps: - - name: checkout - uses: actions/checkout@v4 - - - uses: actions/setup-java@v4 - with: - distribution: microsoft - java-version: 17 - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: install poetry - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: remove lonboard - # it requires a version of pandas that pyspark is not compatible with - run: poetry remove lonboard - - - name: install maximum versions of pandas and numpy - run: poetry add --lock 'pandas@<2' 'numpy<1.24' - - - name: checkout the lock file - run: git checkout poetry.lock - - - name: lock with no updates - # poetry add is aggressive and will update other dependencies like - # numpy and pandas so we keep the pyproject.toml edits and then relock - # without updating anything except the requested versions - run: poetry lock --no-update - - - name: install ibis - run: poetry install --without dev --without docs --extras pyspark - - - name: run tests - run: just ci-check -m pyspark - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
- - - name: upload code coverage - # only upload coverage for jobs that aren't mostly xfails - if: success() && matrix.python-version != '3.11' - uses: codecov/codecov-action@v3 - with: - flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} - - gen_lockfile_sqlalchemy2: - name: Generate Poetry Lockfile for SQLAlchemy 2 - runs-on: ubuntu-latest - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: install python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: remove deps that are not compatible with sqlalchemy 2 - run: poetry remove snowflake-sqlalchemy sqlalchemy-exasol - - - name: add sqlalchemy 2 - run: poetry add --lock --optional 'sqlalchemy>=2,<3' - - - name: checkout the lock file - run: git checkout poetry.lock - - - name: lock with no updates - # poetry add is aggressive and will update other dependencies like - # numpy and pandas so we keep the pyproject.toml edits and then relock - # without updating anything except the requested versions - run: poetry lock --no-update - - - name: check the sqlalchemy version - run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' - - - name: upload deps file - uses: actions/upload-artifact@v3 - with: - name: deps - path: | - pyproject.toml - poetry.lock - - test_backends_sqlalchemy2: - name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} - runs-on: ${{ matrix.os }} - needs: - - gen_lockfile_sqlalchemy2 - strategy: - fail-fast: false - matrix: - os: - - ubuntu-latest - python-version: - - "3.11" - backend: - - name: mssql - title: MS SQL Server - services: - - mssql - extras: - - mssql - sys-deps: - - freetds-dev - - unixodbc-dev - - tdsodbc - - name: mysql - title: MySQL - services: - - mysql - extras: - - geospatial - - mysql - - name: postgres - title: PostgreSQL - services: - - postgres - extras: - - geospatial - - postgres - sys-deps: - - libgeos-dev - - name: sqlite - title: SQLite - extras: - - sqlite - - name: trino - title: Trino - services: - - trino - extras: - - trino - - postgres - - name: duckdb - title: DuckDB - extras: - - duckdb - - name: oracle - title: Oracle - serial: true - extras: - - oracle - services: - - oracle - steps: - - name: checkout - uses: actions/checkout@v4 - - - name: update and install system dependencies - if: matrix.backend.sys-deps != null - run: | - set -euo pipefail - - sudo apt-get update -qq -y - sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} - - - uses: extractions/setup-just@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: download backend data - run: just download-data - - - name: start services - if: matrix.backend.services != null - run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} - - - name: install python - uses: actions/setup-python@v5 - id: install_python - with: - python-version: ${{ matrix.python-version }} - - - name: download poetry lockfile - uses: actions/download-artifact@v3 - with: - name: deps - path: deps - - - name: pull out lockfile - run: | - set -euo pipefail - - mv -f deps/* . 
- rm -r deps - - - uses: syphar/restore-virtualenv@v1 - with: - requirement_files: poetry.lock - custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} - - - uses: syphar/restore-pip-download-cache@v1 - with: - requirement_files: poetry.lock - custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} - - - name: install poetry - run: python -m pip install --upgrade pip 'poetry==1.7.1' - - - name: install ibis - run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" - - - name: run tests - run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup - - - name: check that no untracked files were produced - shell: bash - run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . - - - name: upload code coverage - if: success() - uses: codecov/codecov-action@v3 - with: - flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + # test_backends_min_version: + # name: ${{ matrix.backend.title }} Min Version ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # env: + # SQLALCHEMY_WARN_20: "1" + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # - windows-latest + # python-version: + # - "3.9" + # - "3.11" + # backend: + # - name: dask + # title: Dask + # deps: + # - "dask[array,dataframe]@2022.9.1" + # - "pandas@1.5.3" + # extras: + # - dask + # - name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # exclude: + # - os: windows-latest + # backend: + # name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # - python-version: "3.11" + # backend: + # name: postgres + # title: PostgreSQL + # deps: + # - "psycopg2@2.8.4" + # - "GeoAlchemy2@0.6.3" + # - "geopandas@0.6" + # - "Shapely@2" + # services: + # - postgres + # extras: + # - postgres + # - geospatial + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: install libgeos for shapely + # if: matrix.backend.name == 'postgres' + # run: | + # sudo apt-get update -y -qq + # sudo apt-get install -qq -y build-essential libgeos-dev + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: start services + # if: matrix.backend.services != null + # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove lonboard + # # it requires a version of pandas that min versions are not compatible with + # run: poetry remove lonboard + # + # - name: install minimum versions + # run: poetry add --lock --optional ${{ join(matrix.backend.deps, ' ') }} + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas 
so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" + # + # - name: run tests + # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . + # + # - name: upload code coverage + # if: success() + # uses: codecov/codecov-action@v3 + # with: + # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + # + # - name: Show docker compose logs on fail + # if: matrix.backend.services != null && failure() + # run: docker compose logs + + # test_pyspark: + # name: PySpark ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # python-version: + # - "3.10" + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - uses: actions/setup-java@v4 + # with: + # distribution: microsoft + # java-version: 17 + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove lonboard + # # it requires a version of pandas that pyspark is not compatible with + # run: poetry remove lonboard + # + # - name: install maximum versions of pandas and numpy + # run: poetry add --lock 'pandas@<2' 'numpy<1.24' + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras pyspark + # + # - name: run tests + # run: just ci-check -m pyspark + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . 
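+      # # NOTE: `git status --porcelain` prints nothing when the tree is clean
+      # # and `grep .` exits non-zero on empty input, so the leading `!` makes
+      # # the check above fail only when untracked or modified files remain
+      # # after poetry.lock and pyproject.toml are restored.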
+ # + # - name: upload code coverage + # # only upload coverage for jobs that aren't mostly xfails + # if: success() && matrix.python-version != '3.11' + # uses: codecov/codecov-action@v3 + # with: + # flags: backend,pyspark,${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} + + # gen_lockfile_sqlalchemy2: + # name: Generate Poetry Lockfile for SQLAlchemy 2 + # runs-on: ubuntu-latest + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: install python + # uses: actions/setup-python@v5 + # with: + # python-version: "3.11" + # + # - run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: remove deps that are not compatible with sqlalchemy 2 + # run: poetry remove snowflake-sqlalchemy sqlalchemy-exasol + # + # - name: add sqlalchemy 2 + # run: poetry add --lock --optional 'sqlalchemy>=2,<3' + # + # - name: checkout the lock file + # run: git checkout poetry.lock + # + # - name: lock with no updates + # # poetry add is aggressive and will update other dependencies like + # # numpy and pandas so we keep the pyproject.toml edits and then relock + # # without updating anything except the requested versions + # run: poetry lock --no-update + # + # - name: check the sqlalchemy version + # run: poetry show sqlalchemy --no-ansi | grep version | cut -d ':' -f2- | sed 's/ //g' | grep -P '^2\.' + # + # - name: upload deps file + # uses: actions/upload-artifact@v3 + # with: + # name: deps + # path: | + # pyproject.toml + # poetry.lock + + # test_backends_sqlalchemy2: + # name: SQLAlchemy 2 ${{ matrix.backend.title }} ${{ matrix.os }} python-${{ matrix.python-version }} + # runs-on: ${{ matrix.os }} + # needs: + # - gen_lockfile_sqlalchemy2 + # env: + # ODBCSYSINI: "${{ github.workspace }}/.odbc" + # strategy: + # fail-fast: false + # matrix: + # os: + # - ubuntu-latest + # python-version: + # - "3.11" + # backend: + # - name: mssql + # title: MS SQL Server + # services: + # - mssql + # extras: + # - mssql + # sys-deps: + # - freetds-dev + # - unixodbc-dev + # - tdsodbc + # - name: mysql + # title: MySQL + # services: + # - mysql + # extras: + # - geospatial + # - mysql + # - name: postgres + # title: PostgreSQL + # services: + # - postgres + # extras: + # - geospatial + # - postgres + # sys-deps: + # - libgeos-dev + # - name: sqlite + # title: SQLite + # extras: + # - sqlite + # - name: trino + # title: Trino + # services: + # - trino + # extras: + # - trino + # - postgres + # - name: oracle + # title: Oracle + # serial: true + # extras: + # - oracle + # services: + # - oracle + # steps: + # - name: checkout + # uses: actions/checkout@v4 + # + # - name: update and install system dependencies + # if: matrix.backend.sys-deps != null + # run: | + # set -euo pipefail + # + # sudo apt-get update -qq -y + # sudo apt-get install -qq -y build-essential ${{ join(matrix.backend.sys-deps, ' ') }} + # + # - name: setup odbc for mssql + # if: ${{ matrix.backend.name == 'mssql' }} + # run: | + # mkdir -p "$ODBCSYSINI" + # + # { + # echo '[FreeTDS]' + # echo "Driver = libtdsodbc.so" + # } > "$ODBCSYSINI/odbcinst.ini" + # + # - uses: extractions/setup-just@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # + # - name: download backend data + # run: just download-data + # + # - name: start services + # if: matrix.backend.services != null + # run: docker compose up --wait ${{ join(matrix.backend.services, ' ') }} + # + # - name: install python + # uses: actions/setup-python@v5 + # id: install_python + # with: + # python-version: ${{ 
matrix.python-version }} + # + # - name: download poetry lockfile + # uses: actions/download-artifact@v3 + # with: + # name: deps + # path: deps + # + # - name: pull out lockfile + # run: | + # set -euo pipefail + # + # mv -f deps/* . + # rm -r deps + # + # - uses: syphar/restore-virtualenv@v1 + # with: + # requirement_files: poetry.lock + # custom_cache_key_element: ${{ matrix.backend.name }}-${{ steps.install_python.outputs.python-version }} + # + # - uses: syphar/restore-pip-download-cache@v1 + # with: + # requirement_files: poetry.lock + # custom_cache_key_element: ${{ steps.install_python.outputs.python-version }} + # + # - name: install poetry + # run: python -m pip install --upgrade pip 'poetry==1.7.1' + # + # - name: install ibis + # run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }}" + # + # - name: run tests + # run: just ci-check -m ${{ matrix.backend.name }} --numprocesses auto --dist=loadgroup + # + # - name: check that no untracked files were produced + # shell: bash + # run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep . + # + # - name: upload code coverage + # if: success() + # uses: codecov/codecov-action@v3 + # with: + # flags: backend,${{ matrix.backend.name }},${{ runner.os }},python-${{ steps.install_python.outputs.python-version }} backends: # this job exists so that we can use a single job from this workflow to gate merging runs-on: ubuntu-latest needs: - - test_backends_min_version + # - test_backends_min_version - test_backends - - test_backends_sqlalchemy2 - - test_pyspark + # - test_backends_sqlalchemy2 + # - test_pyspark steps: - run: exit 0 diff --git a/.github/workflows/ibis-main.yml b/.github/workflows/ibis-main.yml index 50a9482f1785..2b135914d72e 100644 --- a/.github/workflows/ibis-main.yml +++ b/.github/workflows/ibis-main.yml @@ -94,12 +94,13 @@ jobs: - name: run all core tests and run benchmarks once parallel if: matrix.os != 'windows-latest' - # TODO(kszucs): restore "just ci-check -m "'core or benchmark'" -n auto" - run: poetry run pytest -v -n auto ibis/common ibis/expr ibis/tests ibis/formats + # TODO(cpcloud): bring back benchmarks smoke tests -m 'core or benchmarks' + run: just ci-check -m core --numprocesses auto - name: run all core tests and run benchmarks once serial if: matrix.os == 'windows-latest' - run: poetry run pytest -v ibis/common ibis/expr ibis/tests ibis/formats + # TODO(cpcloud): bring back benchmarks smoke tests -m 'core or benchmarks' + run: just ci-check -m core - name: upload code coverage if: success() diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index a208a8f00bc1..7190d831ba68 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -31,7 +31,6 @@ import pyarrow as pa import torch - __all__ = ("BaseBackend", "Database", "connect") _IBIS_TO_SQLGLOT_DIALECT = { diff --git a/ibis/backends/base/sql/alchemy/registry.py b/ibis/backends/base/sql/alchemy/registry.py index f4a809c364e1..baa5d5fe287e 100644 --- a/ibis/backends/base/sql/alchemy/registry.py +++ b/ibis/backends/base/sql/alchemy/registry.py @@ -605,7 +605,7 @@ class array_filter(FunctionElement): ops.Coalesce: varargs(sa.func.coalesce), ops.NullIf: fixed_arity(sa.func.nullif, 2), ops.InValues: _in_values, - ops.InColumn: _in_column, + ops.InSubquery: _in_column, ops.Count: reduction(sa.func.count), ops.CountStar: _count_star, ops.CountDistinctStar: _count_distinct_star, @@ -629,8 +629,7 @@ class 
array_filter(FunctionElement): ops.Literal: _literal, ops.SimpleCase: _simple_case, ops.SearchedCase: _searched_case, - ops.TableColumn: _table_column, - ops.TableArrayView: _table_array_view, + ops.Field: _table_column, ops.ExistsSubquery: _exists_subquery, # miscellaneous varargs ops.Least: varargs(sa.func.least), diff --git a/ibis/backends/base/sql/compiler/query_builder.py b/ibis/backends/base/sql/compiler/query_builder.py index 50bd21eb62ae..4376e03b4a55 100644 --- a/ibis/backends/base/sql/compiler/query_builder.py +++ b/ibis/backends/base/sql/compiler/query_builder.py @@ -23,16 +23,6 @@ class TableSetFormatter: - _join_names = { - ops.InnerJoin: "INNER JOIN", - ops.LeftJoin: "LEFT OUTER JOIN", - ops.RightJoin: "RIGHT OUTER JOIN", - ops.OuterJoin: "FULL OUTER JOIN", - ops.LeftAntiJoin: "LEFT ANTI JOIN", - ops.LeftSemiJoin: "LEFT SEMI JOIN", - ops.CrossJoin: "CROSS JOIN", - } - def __init__(self, parent, node, indent=2): # `parent` is a `Select` instance, not a `TableSetFormatter` self.parent = parent diff --git a/ibis/backends/base/sql/compiler/select_builder.py b/ibis/backends/base/sql/compiler/select_builder.py index 41abc75b4a33..f1f9b73cc984 100644 --- a/ibis/backends/base/sql/compiler/select_builder.py +++ b/ibis/backends/base/sql/compiler/select_builder.py @@ -86,9 +86,9 @@ def _populate_context(self): def _make_table_aliases(self, node): ctx = self.context - if isinstance(node, ops.Join): + if isinstance(node, ops.JoinChain): for arg in node.args: - if isinstance(arg, ops.TableNode): + if isinstance(arg, ops.Relation): self._make_table_aliases(arg) elif not ctx.is_extracted(node): ctx.make_alias(node) @@ -112,7 +112,7 @@ def _collect_elements(self): if isinstance(self.op, ops.DummyTable): self.select_set = list(self.op.values) - elif isinstance(self.op, ops.TableNode): + elif isinstance(self.op, ops.Relation): self._collect(self.op, toplevel=True) else: self.select_set = [self.op] @@ -125,7 +125,7 @@ def _collect(self, op, toplevel=False): f(op, toplevel=toplevel) elif isinstance(op, (ops.PhysicalTable, ops.SQLQueryResult)): self._collect_PhysicalTable(op, toplevel=toplevel) - elif isinstance(op, ops.Join): + elif isinstance(op, ops.JoinChain): self._collect_Join(op, toplevel=toplevel) elif isinstance(op, ops.WindowingTVF): self._collect_WindowingTVF(op, toplevel=toplevel) @@ -140,7 +140,7 @@ def _collect_Distinct(self, op, toplevel=False): def _collect_Limit(self, op, toplevel=False): if toplevel: - if isinstance(table := op.table, ops.Limit): + if isinstance(table := op.parent, ops.Limit): self.table_set = table self.select_set = [table] else: @@ -184,27 +184,18 @@ def _collect_Aggregation(self, op, toplevel=False): self._collect(op.table) - def _collect_Selection(self, op, toplevel=False): - table = op.table + def _collect_Project(self, op, toplevel=False): + table = op.parent if toplevel: - if isinstance(table, ops.Join): + if isinstance(table, ops.JoinChain): self._collect_Join(table) else: self._collect(table) - selections = op.selections - sort_keys = op.sort_keys - filters = op.predicates - - if not selections: - # select * - selections = [table] - - self.order_by = sort_keys - self.select_set = selections + selections = op.values + self.select_set = list(selections.values()) self.table_set = table - self.filters = filters def _collect_InMemoryTable(self, node, toplevel=False): if toplevel: diff --git a/ibis/backends/base/sql/registry/main.py b/ibis/backends/base/sql/registry/main.py index 1f1d6cc99db3..91bf90f29707 100644 --- a/ibis/backends/base/sql/registry/main.py 
+++ b/ibis/backends/base/sql/registry/main.py @@ -355,11 +355,10 @@ def _floor(t, op): ops.IfElse: fixed_arity("if", 3), ops.Between: between, ops.InValues: binary_infix.in_values, - ops.InColumn: binary_infix.in_column, + ops.InSubquery: binary_infix.in_column, ops.SimpleCase: case.simple_case, ops.SearchedCase: case.searched_case, - ops.TableColumn: table_column, - ops.TableArrayView: table_array_view, + ops.Field: table_column, ops.DateAdd: timestamp.timestamp_op("date_add"), ops.DateSub: timestamp.timestamp_op("date_sub"), ops.DateDiff: timestamp.timestamp_op("datediff"), diff --git a/ibis/backends/base/sqlglot/__init__.py b/ibis/backends/base/sqlglot/__init__.py index 092e035ce61d..f8cc80e5c76f 100644 --- a/ibis/backends/base/sqlglot/__init__.py +++ b/ibis/backends/base/sqlglot/__init__.py @@ -1,97 +1,238 @@ from __future__ import annotations -from functools import partial -from typing import TYPE_CHECKING, Any, Callable +import abc +from typing import TYPE_CHECKING, Any, ClassVar import sqlglot as sg +import sqlglot.expressions as sge -if TYPE_CHECKING: - import ibis.expr.datatypes as dt - from ibis.backends.base.sqlglot.datatypes import SqlglotType - - -class AggGen: - __slots__ = ("aggfunc",) - - def __init__(self, *, aggfunc: Callable) -> None: - self.aggfunc = aggfunc - - def __getattr__(self, name: str) -> partial: - return partial(self.aggfunc, name) - - def __getitem__(self, key: str) -> partial: - return getattr(self, key) - - -def _func(name: str, *args: Any, **kwargs: Any): - return sg.func(name, *map(sg.exp.convert, args), **kwargs) - - -class FuncGen: - __slots__ = () - - def __getattr__(self, name: str) -> partial: - return partial(_func, name) - - def __getitem__(self, key: str) -> partial: - return getattr(self, key) - - def array(self, *args): - return sg.exp.Array.from_arg_list(list(map(sg.exp.convert, args))) - - def tuple(self, *args): - return sg.func("tuple", *map(sg.exp.convert, args)) - - def exists(self, query): - return sg.exp.Exists(this=query) - - def concat(self, *args): - return sg.exp.Concat(expressions=list(map(sg.exp.convert, args))) +import ibis +import ibis.expr.operations as ops +import ibis.expr.schema as sch +from ibis.backends.base import BaseBackend +from ibis.backends.base.sqlglot.compiler import STAR - def map(self, keys, values): - return sg.exp.Map(keys=keys, values=values) - - -class ColGen: - __slots__ = () - - def __getattr__(self, name: str) -> sg.exp.Column: - return sg.column(name) - - def __getitem__(self, key: str) -> sg.exp.Column: - return sg.column(key) - - -def paren(expr): - """Wrap a sqlglot expression in parentheses.""" - return sg.exp.Paren(this=expr) - - -def parenthesize(op, arg): - import ibis.expr.operations as ops - - if isinstance(op, (ops.Binary, ops.Unary)): - return paren(arg) - # function calls don't need parens - return arg - - -def interval(value, *, unit): - return sg.exp.Interval(this=sg.exp.convert(value), unit=sg.exp.var(unit)) - - -C = ColGen() -F = FuncGen() -NULL = sg.exp.NULL -FALSE = sg.exp.FALSE -TRUE = sg.exp.TRUE -STAR = sg.exp.Star() - - -def make_cast( - converter: SqlglotType, -) -> Callable[[sg.exp.Expression, dt.DataType], sg.exp.Cast]: - def cast(arg: sg.exp.Expression, to: dt.DataType) -> sg.exp.Cast: - return sg.cast(arg, to=converter.from_ibis(to)) +if TYPE_CHECKING: + from collections.abc import Iterator - return cast + import ibis.expr.datatypes as dt + import ibis.expr.types as ir + from ibis.backends.base.sqlglot.compiler import SQLGlotCompiler + from ibis.common.typing import 
SupportsSchema + + +class SQLGlotBackend(BaseBackend): + compiler: ClassVar[SQLGlotCompiler] + name: ClassVar[str] + + @classmethod + def has_operation(cls, operation: type[ops.Value]) -> bool: + # singledispatchmethod overrides `__get__` so we can't directly access + # the dispatcher + dispatcher = cls.compiler.visit_node.register.__self__.dispatcher + return dispatcher.dispatch(operation) is not dispatcher.dispatch(object) + + def table( + self, name: str, schema: str | None = None, database: str | None = None + ) -> ir.Table: + """Construct a table expression. + + Parameters + ---------- + name + Table name + schema + Schema name + database + Database name + + Returns + ------- + Table + Table expression + """ + table_schema = self.get_schema(name, schema=schema, database=database) + return ops.DatabaseTable( + name, + schema=table_schema, + source=self, + namespace=ops.Namespace(database=database, schema=schema), + ).to_expr() + + def _to_sqlglot( + self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any + ): + """Compile an Ibis expression to a sqlglot object.""" + table_expr = expr.as_table() + + if limit == "default": + limit = ibis.options.sql.default_limit + if limit is not None: + table_expr = table_expr.limit(limit) + + if params is None: + params = {} + + sql = self.compiler.translate(table_expr.op(), params=params) + assert not isinstance(sql, sge.Subquery) + + if isinstance(sql, sge.Table): + sql = sg.select(STAR).from_(sql) + + assert not isinstance(sql, sge.Subquery) + return sql + + def compile( + self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any + ): + """Compile an Ibis expression to a SQL string.""" + query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) + sql = query.sql(dialect=self.name, pretty=True) + self._log(sql) + return sql + + def _to_sql(self, expr: ir.Expr, **kwargs) -> str: + return self.compile(expr, **kwargs) + + def _log(self, sql: str) -> None: + """Log `sql`. + + This method can be implemented by subclasses. Logging occurs when + `ibis.options.verbose` is `True`. 
+ """ + from ibis import util + + util.log(sql) + + def sql( + self, + query: str, + schema: SupportsSchema | None = None, + dialect: str | None = None, + ) -> ir.Table: + query = self._transpile_sql(query, dialect=dialect) + if schema is None: + schema = self._get_schema_using_query(query) + return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr() + + @abc.abstractmethod + def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: + """Return the metadata of a SQL query.""" + + def _get_schema_using_query(self, query: str) -> sch.Schema: + """Return an ibis Schema from a backend-specific SQL string.""" + return sch.Schema.from_tuples(self._metadata(query)) + + def create_view( + self, + name: str, + obj: ir.Table, + *, + database: str | None = None, + schema: str | None = None, + overwrite: bool = False, + ) -> ir.Table: + src = sge.Create( + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + kind="VIEW", + replace=overwrite, + expression=self.compile(obj), + ) + self._register_in_memory_tables(obj) + with self._safe_raw_sql(src): + pass + return self.table(name, database=database) + + def _register_in_memory_tables(self, expr: ir.Expr) -> None: + for memtable in expr.op().find(ops.InMemoryTable): + self._register_in_memory_table(memtable) + + def drop_view( + self, + name: str, + *, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + src = sge.Drop( + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + kind="VIEW", + exists=force, + ) + with self._safe_raw_sql(src): + pass + + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + + def _create_temp_view(self, table_name, source): + if table_name not in self._temp_views and table_name in self.list_tables(): + raise ValueError( + f"{table_name} already exists as a non-temporary table or view" + ) + + with self._safe_raw_sql(self._get_temp_view_definition(table_name, source)): + pass + + self._temp_views.add(table_name) + self._register_temp_view_cleanup(table_name) + + def _register_temp_view_cleanup(self, name: str) -> None: + """Register a clean up function for a temporary view. + + No-op by default. + + Parameters + ---------- + name + The temporary view to register for clean up. 
+ """ + + def _load_into_cache(self, name, expr): + self.create_table(name, expr, schema=expr.schema(), temp=True) + + def _clean_up_cached_table(self, op): + self.drop_table(op.name) + + def execute( + self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any + ) -> Any: + """Execute an expression.""" + + self._run_pre_execute_hooks(expr) + table = expr.as_table() + sql = self.compile(table, limit=limit, **kwargs) + + schema = table.schema() + + with self._safe_raw_sql(sql) as cur: + result = self._fetch_from_cursor(cur, schema) + return expr.__pandas_result__(result) + + def drop_table( + self, + name: str, + database: str | None = None, + schema: str | None = None, + force: bool = False, + ) -> None: + drop_stmt = sg.exp.Drop( + kind="TABLE", + this=sg.table( + name, db=schema, catalog=database, quoted=self.compiler.quoted + ), + exists=force, + ) + with self._safe_raw_sql(drop_stmt): + pass diff --git a/ibis/backends/base/sqlglot/compiler.py b/ibis/backends/base/sqlglot/compiler.py new file mode 100644 index 000000000000..e9bb79997e84 --- /dev/null +++ b/ibis/backends/base/sqlglot/compiler.py @@ -0,0 +1,1256 @@ +from __future__ import annotations + +import abc +import calendar +import functools +import itertools +import math +import operator +import string +from collections.abc import Iterator, Mapping +from functools import partial, singledispatchmethod +from itertools import starmap +from typing import TYPE_CHECKING, Any, Callable + +import sqlglot as sg +import sqlglot.expressions as sge +import toolz +from public import public + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.backends.base.sqlglot.rewrites import Select, Window, sqlize +from ibis.expr.rewrites import ( + add_one_to_nth_value_input, + add_order_by_to_empty_ranking_window_functions, + empty_in_values_right_side, + one_to_zero_index, + replace_scalar_parameter, + unwrap_scalar_parameter, +) + +if TYPE_CHECKING: + import ibis.expr.schema as sch + import ibis.expr.types as ir + from ibis.backends.base.sqlglot.datatypes import SqlglotType + + +class AggGen: + __slots__ = ("aggfunc",) + + def __init__(self, *, aggfunc: Callable) -> None: + self.aggfunc = aggfunc + + def __getattr__(self, name: str) -> partial: + return partial(self.aggfunc, name) + + def __getitem__(self, key: str) -> partial: + return getattr(self, key) + + +class FuncGen: + __slots__ = () + + def __getattr__(self, name: str) -> partial: + return lambda *args, **kwargs: sg.func(name, *map(sge.convert, args), **kwargs) + + def __getitem__(self, key: str) -> partial: + return getattr(self, key) + + def array(self, *args): + return sge.Array.from_arg_list(list(map(sge.convert, args))) + + def tuple(self, *args): + return sg.func("tuple", *map(sge.convert, args)) + + def exists(self, query): + return sge.Exists(this=query) + + def concat(self, *args): + return sge.Concat(expressions=list(map(sge.convert, args))) + + def map(self, keys, values): + return sge.Map(keys=keys, values=values) + + +class ColGen: + __slots__ = () + + def __getattr__(self, name: str) -> sge.Column: + return sg.column(name) + + def __getitem__(self, key: str) -> sge.Column: + return sg.column(key) + + +def paren(expr): + """Wrap a sqlglot expression in parentheses.""" + return sge.Paren(this=expr) + + +def parenthesize(op, arg): + if isinstance(op, (ops.Binary, ops.Unary)): + return paren(arg) + # function calls don't need parens + return arg + + +C = ColGen() +F = FuncGen() +NULL = sge.NULL +FALSE = 
sge.FALSE +TRUE = sge.TRUE +STAR = sge.Star() + + +@public +class SQLGlotCompiler(abc.ABC): + __slots__ = "agg", "f" + + rewrites: tuple = ( + empty_in_values_right_side, + add_order_by_to_empty_ranking_window_functions, + one_to_zero_index, + add_one_to_nth_value_input, + ) + """A sequence of rewrites to apply to the expression tree before compilation.""" + + no_limit_value: sge.Null | None = None + """The value to use to indicate no limit.""" + + quoted: bool | None = None + """Whether to always quote identifiers.""" + + NAN = sge.Literal.number("'NaN'::double") + """Backend's NaN literal.""" + + POS_INF = sge.Literal.number("'Inf'::double") + """Backend's positive infinity literal.""" + + NEG_INF = sge.Literal.number("'-Inf'::double") + """Backend's negative infinity literal.""" + + def __init__(self) -> None: + self.agg = AggGen(aggfunc=self._aggregate) + self.f = FuncGen() + + @property + @abc.abstractmethod + def dialect(self) -> str: + """Backend dialect.""" + + @property + @abc.abstractmethod + def type_mapper(self) -> type[SqlglotType]: + """The type mapper for the backend.""" + + @abc.abstractmethod + def _aggregate(self, funcname, *args, where): + """Translate an aggregate function. + + Three flavors of filtering aggregate function inputs: + + 1. supports filter (duckdb, postgres, others) + e.g.: sum(x) filter (where predicate) + 2. use null to filter out + e.g.: sum(if(predicate, x, NULL)) + 3. clickhouse's ${func}If implementation, e.g.: + sumIf(predicate, x) + """ + + # Concrete API + + def if_(self, condition, true, false: sge.Expression | None = None) -> sge.If: + return sge.If( + this=sge.convert(condition), + true=sge.convert(true), + false=false if false is None else sge.convert(false), + ) + + def cast(self, arg, to: dt.DataType) -> sge.Cast: + return sg.cast(sge.convert(arg), to=self.type_mapper.from_ibis(to)) + + def translate(self, op, *, params: Mapping[ir.Value, Any]) -> sge.Expression: + """Translate an ibis operation to a sqlglot expression. 
+
+        Parameters
+        ----------
+        op
+            An ibis operation
+        params
+            A mapping of expressions to concrete values
+
+        Returns
+        -------
+        sqlglot.expressions.Expression
+            A sqlglot expression
+        """
+
+        gen_alias_index = itertools.count()
+        quoted = self.quoted
+
+        def fn(node, _, **kwargs):
+            result = self.visit_node(node, **kwargs)
+
+            # don't alias root nodes or value ops
+            if node is op or isinstance(node, ops.Value):
+                return result
+
+            alias_index = next(gen_alias_index)
+            alias = sg.to_identifier(f"t{alias_index:d}", quoted=quoted)
+
+            try:
+                return result.subquery(alias)
+            except AttributeError:
+                return result.as_(alias, quoted=quoted)
+
+        # substitute parameters immediately to avoid having to define a
+        # ScalarParameter translation rule
+        #
+        # this lets us avoid threading `params` through every `translate_val`
+        # call only to be used in the one place it would be needed: the
+        # ScalarParameter `translate_val` rule
+        params = {
+            # remove aliases from scalar parameters
+            param.op().replace(unwrap_scalar_parameter): value
+            for param, value in (params or {}).items()
+        }
+
+        op = op.replace(
+            replace_scalar_parameter(params)
+            | functools.reduce(operator.or_, self.rewrites)
+        )
+        op = sqlize(op)
+        # apply translate rules in topological order
+        results = op.map(fn)
+        node = results[op]
+        return node.this if isinstance(node, sge.Subquery) else node
+
+    @singledispatchmethod
+    def visit_node(self, op: ops.Node, **_):
+        raise com.OperationNotDefinedError(
+            f"No translation rule for {type(op).__name__}"
+        )
+
+    @visit_node.register(ops.Field)
+    def visit_Field(self, op, *, rel, name):
+        return sg.column(
+            self._gen_valid_name(name), table=rel.alias_or_name, quoted=self.quoted
+        )
+
+    @visit_node.register(ops.ScalarSubquery)
+    def visit_ScalarSubquery(self, op, *, rel):
+        return rel.this.subquery()
+
+    @visit_node.register(ops.Alias)
+    def visit_Alias(self, op, *, arg, name):
+        return arg
+
+    @visit_node.register(ops.Literal)
+    def visit_Literal(self, op, *, value, dtype, **kw):
+        if value is None:
+            if dtype.nullable:
+                return NULL if dtype.is_null() else self.cast(NULL, dtype)
+            raise com.UnsupportedOperationError(
+                f"Unsupported NULL for non-nullable type: {dtype!r}"
+            )
+        elif dtype.is_integer():
+            return sge.convert(value)
+        elif dtype.is_floating():
+            if math.isnan(value):
+                return self.NAN
+            elif math.isinf(value):
+                return self.POS_INF if value > 0 else self.NEG_INF
+            return sge.convert(value)
+        elif dtype.is_decimal():
+            return self.cast(sge.convert(str(value)), dtype)
+        elif dtype.is_interval():
+            return sge.Interval(
+                this=sge.convert(str(value)), unit=dtype.resolution.upper()
+            )
+        elif dtype.is_boolean():
+            return sge.Boolean(this=bool(value))
+        elif dtype.is_string():
+            return sge.convert(value)
+        elif dtype.is_inet() or dtype.is_macaddr():
+            return sge.convert(str(value))
+        elif dtype.is_timestamp() or dtype.is_time():
+            return self.cast(value.isoformat(), dtype)
+        elif dtype.is_date():
+            return self.f.datefromparts(value.year, value.month, value.day)
+        elif dtype.is_array():
+            value_type = dtype.value_type
+            return self.f.array(
+                *(
+                    self.visit_Literal(
+                        ops.Literal(v, value_type), value=v, dtype=value_type
+                    )
+                    for v in value
+                )
+            )
+        elif dtype.is_map():
+            key_type = dtype.key_type
+            keys = self.f.array(
+                *(
+                    self.visit_Literal(
+                        ops.Literal(k, key_type), value=k, dtype=key_type, **kw
+                    )
+                    for k in value.keys()
+                )
+            )
+
+            value_type = 
dtype.value_type + values = self.f.array( + *( + self.visit_Literal( + ops.Literal(v, value_type), value=v, dtype=value_type, **kw + ) + for v in value.values() + ) + ) + + return self.f.map(keys, values) + elif dtype.is_struct(): + items = [ + sge.Slice( + this=sge.convert(k), + expression=self.visit_Literal( + ops.Literal(v, field_dtype), value=v, dtype=field_dtype, **kw + ), + ) + for field_dtype, (k, v) in zip(dtype.types, value.items()) + ] + return sge.Struct.from_arg_list(items) + else: + raise NotImplementedError(f"Unsupported type: {dtype!r}") + + @visit_node.register(ops.BitwiseNot) + def visit_BitwiseNot(self, op, *, arg): + return sge.BitwiseNot(this=arg) + + ### Mathematical Calisthenics + + @visit_node.register(ops.E) + def visit_E(self, op): + return self.f.exp(1) + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + if base is None: + return self.f.ln(arg) + elif str(base) in ("2", "10"): + return self.f[f"log{base}"](arg) + else: + return self.f.ln(arg) / self.f.ln(base) + + @visit_node.register(ops.Clip) + def visit_Clip(self, op, *, arg, lower, upper): + if upper is not None: + arg = self.if_(arg.is_(NULL), arg, self.f.least(upper, arg)) + + if lower is not None: + arg = self.if_(arg.is_(NULL), arg, self.f.greatest(lower, arg)) + + return arg + + @visit_node.register(ops.FloorDivide) + def visit_FloorDivide(self, op, *, left, right): + return self.cast(self.f.floor(left / right), op.dtype) + + @visit_node.register(ops.Ceil) + @visit_node.register(ops.Floor) + def visit_CeilFloor(self, op, *, arg): + return self.cast(self.f[type(op).__name__.lower()](arg), op.dtype) + + @visit_node.register(ops.Round) + def visit_Round(self, op, *, arg, digits): + if digits is not None: + return sge.Round(this=arg, decimals=digits) + return sge.Round(this=arg) + + ### Dtype Dysmorphia + + @visit_node.register(ops.TryCast) + def visit_TryCast(self, op, *, arg, to): + return sge.TryCast(this=arg, to=self.type_mapper.from_ibis(to)) + + ### Comparator Conundrums + + @visit_node.register(ops.Between) + def visit_Between(self, op, *, arg, lower_bound, upper_bound): + return sge.Between(this=arg, low=lower_bound, high=upper_bound) + + @visit_node.register(ops.Negate) + def visit_Negate(self, op, *, arg): + return -paren(arg) + + @visit_node.register(ops.Not) + def visit_Not(self, op, *, arg): + if isinstance(arg, sge.Filter): + return sge.Filter(this=sg.not_(arg.this), expression=arg.expression) + return sg.not_(paren(arg)) + + ### Timey McTimeFace + + @visit_node.register(ops.Date) + def visit_Date(self, op, *, arg): + return sge.Date(this=arg) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return sge.DateFromParts(year=year, month=month, day=day) + + @visit_node.register(ops.Time) + def visit_Time(self, op, *, arg): + return self.cast(arg, to=dt.time) + + @visit_node.register(ops.TimestampNow) + def visit_TimestampNow(self, op): + """DuckDB current timestamp defaults to timestamp + tz.""" + return self.cast(sge.CurrentTimestamp(), dt.timestamp) + + @visit_node.register(ops.Strftime) + def visit_Strftime(self, op, *, arg, format_str): + if not isinstance(op.format_str, ops.Literal): + raise com.UnsupportedOperationError( + f"{self.dialect} `format_str` must be a literal `str`; got {type(op.format_str)}" + ) + return sge.TimeToStr(this=arg, format=format_str) + + @visit_node.register(ops.ExtractEpochSeconds) + def visit_ExtractEpochSeconds(self, op, *, arg): + return self.f.epoch(self.cast(arg, dt.timestamp)) + + 
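# The Extract* visitors below all funnel through self.f.extract, i.e.
+    # sqlglot's EXTRACT expression; e.g. ops.ExtractYear(ts) lowers to
+    # EXTRACT('year' FROM ts). Dialects that spell a component differently
+    # are expected to override the corresponding visit_Extract* method.
+    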
@visit_node.register(ops.ExtractYear) + def visit_ExtractYear(self, op, *, arg): + return self.f.extract("year", arg) + + @visit_node.register(ops.ExtractMonth) + def visit_ExtractMonth(self, op, *, arg): + return self.f.extract("month", arg) + + @visit_node.register(ops.ExtractDay) + def visit_ExtractDay(self, op, *, arg): + return self.f.extract("day", arg) + + @visit_node.register(ops.ExtractDayOfYear) + def visit_ExtractDayOfYear(self, op, *, arg): + return self.f.extract("dayofyear", arg) + + @visit_node.register(ops.ExtractQuarter) + def visit_ExtractQuarter(self, op, *, arg): + return self.f.extract("quarter", arg) + + @visit_node.register(ops.ExtractWeekOfYear) + def visit_ExtractWeekOfYear(self, op, *, arg): + return self.f.extract("week", arg) + + @visit_node.register(ops.ExtractHour) + def visit_ExtractHour(self, op, *, arg): + return self.f.extract("hour", arg) + + @visit_node.register(ops.ExtractMinute) + def visit_ExtractMinute(self, op, *, arg): + return self.f.extract("minute", arg) + + @visit_node.register(ops.ExtractSecond) + def visit_ExtractSecond(self, op, *, arg): + return self.f.extract("second", arg) + + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimestampTruncate(self, op, *, arg, unit): + unit_mapping = { + "Y": "year", + "M": "month", + "W": "week", + "D": "day", + "h": "hour", + "m": "minute", + "s": "second", + "ms": "ms", + "us": "us", + } + + unit = unit.short + if (duckunit := unit_mapping.get(unit)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f.date_trunc(duckunit, arg) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return (self.f.dayofweek(arg) + 6) % 7 + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + # day of week number is 0-indexed + # Sunday == 0 + # Saturday == 6 + return sge.Case( + this=(self.f.dayofweek(arg) + 6) % 7, + ifs=list(starmap(self.if_, enumerate(calendar.day_name))), + ) + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + return sge.Interval(this=sge.convert(arg), unit=unit.singular.upper()) + + ### String Instruments + + @visit_node.register(ops.Strip) + def visit_Strip(self, op, *, arg): + return self.f.trim(arg, string.whitespace) + + @visit_node.register(ops.RStrip) + def visit_RStrip(self, op, *, arg): + return self.f.rtrim(arg, string.whitespace) + + @visit_node.register(ops.LStrip) + def visit_LStrip(self, op, *, arg): + return self.f.ltrim(arg, string.whitespace) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + if_pos = sge.Substring(this=arg, start=start + 1, length=length) + if_neg = sge.Substring(this=arg, start=start, length=length) + + return self.if_(start >= 0, if_pos, if_neg) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise com.UnsupportedOperationError( + "String find doesn't support `end` argument" + ) + + if start is not None: + arg = self.f.substr(arg, start + 1) + pos = self.f.strpos(arg, substr) + return self.if_(pos > 0, pos + start, 0) + + return self.f.strpos(arg, substr) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return self.f.regexp_matches(arg, pattern, "s") + + @visit_node.register(ops.RegexReplace) + def visit_RegexReplace(self, op, *, 
arg, pattern, replacement): + return self.f.regexp_replace(arg, pattern, replacement, "g") + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect) + + @visit_node.register(ops.StringConcat) + def visit_StringConcat(self, op, *, arg): + return self.f.concat(*arg) + + @visit_node.register(ops.StringSQLLike) + def visit_StringSQLLike(self, op, *, arg, pattern, escape): + return arg.like(pattern) + + @visit_node.register(ops.StringSQLILike) + def visit_StringSQLILike(self, op, *, arg, pattern, escape): + return arg.ilike(pattern) + + @visit_node.register(ops.StringToTimestamp) + def visit_StringToTimestamp(self, op, *, arg, format_str): + return sge.StrToTime(this=arg, format=format_str) + + ### NULL PLAYER CHARACTER + @visit_node.register(ops.IsNull) + def visit_IsNull(self, op, *, arg): + return arg.is_(NULL) + + @visit_node.register(ops.NotNull) + def visit_NotNull(self, op, *, arg): + return arg.is_(sg.not_(NULL)) + + @visit_node.register(ops.InValues) + def visit_InValues(self, op, *, value, options): + return value.isin(*options) + + ### Definitely Not Tensors + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, sep, arg): + return self.f.array_to_string(arg, sep) + + ### Counting + + @visit_node.register(ops.CountDistinct) + def visit_CountDistinct(self, op, *, arg, where): + return self.agg.count(sge.Distinct(expressions=[arg]), where=where) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, arg, where): + return self.agg.count(sge.Distinct(expressions=[STAR]), where=where) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, arg, where): + return self.agg.count(STAR, where=where) + + @visit_node.register(ops.Sum) + def visit_Sum(self, op, *, arg, where): + arg = self.cast(arg, op.dtype) if op.arg.dtype.is_boolean() else arg + return self.agg.sum(arg, where=where) + + ### Stats + + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + def visit_Quantile(self, op, *, arg, quantile, where): + suffix = "cont" if op.arg.dtype.is_numeric() else "disc" + return self.agg[f"quantile_{suffix}"](arg, quantile, where=where) + + @visit_node.register(ops.Variance) + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Covariance) + def visit_VarianceStandardDevCovariance(self, op, *, how, where, **kw): + hows = {"sample": "samp", "pop": "pop"} + funcs = { + ops.Variance: "var", + ops.StandardDev: "stddev", + ops.Covariance: "covar", + } + + args = [] + + for oparg, arg in zip(op.args, kw.values()): + if (arg_dtype := oparg.dtype).is_boolean(): + arg = self.cast(arg, dt.Int32(nullable=arg_dtype.nullable)) + args.append(arg) + + funcname = f"{funcs[type(op)]}_{hows[how]}" + return self.agg[funcname](*args, where=where) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how == "heavy": + raise com.UnsupportedOperationError( + f"how='heavy' not supported in the {self.dialect} backend" + ) + return self.agg[how](arg, where=where) + + @visit_node.register(ops.SimpleCase) + @visit_node.register(ops.SearchedCase) + def visit_SimpleCase(self, op, *, base=None, cases, results, default): + return sge.Case( + this=base, ifs=list(map(self.if_, cases, results)), default=default + ) + + @visit_node.register(ops.ExistsSubquery) + def visit_ExistsSubquery(self, op, *, rel): + return self.f.exists(rel.this) + + 
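A quick illustration of the sge.Case construction used by visit_SimpleCase above; this standalone sketch uses only public sqlglot APIs and assumes, per the map(self.if_, cases, results) call, that self.if_ produces sge.If nodes:

import sqlglot.expressions as sge

case = sge.Case(
    this=sge.convert(2),  # the SimpleCase base; None for SearchedCase
    ifs=[
        sge.If(this=sge.convert(1), true=sge.convert("one")),
        sge.If(this=sge.convert(2), true=sge.convert("two")),
    ],
    default=sge.convert("other"),
)
# renders roughly as: CASE 2 WHEN 1 THEN 'one' WHEN 2 THEN 'two' ELSE 'other' END
print(case.sql())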
@visit_node.register(ops.InSubquery) + def visit_InSubquery(self, op, *, rel, needle): + return needle.isin(rel.this) + + @visit_node.register(ops.ArrayColumn) + def visit_ArrayColumn(self, op, *, cols): + return self.f.array(*cols) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return sge.Struct.from_arg_list( + [ + sge.Slice(this=sge.convert(name), expression=value) + for name, value in zip(names, values) + ] + ) + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field): + return arg[sge.convert(field)] + + @visit_node.register(ops.IdenticalTo) + def visit_IdenticalTo(self, op, *, left, right): + return sge.NullSafeEQ(this=left, expression=right) + + @visit_node.register(ops.Greatest) + def visit_Greatest(self, op, *, arg): + return self.f.greatest(*arg) + + @visit_node.register(ops.Least) + def visit_Least(self, op, *, arg): + return self.f.least(*arg) + + @visit_node.register(ops.Coalesce) + def visit_Coalesce(self, op, *, arg): + return self.f.coalesce(*arg) + + ### Ordering and window functions + + @visit_node.register(ops.RowNumber) + def visit_RowNumber(self, op): + return sge.RowNumber() + + @visit_node.register(ops.SortKey) + def visit_SortKey(self, op, *, expr, ascending: bool): + return sge.Ordered(this=expr, desc=not ascending) + + @visit_node.register(ops.ApproxMedian) + def visit_ApproxMedian(self, op, *, arg, where): + return self.agg.approx_quantile(arg, 0.5, where=where) + + @visit_node.register(ops.WindowBoundary) + def visit_WindowBoundary(self, op, *, value, preceding): + # TODO: bit of a hack to return a dict, but there's no sqlglot expression + # that corresponds to _only_ this information + return {"value": value, "side": "preceding" if preceding else "following"} + + @visit_node.register(Window) + def visit_Window(self, op, *, how, func, start, end, group_by, order_by): + if start is None: + start = {} + if end is None: + end = {} + + start_value = start.get("value", "UNBOUNDED") + start_side = start.get("side", "PRECEDING") + end_value = end.get("value", "UNBOUNDED") + end_side = end.get("side", "FOLLOWING") + + spec = sge.WindowSpec( + kind=op.how.upper(), + start=start_value, + start_side=start_side, + end=end_value, + end_side=end_side, + over="OVER", + ) + order = sge.Order(expressions=order_by) if order_by else None + + return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) + + @visit_node.register(ops.Lag) + @visit_node.register(ops.Lead) + def visit_LagLead(self, op, *, arg, offset, default): + args = [arg] + + if default is not None: + if offset is None: + offset = 1 + + args.append(offset) + args.append(default) + elif offset is not None: + args.append(offset) + + return self.f[type(op).__name__.lower()](*args) + + @visit_node.register(ops.Argument) + def visit_Argument(self, op, *, name: str, shape, dtype): + return sg.to_identifier(op.param) + + @visit_node.register(ops.RowID) + def visit_RowID(self, op, *, table): + return sg.column(op.name, table=table.alias_or_name, quoted=self.quoted) + + @visit_node.register(ops.ScalarUDF) + def visit_ScalarUDF(self, op, **kw): + return self.f[op.__full_name__](*kw.values()) + + @visit_node.register(ops.AggUDF) + def visit_AggUDF(self, op, *, where, **kw): + return self.agg[op.__full_name__](*kw.values(), where=where) + + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.DateDelta) + @visit_node.register(ops.TimestampDelta) + def visit_TimestampDelta(self, op, *, part, left, right): + # dialect is 
necessary due to sqlglot's default behavior + # of `part` coming last + return sge.DateDiff( + this=left, expression=right, unit=part, dialect=self.dialect + ) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + origin = self.f.cast("epoch", self.type_mapper.from_ibis(dt.timestamp)) + if offset is not None: + origin += offset + return self.f.time_bucket(interval, arg, origin) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return sge.ArrayConcat(this=arg[0], expressions=list(arg[1:])) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return sge.ArrayContains(this=arg, expression=other) + + ## relations + + def _dedup_name( + self, key: str, value: sge.Expression + ) -> Iterator[sge.Alias | sge.Column]: + return ( + value.as_(key, quoted=self.quoted) + if not isinstance(value, sge.Column) or key != value.name + else value + ) + + @visit_node.register(Select) + def visit_Select(self, op, *, parent, selections, predicates, sort_keys): + # if we've constructed a useless projection return the parent relation + if not selections and not predicates and not sort_keys: + return parent + + result = parent + + if selections: + result = sg.select(*starmap(self._dedup_name, selections.items())).from_( + result + ) + + if predicates: + result = result.where(*predicates) + + if sort_keys: + result = result.order_by(*sort_keys) + + return result + + @visit_node.register(ops.DummyTable) + def visit_DummyTable(self, op, *, values): + return sg.select(*starmap(self._dedup_name, values.items())) + + @visit_node.register(ops.UnboundTable) + def visit_UnboundTable(self, op, *, name: str, schema: sch.Schema): + return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.InMemoryTable) + def visit_InMemoryTable(self, op, *, name: str, schema: sch.Schema, data): + return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.DatabaseTable) + def visit_DatabaseTable(self, op, *, name, namespace, schema, source): + return sg.table( + name, db=namespace.schema, catalog=namespace.database, quoted=self.quoted + ) + + @visit_node.register(ops.SelfReference) + def visit_SelfReference(self, op, *, parent, identifier): + return parent + + @visit_node.register(ops.JoinChain) + def visit_JoinChain(self, op, *, first, rest, values): + result = sg.select(*starmap(self._dedup_name, values.items())).from_(first) + + for link in rest: + if isinstance(link, sge.Alias): + link = link.this + result = result.join(link) + return result + + @visit_node.register(ops.JoinLink) + def visit_JoinLink(self, op, *, how, table, predicates): + sides = { + "inner": None, + "left": "left", + "right": "right", + "semi": "left", + "anti": "left", + "cross": None, + "outer": "full", + "asof": "left", + "any_left": "left", + "any_inner": None, + } + kinds = { + "any_left": "any", + "any_inner": "any", + "asof": "asof", + "inner": "inner", + "left": "outer", + "right": "outer", + "semi": "semi", + "anti": "anti", + "cross": "cross", + "outer": "outer", + } + assert predicates + return sge.Join( + this=table, side=sides[how], kind=kinds[how], on=sg.and_(*predicates) + ) + + @staticmethod + def _gen_valid_name(name: str) -> str: + return name + + @visit_node.register(ops.Project) + def visit_Project(self, op, *, parent, values): + # needs_alias should never be true here in explicitly, but it may get + # passed via a (recursive) call to translate_val + return 
sg.select(*starmap(self._dedup_name, values.items())).from_(parent) + + @staticmethod + def _generate_groups(groups): + return map(sge.convert, range(1, len(groups) + 1)) + + @visit_node.register(ops.Aggregate) + def visit_Aggregate(self, op, *, parent, groups, metrics): + sel = sg.select( + *starmap( + self._dedup_name, toolz.keymap(self._gen_valid_name, groups).items() + ), + *starmap( + self._dedup_name, toolz.keymap(self._gen_valid_name, metrics).items() + ), + ).from_(parent) + + if groups: + sel = sel.group_by(*self._generate_groups(groups.values())) + + return sel + + def _add_parens(self, op, sg_expr): + if type(op) in _BINARY_INFIX_OPS: + return paren(sg_expr) + return sg_expr + + @visit_node.register(ops.Filter) + def visit_Filter(self, op, *, parent, predicates): + predicates = ( + self._add_parens(raw_predicate, predicate) + for raw_predicate, predicate in zip(op.predicates, predicates) + ) + try: + return parent.where(*predicates) + except AttributeError: + return sg.select(STAR).from_(parent).where(*predicates) + + @visit_node.register(ops.Sort) + def visit_Sort(self, op, *, parent, keys): + try: + return parent.order_by(*keys) + except AttributeError: + return sg.select(STAR).from_(parent).order_by(*keys) + + @visit_node.register(ops.Union) + def visit_Union(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.union( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Intersection) + def visit_Intersection(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.intersect( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Difference) + def visit_Difference(self, op, *, left, right, distinct): + if isinstance(left, sge.Table): + left = sg.select(STAR).from_(left) + + if isinstance(right, sge.Table): + right = sg.select(STAR).from_(right) + + return sg.except_( + left.args.get("this", left), + right.args.get("this", right), + distinct=distinct, + ) + + @visit_node.register(ops.Limit) + def visit_Limit(self, op, *, parent, n, offset): + # push limit/offset into subqueries + if isinstance(parent, sge.Subquery) and parent.this.args.get("limit") is None: + result = parent.this + alias = parent.alias + else: + result = sg.select(STAR).from_(parent) + alias = None + + if isinstance(n, int): + result = result.limit(n) + elif n is not None: + result = result.limit(sg.select(n).from_(parent).subquery()) + else: + assert n is None, n + if self.no_limit_value is not None: + result = result.limit(self.no_limit_value) + + assert offset is not None, "offset is None" + + if not isinstance(offset, int): + skip = offset + skip = sg.select(skip).from_(parent).subquery() + elif not offset: + if alias is not None: + return result.subquery(alias) + return result + else: + skip = offset + + result = result.offset(skip) + if alias is not None: + return result.subquery(alias) + return result + + @visit_node.register(ops.Distinct) + def visit_Distinct(self, op, *, parent): + return sg.select(STAR).distinct().from_(parent) + + @visit_node.register(ops.DropNa) + def visit_DropNa(self, op, *, parent, how, subset): + if subset is None: + subset = [ + sg.column(name, table=parent.alias_or_name, 
quoted=self.quoted) + for name in op.schema.names + ] + + if subset: + predicate = functools.reduce( + sg.and_ if how == "any" else sg.or_, + (sg.not_(col.is_(NULL)) for col in subset), + ) + elif how == "all": + predicate = FALSE + else: + predicate = None + + if predicate is None: + return parent + + try: + return parent.where(predicate) + except AttributeError: + return sg.select(STAR).from_(parent).where(predicate) + + @visit_node.register(ops.FillNa) + def visit_FillNa(self, op, *, parent, replacements): + if isinstance(replacements, Mapping): + mapping = replacements + else: + mapping = { + name: replacements + for name, dtype in op.schema.items() + if dtype.nullable + } + exprs = [ + ( + sg.alias( + sge.Coalesce( + this=sg.column(col, quoted=self.quoted), + expressions=[sge.convert(alt)], + ), + col, + ) + if (alt := mapping.get(col)) is not None + else sg.column(col, quoted=self.quoted) + ) + for col in op.schema.keys() + ] + return sg.select(*exprs).from_(parent) + + @visit_node.register(ops.View) + def visit_View(self, op, *, child, name: str): + # TODO: find a way to do this without creating a temporary view + backend = op.child.to_expr()._find_backend() + backend._create_temp_view(table_name=name, source=sg.select(STAR).from_(child)) + return sg.table(name, quoted=self.quoted) + + @visit_node.register(ops.SQLStringView) + def visit_SQLStringView(self, op, *, query: str, name: str, child): + table = sg.table(name, quoted=self.quoted) + return ( + sg.select(STAR).from_(table).with_(table, as_=query, dialect=self.dialect) + ) + + @visit_node.register(ops.SQLQueryResult) + def visit_SQLQueryResult(self, op, *, query, schema, source): + return sg.parse_one(query, read=self.dialect).subquery() + + @visit_node.register(ops.Unnest) + def visit_Unnest(self, op, *, arg): + return sge.Explode(this=arg) + + @visit_node.register(ops.RegexSplit) + def visit_RegexSplit(self, op, *, arg, pattern): + return sge.RegexpSplit(this=arg, expression=pattern) + + @visit_node.register(ops.Levenshtein) + def visit_Levenshtein(self, op, *, left, right): + return sge.Levenshtein(this=left, expression=right) + + @visit_node.register(ops.JoinTable) + def visit_JoinTable(self, op, *, parent, index): + return parent + + +_SIMPLE_OPS = { + ops.All: "bool_and", + ops.Any: "bool_or", + ops.ArgMax: "max_by", + ops.ArgMin: "min_by", + ops.Power: "pow", + # Unary operations + ops.IsNan: "isnan", + ops.IsInf: "isinf", + ops.Abs: "abs", + ops.Exp: "exp", + ops.Sqrt: "sqrt", + ops.Ln: "ln", + ops.Log2: "log2", + ops.Log10: "log", + ops.Acos: "acos", + ops.Asin: "asin", + ops.Atan: "atan", + ops.Atan2: "atan2", + ops.Cos: "cos", + ops.Sin: "sin", + ops.Tan: "tan", + ops.Cot: "cot", + ops.Pi: "pi", + ops.RandomScalar: "random", + ops.Sign: "sign", + # Unary aggregates + ops.ApproxCountDistinct: "approx_distinct", + ops.Median: "median", + ops.Mean: "avg", + ops.Max: "max", + ops.Min: "min", + ops.ArgMin: "argmin", + ops.ArgMax: "argmax", + ops.First: "first", + ops.Last: "last", + ops.Count: "count", + ops.All: "bool_and", + ops.Any: "bool_or", + ops.ArrayCollect: "array_agg", + ops.GroupConcat: "group_concat", + # string operations + ops.StringContains: "contains", + ops.StringLength: "length", + ops.Lowercase: "lower", + ops.Uppercase: "upper", + ops.StartsWith: "starts_with", + ops.StrRight: "right", + # Other operations + ops.IfElse: "if", + ops.ArrayLength: "length", + ops.NullIf: "nullif", + ops.Repeat: "repeat", + ops.Map: "map", + ops.JSONGetItem: "json_extract", + ops.ArrayFlatten: "flatten", + # common enough 
to be in the base, but not modeled in sqlglot + ops.NTile: "ntile", + ops.Degrees: "degrees", + ops.Radians: "radians", + ops.FirstValue: "first_value", + ops.LastValue: "last_value", + ops.NthValue: "nth_value", + ops.MinRank: "rank", + ops.DenseRank: "dense_rank", + ops.PercentRank: "percent_rank", + ops.CumeDist: "cume_dist", + ops.ArrayLength: "array_size", + ops.ArraySort: "array_sort", + ops.Capitalize: "initcap", + ops.Translate: "translate", + ops.StringReplace: "replace", + ops.Reverse: "reverse", + ops.StringSplit: "split", +} + +_BINARY_INFIX_OPS = { + # Binary operations + ops.Add: sge.Add, + ops.Subtract: sge.Sub, + ops.Multiply: sge.Mul, + ops.Divide: sge.Div, + ops.Modulus: sge.Mod, + # Comparisons + ops.GreaterEqual: sge.GTE, + ops.Greater: sge.GT, + ops.LessEqual: sge.LTE, + ops.Less: sge.LT, + ops.Equals: sge.EQ, + ops.NotEquals: sge.NEQ, + # Boolean comparisons + ops.And: sge.And, + ops.Or: sge.Or, + ops.Xor: sge.Xor, + # Bitwise business + ops.BitwiseLeftShift: sge.BitwiseLeftShift, + ops.BitwiseRightShift: sge.BitwiseRightShift, + ops.BitwiseAnd: sge.BitwiseAnd, + ops.BitwiseOr: sge.BitwiseOr, + ops.BitwiseXor: sge.BitwiseXor, + # Time arithmetic + ops.DateAdd: sge.Add, + ops.DateSub: sge.Sub, + ops.DateDiff: sge.Sub, + ops.TimestampAdd: sge.Add, + ops.TimestampSub: sge.Sub, + ops.TimestampDiff: sge.Sub, + # Interval Marginalia + ops.IntervalAdd: sge.Add, + ops.IntervalMultiply: sge.Mul, + ops.IntervalSubtract: sge.Sub, +} + +for _op, _sym in _BINARY_INFIX_OPS.items(): + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _sym: sge.Expression = _sym, left, right): + return _sym( + this=self._add_parens(op.left, left), + expression=self._add_parens(op.right, right), + ) + + setattr(SQLGlotCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _sym, _fmt + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @SQLGlotCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(SQLGlotCompiler, f"visit_{_op.__name__}", _fmt) + + +del _op, _name, _fmt diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 7d59e48226ba..326f20315cf1 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -408,6 +408,13 @@ def _from_sqlglot_TIMESTAMP_MS(cls) -> dt.Timestamp: def _from_sqlglot_TIMESTAMP_NS(cls) -> dt.Timestamp: return dt.Timestamp(scale=9, nullable=cls.default_nullable) + @classmethod + def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial): + assert ( + dtype.geotype == "geometry" + ), "DuckDB only supports geometry types; geography types are not supported" + return sge.DataType(this=typecode.GEOMETRY) + class TrinoType(SqlglotType): dialect = "trino" diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index fcaed94d78dd..ca999208aa39 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -5,6 +5,7 @@ from typing import Literal, Optional +import toolz from public import public import ibis.expr.datashape as ds @@ -114,8 +115,8 @@ def merge_select_select(_): return Select( _.parent.parent, selections=selections, - predicates=_.parent.predicates + predicates, - 
sort_keys=_.parent.sort_keys + sort_keys, + predicates=tuple(toolz.unique(_.parent.predicates + predicates)), + sort_keys=tuple(toolz.unique(_.parent.sort_keys + sort_keys)), ) diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index b5f90b64f47d..746d35f41cc9 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -2,16 +2,18 @@ import ast import atexit +import contextlib import glob -from contextlib import closing, suppress +from contextlib import closing from functools import partial from typing import TYPE_CHECKING, Any, Literal +from urllib.parse import parse_qs, urlparse import clickhouse_connect as cc import pyarrow as pa import pyarrow_hotfix # noqa: F401 -import sqlalchemy as sa import sqlglot as sg +import sqlglot.expressions as sge import toolz from clickhouse_connect.driver.external import ExternalData @@ -23,8 +25,9 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import BaseBackend, CanCreateDatabase -from ibis.backends.base.sqlglot import STAR, C, F -from ibis.backends.clickhouse.compiler import translate +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C +from ibis.backends.clickhouse.compiler import ClickHouseCompiler from ibis.backends.clickhouse.datatypes import ClickhouseType if TYPE_CHECKING: @@ -33,15 +36,14 @@ import pandas as pd - from ibis.common.typing import SupportsSchema - def _to_memtable(v): return ibis.memtable(v).op() if not isinstance(v, ops.InMemoryTable) else v -class Backend(BaseBackend, CanCreateDatabase): +class Backend(SQLGlotBackend, CanCreateDatabase): name = "clickhouse" + compiler = ClickHouseCompiler() # ClickHouse itself does, but the client driver does not supports_temporary_tables = False @@ -57,25 +59,6 @@ class Options(ibis.config.Config): bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool" - def _log(self, sql: str) -> None: - """Log `sql`. - - This method can be implemented by subclasses. Logging occurs when - `ibis.options.verbose` is `True`. - """ - util.log(sql) - - def sql( - self, - query: str, - schema: SupportsSchema | None = None, - dialect: str | None = None, - ) -> ir.Table: - query = self._transpile_sql(query, dialect=dialect) - if schema is None: - schema = self._get_schema_using_query(query) - return ops.SQLQueryResult(query, ibis.schema(schema), self).to_expr() - def _from_url(self, url: str, **kwargs) -> BaseBackend: """Connect to a backend using a URL `url`. 
@@ -91,25 +74,32 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend: BaseBackend A backend instance """ - url = sa.engine.make_url(url) - - kwargs = toolz.merge( - { - name: value - for name in ("host", "port", "database", "password") - if (value := getattr(url, name, None)) - }, - kwargs, - ) - if username := url.username: - kwargs["user"] = username - - kwargs.update(url.query) + url = urlparse(url) + database = url.path[1:] + query_params = parse_qs(url.query) + + connect_args = { + "user": url.username, + "password": url.password or "", + "host": url.hostname, + "database": database or "", + } + + for name, value in query_params.items(): + if len(value) > 1: + connect_args[name] = value + elif len(value) == 1: + connect_args[name] = value[0] + else: + raise com.IbisError(f"Invalid URL parameter: {name}") + + kwargs.update(connect_args) self._convert_kwargs(kwargs) + return self.connect(**kwargs) def _convert_kwargs(self, kwargs): - with suppress(KeyError): + with contextlib.suppress(KeyError): kwargs["secure"] = bool(ast.literal_eval(kwargs["secure"])) def do_connect( @@ -182,15 +172,20 @@ def do_connect( def version(self) -> str: return self.con.server_version + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + with contextlib.closing(self.raw_sql(*args, **kwargs)) as result: + yield result + @property def current_database(self) -> str: - with closing(self.raw_sql(sg.select(F.currentDatabase()))) as result: + with self._safe_raw_sql(sg.select(self.compiler.f.currentDatabase())) as result: [(db,)] = result.result_rows return db def list_databases(self, like: str | None = None) -> list[str]: - with closing( - self.raw_sql(sg.select(C.name).from_(sg.table("databases", db="system"))) + with self._safe_raw_sql( + sg.select(C.name).from_(sg.table("databases", db="system")) ) as result: results = result.result_columns @@ -206,13 +201,13 @@ def list_tables( query = sg.select(C.name).from_(sg.table("tables", db="system")) if database is None: - database = F.currentDatabase() + database = self.compiler.f.currentDatabase() else: - database = sg.exp.convert(database) + database = sge.convert(database) query = query.where(C.database.eq(database).or_(C.is_temporary)) - with closing(self.raw_sql(query)) as result: + with self._safe_raw_sql(query) as result: results = result.result_columns if results: @@ -380,6 +375,8 @@ def execute( if df.empty: df = pd.DataFrame(columns=schema.names) + else: + df.columns = list(schema.names) # TODO: remove the extra conversion # @@ -387,64 +384,6 @@ def execute( # in single column conversion and whole table conversion return expr.__pandas_result__(table.__pandas_result__(df)) - def _to_sqlglot( - self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any - ): - """Compile an Ibis expression to a sqlglot object.""" - table_expr = expr.as_table() - - if limit == "default": - limit = ibis.options.sql.default_limit - if limit is not None: - table_expr = table_expr.limit(limit) - - if params is None: - params = {} - - sql = translate(table_expr.op(), params=params) - assert not isinstance(sql, sg.exp.Subquery) - - if isinstance(sql, sg.exp.Table): - sql = sg.select(STAR).from_(sql) - - assert not isinstance(sql, sg.exp.Subquery) - return sql - - def compile( - self, expr: ir.Expr, limit: str | None = None, params=None, **kwargs: Any - ): - """Compile an Ibis expression to a ClickHouse SQL string.""" - return self._to_sqlglot(expr, limit=limit, params=params, **kwargs).sql( - dialect=self.name, pretty=True - ) - - def _to_sql(self, 
expr: ir.Expr, **kwargs) -> str: - return self.compile(expr, **kwargs) - - def table(self, name: str, database: str | None = None) -> ir.Table: - """Construct a table expression. - - Parameters - ---------- - name - Table name - database - Database name - - Returns - ------- - Table - Table expression - """ - schema = self.get_schema(name, database=database) - op = ops.DatabaseTable( - name=name, - schema=schema, - source=self, - namespace=ops.Namespace(database=database), - ) - return op.to_expr() - def insert( self, name: str, @@ -468,7 +407,7 @@ def insert( def raw_sql( self, - query: str | sg.exp.Expression, + query: str | sge.Expression, external_tables: Mapping[str, pd.DataFrame] | None = None, **kwargs, ) -> Any: @@ -491,7 +430,7 @@ def raw_sql( """ external_tables = toolz.valmap(_to_memtable, external_tables or {}) external_data = self._normalize_external_tables(external_tables) - with suppress(AttributeError): + with contextlib.suppress(AttributeError): query = query.sql(dialect=self.name, pretty=True) self._log(query) return self.con.query(query, external_data=external_data, **kwargs) @@ -500,7 +439,9 @@ def close(self) -> None: """Close ClickHouse connection.""" self.con.close() - def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema: + def get_schema( + self, table_name: str, database: str | None = None, schema: str | None = None + ) -> sch.Schema: """Return a Schema object for the indicated table and database. Parameters @@ -510,19 +451,25 @@ def get_schema(self, table_name: str, database: str | None = None) -> sch.Schema qualify the identifier. database Database name + schema + Schema name, not supported by ClickHouse Returns ------- sch.Schema Ibis schema """ - query = sg.exp.Describe(this=sg.table(table_name, db=database)) - with closing(self.raw_sql(query)) as results: + if schema is not None: + raise com.UnsupportedBackendFeatureError( + "`schema` namespaces are not supported by clickhouse" + ) + query = sge.Describe(this=sg.table(table_name, db=database)) + with self._safe_raw_sql(query) as results: names, types, *_ = results.result_columns return sch.Schema(dict(zip(names, map(ClickhouseType.from_string, types)))) - def _get_schema_using_query(self, query: str) -> sch.Schema: - name = util.gen_name("get_schema_using_query") + def _metadata(self, query: str) -> sch.Schema: + name = util.gen_name("clickhouse_metadata") with closing(self.raw_sql(f"CREATE VIEW {name} AS {query}")): pass try: @@ -531,43 +478,30 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: finally: with closing(self.raw_sql(f"DROP VIEW {name}")): pass - return sch.Schema(dict(zip(names, map(ClickhouseType.from_string, types)))) - - @classmethod - def has_operation(cls, operation: type[ops.Value]) -> bool: - from ibis.backends.clickhouse.compiler.values import translate_val - - return translate_val.dispatch(operation) is not translate_val.dispatch(object) + return zip(names, map(ClickhouseType.from_string, types)) def create_database( self, name: str, *, force: bool = False, engine: str = "Atomic" ) -> None: - src = sg.exp.Create( + src = sge.Create( this=sg.to_identifier(name), kind="DATABASE", exists=force, - properties=sg.exp.Properties( - expressions=[sg.exp.EngineProperty(this=sg.to_identifier(engine))] + properties=sge.Properties( + expressions=[sge.EngineProperty(this=sg.to_identifier(engine))] ), ) - with closing(self.raw_sql(src)): + with self._safe_raw_sql(src): pass def drop_database(self, name: str, *, force: bool = False) -> None: - src = 
sg.exp.Drop(this=sg.to_identifier(name), kind="DATABASE", exists=force) - with closing(self.raw_sql(src)): + src = sge.Drop(this=sg.to_identifier(name), kind="DATABASE", exists=force) + with self._safe_raw_sql(src): pass def truncate_table(self, name: str, database: str | None = None) -> None: ident = sg.table(name, db=database).sql(self.name) - with closing(self.raw_sql(f"TRUNCATE TABLE {ident}")): - pass - - def drop_table( - self, name: str, database: str | None = None, force: bool = False - ) -> None: - src = sg.exp.Drop(this=sg.table(name, db=database), kind="TABLE", exists=force) - with closing(self.raw_sql(src)): + with self._safe_raw_sql(f"TRUNCATE TABLE {ident}"): pass def read_parquet( @@ -686,10 +620,10 @@ def create_table( if schema is None: schema = obj.schema() - this = sg.exp.Schema( + this = sge.Schema( this=sg.table(name, db=database), expressions=[ - sg.exp.ColumnDef( + sge.ColumnDef( this=sg.to_identifier(name), kind=ClickhouseType.from_ibis(typ) ) for name, typ in schema.items() @@ -698,20 +632,20 @@ def create_table( properties = [ # the engine cannot be quoted, since clickhouse won't allow e.g., # "File(Native)" - sg.exp.EngineProperty(this=sg.to_identifier(engine, quoted=False)) + sge.EngineProperty(this=sg.to_identifier(engine, quoted=False)) ] if temp: - properties.append(sg.exp.TemporaryProperty()) + properties.append(sge.TemporaryProperty()) if order_by is not None or engine == "MergeTree": # engine == "MergeTree" requires an order by clause, which is the # empty tuple if order_by is False-y properties.append( - sg.exp.Order( + sge.Order( expressions=[ - sg.exp.Ordered( - this=sg.exp.Tuple( + sge.Ordered( + this=sge.Tuple( expressions=list(map(sg.column, order_by or ())) ) ) @@ -721,8 +655,8 @@ def create_table( if partition_by is not None: properties.append( - sg.exp.PartitionedByProperty( - this=sg.exp.Schema( + sge.PartitionedByProperty( + this=sge.Schema( expressions=list(map(sg.to_identifier, partition_by)) ) ) @@ -730,19 +664,19 @@ def create_table( if sample_by is not None: properties.append( - sg.exp.SampleProperty( - this=sg.exp.Tuple(expressions=list(map(sg.column, sample_by))) + sge.SampleProperty( + this=sge.Tuple(expressions=list(map(sg.column, sample_by))) ) ) if settings: properties.append( - sg.exp.SettingsProperty( + sge.SettingsProperty( expressions=[ - sg.exp.SetItem( - this=sg.exp.EQ( + sge.SetItem( + this=sge.EQ( this=sg.to_identifier(name), - expression=sg.exp.convert(value), + expression=sge.convert(value), ) ) for name, value in settings.items() @@ -757,12 +691,12 @@ def create_table( expression = self._to_sqlglot(obj) external_tables.update(self._collect_in_memory_tables(obj)) - code = sg.exp.Create( + code = sge.Create( this=this, kind="TABLE", replace=overwrite, expression=expression, - properties=sg.exp.Properties(expressions=properties), + properties=sge.Properties(expressions=properties), ) external_data = self._normalize_external_tables(external_tables) @@ -781,46 +715,30 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - src = sg.exp.Create( + expression = self._to_sqlglot(obj) + src = sge.Create( this=sg.table(name, db=database), kind="VIEW", replace=overwrite, - expression=self._to_sqlglot(obj), + expression=expression, ) external_tables = self._collect_in_memory_tables(obj) - with closing(self.raw_sql(src, external_tables=external_tables)): + with self._safe_raw_sql(src, external_tables=external_tables): pass return self.table(name, database=database) - def drop_view( - self, name: str, *, 
database: str | None = None, force: bool = False - ) -> None: - src = sg.exp.Drop(this=sg.table(name, db=database), kind="VIEW", exists=force) - with closing(self.raw_sql(src)): - pass - - def _load_into_cache(self, name, expr): - self.create_table(name, expr, schema=expr.schema(), temp=True) - - def _clean_up_cached_table(self, op): - self.drop_table(op.name) - - def _create_temp_view(self, table_name, source): - if table_name not in self._temp_views and table_name in self.list_tables(): - raise ValueError( - f"{table_name} already exists as a non-temporary table or view" - ) - src = sg.exp.Create( - this=sg.table(table_name), kind="VIEW", replace=True, expression=source + def _get_temp_view_definition(self, name: str, definition: str) -> str: + return sge.Create( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind="VIEW", + expression=definition, + replace=True, ) - self.raw_sql(src) - self._temp_views.add(table_name) - self._register_temp_view_cleanup(table_name) def _register_temp_view_cleanup(self, name: str) -> None: def drop(self, name: str, query: str): self.raw_sql(query) self._temp_views.discard(name) - query = sg.exp.Drop(this=sg.table(name), kind="VIEW", exists=True) + query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True) atexit.register(drop, self, name=name, query=query) diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py new file mode 100644 index 000000000000..7d04c531d77f --- /dev/null +++ b/ibis/backends/clickhouse/compiler.py @@ -0,0 +1,741 @@ +from __future__ import annotations + +import calendar +import math +from functools import singledispatchmethod +from typing import Any + +import sqlglot as sg +import sqlglot.expressions as sge +from sqlglot import exp +from sqlglot.dialects import ClickHouse +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis import util +from ibis.backends.base.sqlglot.compiler import ( + NULL, + STAR, + SQLGlotCompiler, + parenthesize, +) +from ibis.backends.clickhouse.datatypes import ClickhouseType +from ibis.expr.rewrites import rewrite_sample + +ClickHouse.Generator.TRANSFORMS |= { + exp.ArraySize: rename_func("length"), + exp.ArraySort: rename_func("arraySort"), +} + + +class ClickHouseCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "clickhouse" + type_mapper = ClickhouseType + rewrites = (rewrite_sample, *SQLGlotCompiler.rewrites) + + def _aggregate(self, funcname: str, *args, where): + has_filter = where is not None + func = self.f[funcname + "If" * has_filter] + args += (where,) * has_filter + return func(*args) + + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, **kw) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + _interval_cast_suffixes = { + "s": "Second", + "m": "Minute", + "h": "Hour", + "D": "Day", + "W": "Week", + "M": "Month", + "Q": "Quarter", + "Y": "Year", + } + + if to.is_interval(): + suffix = _interval_cast_suffixes[to.unit.short] + return self.f[f"toInterval{suffix}"](arg) + + result = self.cast(arg, to) + if (timezone := getattr(to, "timezone", None)) is not None: + return self.f.toTimeZone(result, timezone) + return result + + @visit_node.register(ops.TryCast) + def visit_TryCast(self, op, *, arg, to): + return self.f.accurateCastOrNull(arg, self.type_mapper.to_string(to)) + + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + 
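        # note: ClickHouse arrays are 1-based and already support negative
        # indices counting from the end, so only non-negative ibis (0-based)
        # indices need the +1 shift below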
return arg[self.if_(index >= 0, index + 1, index)] + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + param = sg.to_identifier("_") + func = sge.Lambda(this=arg, expressions=[param]) + return self.f.arrayFlatten(self.f.arrayMap(func, self.f.range(times))) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + start = parenthesize(op.start, start) + start_correct = self.if_(start < 0, start, start + 1) + + if stop is not None: + stop = parenthesize(op.stop, stop) + + length = self.if_( + stop < 0, + stop, + self.if_( + start < 0, + self.f.greatest(0, stop - (self.f.length(arg) + start)), + self.f.greatest(0, stop - start), + ), + ) + return self.f.arraySlice(arg, start_correct, length) + else: + return self.f.arraySlice(arg, start_correct) + + @visit_node.register(ops.CountStar) + def visit_CountStar(self, op, *, where, arg): + if where is not None: + return self.f.countIf(where) + return sge.Count(this=STAR) + + @visit_node.register(ops.Quantile) + @visit_node.register(ops.MultiQuantile) + def visit_QuantileMultiQuantile(self, op, *, arg, quantile, where): + if where is None: + return self.agg.quantile(arg, quantile, where=where) + + func = "quantile" + "s" * isinstance(op, ops.MultiQuantile) + return sge.ParameterizedAgg( + this=f"{func}If", + expressions=util.promote_list(quantile), + params=[arg, where], + ) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "pop": + raise ValueError( + "ClickHouse only implements `sample` correlation coefficient" + ) + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how == "first": + return self.agg.any(arg, where=where) + elif how == "last": + return self.agg.anyLast(arg, where=where) + else: + assert how == "heavy" + return self.agg.anyHeavy(arg, where=where) + + @visit_node.register(ops.Substring) + def visit_Substring(self, op, *, arg, start, length): + # Clickhouse is 1-indexed + suffix = (length,) * (length is not None) + if_pos = self.f.substring(arg, start + 1, *suffix) + if_neg = self.f.substring(arg, self.f.length(arg) + start + 1, *suffix) + return self.if_(start >= 0, if_pos, if_neg) + + @visit_node.register(ops.StringFind) + def visit_StringFind(self, op, *, arg, substr, start, end): + if end is not None: + raise com.UnsupportedOperationError( + "String find doesn't support end argument" + ) + + if start is not None: + return self.f.locate(arg, substr, start) + + return self.f.locate(arg, substr) + + @visit_node.register(ops.RegexSearch) + def visit_RegexSearch(self, op, *, arg, pattern): + return sge.RegexpLike(this=arg, expression=pattern) + + @visit_node.register(ops.RegexExtract) + def visit_RegexExtract(self, op, *, arg, pattern, index): + arg = self.cast(arg, dt.String(nullable=False)) + + pattern = self.f.concat("(", pattern, ")") + + if index is None: + index = 0 + + index += 1 + + then = self.f.extractGroups(arg, pattern)[index] + + return self.if_(self.f.notEmpty(then), then, NULL) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.indexOf(self.f.array(*values), needle) + + @visit_node.register(ops.Sign) + def visit_Sign(self, op, *, arg): + """Workaround for missing sign function in older versions of clickhouse.""" + return self.f.intDivOrZero(arg, self.f.abs(arg)) + + @visit_node.register(ops.Hash) + def visit_Hash(self, op, *, 
arg): + return self.f.sipHash64(arg) + + @visit_node.register(ops.HashBytes) + def visit_HashBytes(self, op, *, arg, how): + supported_algorithms = frozenset( + ( + "MD5", + "halfMD5", + "SHA1", + "SHA224", + "SHA256", + "intHash32", + "intHash64", + "cityHash64", + "sipHash64", + "sipHash128", + ) + ) + if how not in supported_algorithms: + raise com.UnsupportedOperationError(f"Unsupported hash algorithm {how}") + + return self.f[how](arg) + + @visit_node.register(ops.IntervalFromInteger) + def visit_IntervalFromInteger(self, op, *, arg, unit): + dtype = op.dtype + if dtype.unit.short in ("ms", "us", "ns"): + raise com.UnsupportedOperationError( + "Clickhouse doesn't support subsecond interval resolutions" + ) + return super().visit_node(op, arg=arg, unit=unit) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + return super().visit_node(op, value=value, dtype=dtype, **kw) + elif dtype.is_inet(): + v = str(value) + return self.f.toIPv6(v) if ":" in v else self.f.toIPv4(v) + elif dtype.is_string(): + return sge.convert(str(value).replace(r"\0", r"\\0")) + elif dtype.is_decimal(): + precision = dtype.precision + if precision is None or not 1 <= precision <= 76: + raise NotImplementedError( + f"Unsupported precision. Supported values: [1 : 76]. Current value: {precision!r}" + ) + + if 1 <= precision <= 9: + type_name = self.f.toDecimal32 + elif 10 <= precision <= 18: + type_name = self.f.toDecimal64 + elif 19 <= precision <= 38: + type_name = self.f.toDecimal128 + else: + type_name = self.f.toDecimal256 + return type_name(value, dtype.scale) + elif dtype.is_numeric(): + if not math.isfinite(value): + return sge.Literal.number(str(value)) + return sge.convert(value) + elif dtype.is_interval(): + if dtype.unit.short in ("ms", "us", "ns"): + raise com.UnsupportedOperationError( + "Clickhouse doesn't support subsecond interval resolutions" + ) + + return sge.Interval( + this=sge.convert(str(value)), unit=dtype.resolution.upper() + ) + elif dtype.is_timestamp(): + funcname = "parseDateTime" + + if micros := value.microsecond: + funcname += "64" + + funcname += "BestEffort" + + args = [value.isoformat()] + + if micros % 1000: + args.append(6) + elif micros // 1000: + args.append(3) + + if (timezone := dtype.timezone) is not None: + args.append(timezone) + + return self.f[funcname](*args) + elif dtype.is_date(): + return self.f.toDate(value.isoformat()) + elif dtype.is_array(): + value_type = dtype.value_type + values = [ + self.visit_Literal( + ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw + ) + for v in value + ] + return self.f.array(*values) + elif dtype.is_map(): + value_type = dtype.value_type + keys = [] + values = [] + + for k, v in value.items(): + keys.append(sge.convert(k)) + values.append( + self.visit_Literal( + ops.Literal(v, dtype=value_type), + value=v, + dtype=value_type, + **kw, + ) + ) + + return self.f.map(self.f.array(*keys), self.f.array(*values)) + elif dtype.is_struct(): + fields = [ + self.visit_Literal( + ops.Literal(v, dtype=field_type), value=v, dtype=field_type, **kw + ) + for field_type, v in zip(dtype.types, value.values()) + ] + return self.f.tuple(*fields) + else: + return super().visit_node(op, value=value, dtype=dtype, **kw) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + if (unit := unit.short) in {"ms", "us", "ns"}: + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") + return self.f.toDateTime(arg) + + 
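The decimal branch of visit_Literal above selects a ClickHouse constructor function by precision; a minimal standalone sketch of that bucketing (the helper name here is hypothetical, not part of the patch):

def decimal_ctor(precision: int) -> str:
    # mirror the [1, 76] bound and the 32/64/128/256-bit buckets used above
    if not 1 <= precision <= 76:
        raise ValueError(f"unsupported precision: {precision!r}")
    if precision <= 9:
        return "toDecimal32"
    if precision <= 18:
        return "toDecimal64"
    if precision <= 38:
        return "toDecimal128"
    return "toDecimal256"

assert decimal_ctor(9) == "toDecimal32"
assert decimal_ctor(19) == "toDecimal128"
assert decimal_ctor(76) == "toDecimal256"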
@visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + @visit_node.register(ops.TimeTruncate) + def visit_TimeTruncate(self, op, *, arg, unit): + converters = { + "Y": "toStartOfYear", + "M": "toStartOfMonth", + "W": "toMonday", + "D": "toDate", + "h": "toStartOfHour", + "m": "toStartOfMinute", + "s": "toDateTime", + } + + unit = unit.short + if (converter := converters.get(unit)) is None: + raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") + + return self.f[converter](arg) + + @visit_node.register(ops.TimestampBucket) + def visit_TimestampBucket(self, op, *, arg, interval, offset): + if offset is not None: + raise com.UnsupportedOperationError( + "Timestamp bucket with offset is not supported" + ) + + return self.f.toStartOfInterval(arg, interval) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.f.toDate( + self.f.concat( + self.f.toString(year), + "-", + self.f.leftPad(self.f.toString(month), 2, "0"), + "-", + self.f.leftPad(self.f.toString(day), 2, "0"), + ) + ) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds, **_ + ): + to_datetime = self.f.toDateTime( + self.f.concat( + self.f.toString(year), + "-", + self.f.leftPad(self.f.toString(month), 2, "0"), + "-", + self.f.leftPad(self.f.toString(day), 2, "0"), + " ", + self.f.leftPad(self.f.toString(hours), 2, "0"), + ":", + self.f.leftPad(self.f.toString(minutes), 2, "0"), + ":", + self.f.leftPad(self.f.toString(seconds), 2, "0"), + ) + ) + if timezone := op.dtype.timezone: + return self.f.toTimeZone(to_datetime, timezone) + return to_datetime + + @visit_node.register(ops.StringSplit) + def visit_StringSplit(self, op, *, arg, delimiter): + return self.f.splitByString( + delimiter, self.cast(arg, dt.String(nullable=False)) + ) + + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, sep, arg): + return self.f.arrayStringConcat(self.f.array(*arg), sep) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), self.f.lower(self.f.substr(arg, 2)) + ) + + @visit_node.register(ops.GroupConcat) + def visit_GroupConcat(self, op, *, arg, sep, where): + call = self.agg.groupArray(arg, where=where) + return self.if_(self.f.empty(call), NULL, self.f.arrayStringConcat(call, sep)) + + @visit_node.register(ops.Cot) + def visit_Cot(self, op, *, arg): + return 1.0 / self.f.tan(arg) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, values, names): + # ClickHouse struct types cannot be nullable + # (non-nested fields can be nullable) + return self.cast(self.f.tuple(*values), op.dtype.copy(nullable=False)) + + @visit_node.register(ops.Clip) + def visit_Clip(self, op, *, arg, lower, upper): + if upper is not None: + arg = self.if_(self.f.isNull(arg), NULL, self.f.least(upper, arg)) + + if lower is not None: + arg = self.if_(self.f.isNull(arg), NULL, self.f.greatest(lower, arg)) + + return arg + + @visit_node.register(ops.StructField) + def visit_StructField(self, op, *, arg, field: str): + arg_dtype = op.arg.dtype + idx = arg_dtype.names.index(field) + return self.cast(sge.Dot(this=arg, expression=sge.convert(idx + 1)), op.dtype) + + @visit_node.register(ops.Repeat) + def visit_Repeat(self, op, *, arg, times): + return self.f.repeat(arg, self.f.accurateCast(times, "UInt64")) + + 
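For reference, the NULL-preserving clamp that visit_Clip above assembles from isNull/least/greatest behaves like this plain-Python sketch, with None standing in for SQL NULL:

def clip(x, lower=None, upper=None):
    # a NULL input stays NULL instead of being clamped to a bound
    if x is None:
        return None
    if upper is not None:
        x = min(x, upper)
    if lower is not None:
        x = max(x, lower)
    return x

assert clip(None, lower=0, upper=10) is None
assert clip(15, upper=10) == 10
assert clip(-5, lower=0) == 0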
@visit_node.register(ops.StringContains) + def visit_StringContains(self, op, haystack, needle): + return self.f.locate(haystack, needle) > 0 + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + weekdays = len(calendar.day_name) + return (((self.f.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + # ClickHouse 20 doesn't support dateName + # + # ClickHouse 21 supports dateName is broken for regexen: + # https://github.com/ClickHouse/ClickHouse/issues/32777 + # + # ClickHouses 20 and 21 also have a broken case statement hence the ifnull: + # https://github.com/ClickHouse/ClickHouse/issues/32849 + # + # We test against 20 in CI, so we implement day_of_week_name as follows + days = calendar.day_name + num_weekdays = len(days) + base = ( + ((self.f.toDayOfWeek(arg) - 1) % num_weekdays) + num_weekdays + ) % num_weekdays + return sge.Case( + this=base, + ifs=list(map(self.if_, *zip(*enumerate(days)))), + default=sge.convert(""), + ) + + @visit_node.register(ops.Map) + def visit_Map(self, op, *, keys, values): + # cast here to allow lookups of nullable columns + return self.cast(self.f.tuple(keys, values), op.dtype) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.if_(self.f.mapContains(arg, key), arg[key], default) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + return self.f.arrayConcat(*arg) + + @visit_node.register(ops.BitAnd) + @visit_node.register(ops.BitOr) + @visit_node.register(ops.BitXor) + def visit_BitAndOrXor(self, op, *, arg, where): + if not (dtype := op.arg.dtype).is_unsigned_integer(): + nbits = dtype.nbytes * 8 + arg = self.f[f"reinterpretAsUInt{nbits}"](arg) + return self.agg[f"group{type(op).__name__}"](arg, where=where) + + @visit_node.register(ops.StandardDev) + @visit_node.register(ops.Variance) + @visit_node.register(ops.Covariance) + def visit_StandardDevVariance(self, op, *, how, where, **kw): + funcs = { + ops.StandardDev: "stddev", + ops.Variance: "var", + ops.Covariance: "covar", + } + func = funcs[type(op)] + variants = {"sample": f"{func}Samp", "pop": f"{func}Pop"} + funcname = variants[how] + return self.agg[funcname](*kw.values(), where=where) + + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + null_element = self.if_( + self.f.countEqual(arg, NULL) > 0, self.f.array(NULL), self.f.array() + ) + return self.f.arrayConcat(self.f.arrayDistinct(arg), null_element) + + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + dtype = op.dtype + return self.cast( + self.f.toUnixTimestamp64Micro(self.cast(arg, op.arg.dtype.copy(scale=6))) + % 1_000_000, + dtype, + ) + + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + dtype = op.dtype + return self.cast( + self.f.toUnixTimestamp64Milli(self.cast(arg, op.arg.dtype.copy(scale=3))) + % 1_000, + dtype, + ) + + @visit_node.register(ops.Lag) + @visit_node.register(ops.Lead) + def formatter(self, op, *, arg, offset, default): + args = [arg] + + if default is not None: + if offset is None: + offset = 1 + + args.append(offset) + args.append(default) + elif offset is not None: + args.append(offset) + + func = self.f[f"{type(op).__name__.lower()}InFrame"] + return func(*args) + + @visit_node.register(ops.ExtractFile) + def visit_ExtractFile(self, op, *, arg): + return 
self.f.cutFragment(self.f.pathFull(arg)) + + @visit_node.register(ops.ExtractQuery) + def visit_ExtractQuery(self, op, *, arg, key): + if key is not None: + return self.f.extractURLParameter(arg, key) + else: + return self.f.queryString(arg) + + @visit_node.register(ops.ArrayStringJoin) + def visit_ArrayStringJoin(self, op, *, arg, sep): + return self.f.arrayStringConcat(arg, sep) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, param, body): + func = sge.Lambda(this=body, expressions=[param]) + return self.f.arrayMap(func, arg) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, param, body): + func = sge.Lambda(this=body, expressions=[param]) + return self.f.arrayFilter(func, arg) + + @visit_node.register(ops.ArrayRemove) + def visit_ArrayRemove(self, op, *, arg, other): + x = sg.to_identifier("x") + body = x.neq(other) + return self.f.arrayFilter(sge.Lambda(this=body, expressions=[x]), arg) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + arg = self.f.arrayConcat(left, right) + null_element = self.if_( + self.f.countEqual(arg, NULL) > 0, self.f.array(NULL), self.f.array() + ) + return self.f.arrayConcat(self.f.arrayDistinct(arg), null_element) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op: ops.ArrayZip, *, arg, **_: Any) -> str: + return self.f.arrayZip(*arg) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar( + self, op: ops.CountDistinctStar, *, where, **_: Any + ) -> str: + columns = self.f.tuple(*map(sg.column, op.arg.schema.names)) + + if where is not None: + return self.f.countDistinctIf(columns, where) + else: + return self.f.countDistinct(columns) + + @visit_node.register(ops.TimestampRange) + def visit_TimestampRange(self, op, *, start, stop, step): + unit = op.step.dtype.unit.name.lower() + + if not isinstance(op.step, ops.Literal): + raise com.UnsupportedOperationError( + "ClickHouse doesn't support non-literal step values" + ) + + step_value = op.step.value + + offset = sg.to_identifier("offset") + + func = sge.Lambda( + this=self.f.dateAdd(sg.to_identifier(unit), offset, start), + expressions=[offset], + ) + + if step_value == 0: + return self.f.array() + + return self.f.arrayMap( + func, self.f.range(0, self.f.timestampDiff(unit, start, stop), step_value) + ) + + @visit_node.register(ops.RegexSplit) + def visit_RegexSplit(self, op, *, arg, pattern): + return self.f.splitByRegexp(pattern, self.cast(arg, dt.String(nullable=False))) + + @staticmethod + def _generate_groups(groups): + return groups + + @visit_node.register(ops.RowID) + @visit_node.register(ops.CumeDist) + @visit_node.register(ops.PercentRank) + @visit_node.register(ops.Time) + @visit_node.register(ops.TimeDelta) + @visit_node.register(ops.StringToTimestamp) + @visit_node.register(ops.Levenshtein) + def visit_Undefined(self, op, **_): + raise com.OperationNotDefinedError(type(op).__name__) + + +_SIMPLE_OPS = { + ops.All: "min", + ops.Any: "max", + ops.ApproxCountDistinct: "uniqHLL12", + ops.ApproxMedian: "median", + ops.ArgMax: "argMax", + ops.ArgMin: "argMin", + ops.ArrayCollect: "groupArray", + ops.ArrayContains: "has", + ops.ArrayFlatten: "arrayFlatten", + ops.ArrayIntersect: "arrayIntersect", + ops.ArrayPosition: "indexOf", + ops.BitwiseAnd: "bitAnd", + ops.BitwiseLeftShift: "bitShiftLeft", + ops.BitwiseNot: "bitNot", + ops.BitwiseOr: "bitOr", + ops.BitwiseRightShift: "bitShiftRight", + ops.BitwiseXor: "bitXor", + ops.Capitalize: "initcap", + 
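    # each entry in this mapping points an ibis op straight at a ClickHouse
    # function name; the registration loop at the end of this file dispatches
    # reductions through the filtered-aggregate helper and everything else
    # through plain function application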
ops.CountDistinct: "uniq", + ops.Date: "toDate", + ops.E: "e", + ops.EndsWith: "endsWith", + ops.ExtractAuthority: "netloc", + ops.ExtractDay: "toDayOfMonth", + ops.ExtractDayOfYear: "toDayOfYear", + ops.ExtractEpochSeconds: "toRelativeSecondNum", + ops.ExtractFragment: "fragment", + ops.ExtractHost: "domain", + ops.ExtractHour: "toHour", + ops.ExtractMinute: "toMinute", + ops.ExtractMonth: "toMonth", + ops.ExtractPath: "path", + ops.ExtractProtocol: "protocol", + ops.ExtractQuarter: "toQuarter", + ops.ExtractSecond: "toSecond", + ops.ExtractWeekOfYear: "toISOWeek", + ops.ExtractYear: "toYear", + ops.First: "any", + ops.IntegerRange: "range", + ops.IsInf: "isInfinite", + ops.IsNan: "isNaN", + ops.IsNull: "isNull", + ops.LPad: "leftPad", + ops.LStrip: "trimLeft", + ops.Last: "anyLast", + ops.Ln: "log", + ops.Log10: "log10", + ops.MapContains: "mapContains", + ops.MapKeys: "mapKeys", + ops.MapLength: "length", + ops.MapMerge: "mapUpdate", + ops.MapValues: "mapValues", + ops.Median: "quantileExactExclusive", + ops.NotNull: "isNotNull", + ops.NullIf: "nullIf", + ops.RPad: "rightPad", + ops.RStrip: "trimRight", + ops.RandomScalar: "randCanonical", + ops.RegexReplace: "replaceRegexpAll", + ops.Repeat: "repeat", + ops.RowNumber: "row_number", + ops.StartsWith: "startsWith", + ops.StrRight: "right", + ops.Strftime: "formatDateTime", + ops.StringAscii: "ascii", + ops.StringLength: "length", + ops.StringReplace: "replaceAll", + ops.Strip: "trimBoth", + ops.TimestampNow: "now", + ops.Translate: "translate", + ops.TypeOf: "toTypeName", + ops.Unnest: "arrayJoin", +} + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @ClickHouseCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @ClickHouseCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(ClickHouseCompiler, f"visit_{_op.__name__}", _fmt) + +del _op, _name, _fmt diff --git a/ibis/backends/clickhouse/compiler/__init__.py b/ibis/backends/clickhouse/compiler/__init__.py deleted file mode 100644 index 5d3b87047e38..000000000000 --- a/ibis/backends/clickhouse/compiler/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from public import public - -from ibis.backends.clickhouse.compiler.core import translate -from ibis.backends.clickhouse.compiler.relations import translate_rel -from ibis.backends.clickhouse.compiler.values import translate_val - -public( - translate=translate, - translate_rel=translate_rel, - translate_val=translate_val, -) diff --git a/ibis/backends/clickhouse/compiler/core.py b/ibis/backends/clickhouse/compiler/core.py deleted file mode 100644 index ad7fba431718..000000000000 --- a/ibis/backends/clickhouse/compiler/core.py +++ /dev/null @@ -1,132 +0,0 @@ -"""ClickHouse ibis expression to sqlglot compiler. - -The compiler is built with a few `singledispatch` functions: - - 1. `translate_rel` for compiling `ops.TableNode`s - 1. `translate_val` for compiling `ops.Value`s - -## `translate` - -### Node Implementation - -There's a single `ops.Node` implementation for `ops.TableNode`s instances. - -This function compiles each node in topological order. The topological sorting, -result caching, and iteration are all handled by -`ibis.expr.operations.core.Node.map`. 
-""" - -from __future__ import annotations - -import itertools -from typing import TYPE_CHECKING, Any - -import sqlglot as sg - -import ibis.expr.operations as ops -import ibis.expr.types as ir -from ibis.backends.clickhouse.compiler.relations import translate_rel -from ibis.backends.clickhouse.compiler.values import translate_val -from ibis.common.deferred import _ -from ibis.expr.analysis import c, find_first_base_table, p, x, y -from ibis.expr.rewrites import rewrite_dropna, rewrite_fillna, rewrite_sample - -if TYPE_CHECKING: - from collections.abc import Mapping - - -def _translate_node(node, **kwargs): - if isinstance(node, ops.Value): - return translate_val(node, **kwargs) - assert isinstance(node, ops.TableNode) - return translate_rel(node, **kwargs) - - -def translate(op: ops.TableNode, params: Mapping[ir.Value, Any]) -> sg.exp.Expression: - """Translate an ibis operation to a sqlglot expression. - - Parameters - ---------- - op - An ibis `TableNode` - params - A mapping of expressions to concrete values - - Returns - ------- - sqlglot.expressions.Expression - A sqlglot expression - """ - - gen_alias_index = itertools.count() - - def fn(node, _, **kwargs): - result = _translate_node(node, **kwargs) - - # don't alias root nodes or value ops - if node is op or isinstance(node, ops.Value): - return result - - assert isinstance(node, ops.TableNode) - - alias_index = next(gen_alias_index) - alias = f"t{alias_index:d}" - - try: - return result.subquery(alias) - except AttributeError: - return sg.alias(result, alias) - - # substitute parameters immediately to avoid having to define a - # ScalarParameter translation rule - # - # this lets us avoid threading `params` through every `translate_val` call - # only to be used in the one place it would be needed: the ScalarParameter - # `translate_val` rule - params = {param.op(): value for param, value in params.items()} - replace_literals = p.ScalarParameter(dtype=x) >> ( - lambda _, x: ops.Literal(value=params[_], dtype=x) - ) - - # replace the right side of InColumn into a scalar subquery for sql - # backends - replace_in_column_with_table_array_view = p.InColumn(options=y) >> _.copy( - options=c.TableArrayView( - c.Selection(table=lambda _, y: find_first_base_table(y), selections=(y,)) - ), - ) - - # replace any checks against an empty right side of the IN operation with - # `False` - replace_empty_in_values_with_false = p.InValues(options=()) >> c.Literal( - False, dtype="bool" - ) - - # subtract one from one-based functions to convert to zero-based indexing - subtract_one_from_one_indexed_functions = ( - p.WindowFunction(p.RankBase | p.NTile) - | p.StringFind - | p.FindInSet - | p.ArrayPosition - ) >> c.Subtract(_, 1) - - add_one_to_nth_value_input = p.NthValue >> _.copy(nth=c.Add(_.nth, 1)) - - nullify_empty_string_results = (p.ExtractURLField | p.DayOfWeekName) >> c.NullIf( - _, "" - ) - - op = op.replace( - replace_literals - | replace_in_column_with_table_array_view - | replace_empty_in_values_with_false - | subtract_one_from_one_indexed_functions - | add_one_to_nth_value_input - | nullify_empty_string_results - | rewrite_fillna - | rewrite_dropna - | rewrite_sample - ) - # apply translate rules in topological order - node = op.map(fn)[op] - return node.this if isinstance(node, sg.exp.Subquery) else node diff --git a/ibis/backends/clickhouse/compiler/relations.py b/ibis/backends/clickhouse/compiler/relations.py deleted file mode 100644 index ae2be72a80e8..000000000000 --- a/ibis/backends/clickhouse/compiler/relations.py +++ /dev/null 
@@ -1,215 +0,0 @@ -from __future__ import annotations - -import functools -from typing import Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.operations as ops -from ibis.backends.base.sqlglot import STAR - - -@functools.singledispatch -def translate_rel(op: ops.TableNode, **_): - """Translate a table node into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -@translate_rel.register -def _dummy(op: ops.DummyTable, *, values, **_): - return sg.select(*values) - - -@translate_rel.register -def _physical_table(op: ops.PhysicalTable, **_): - return sg.table(op.name) - - -@translate_rel.register -def _database_table(op: ops.DatabaseTable, *, name, namespace, **_): - return sg.table(name, db=namespace.schema, catalog=namespace.database) - - -def replace_tables_with_star_selection(node, alias=None): - if isinstance(node, (sg.exp.Subquery, sg.exp.Table, sg.exp.CTE)): - return sg.exp.Column( - this=STAR, - table=sg.to_identifier(alias if alias is not None else node.alias_or_name), - ) - return node - - -@translate_rel.register -def _selection(op: ops.Selection, *, table, selections, predicates, sort_keys, **_): - # needs_alias should never be true here in explicitly, but it may get - # passed via a (recursive) call to translate_val - if isinstance(op.table, ops.Join) and not isinstance( - op.table, (ops.LeftSemiJoin, ops.LeftAntiJoin) - ): - args = table.this.args - from_ = args["from"] - (join,) = args["joins"] - else: - from_ = join = None - - alias = table.alias_or_name - selections = tuple( - replace_tables_with_star_selection( - node, - # replace the table name with the alias if the table is **not** a - # join, because we may be selecting from a subquery or an aliased - # table; otherwise we'll select from the _unaliased_ table or the - # _child_ table, which may have a different alias than the one we - # generated for the input table - alias if from_ is None and join is None else None, - ) - for node in selections - ) or (STAR,) - - sel = sg.select(*selections).from_(from_ if from_ is not None else table) - - if join is not None: - sel = sel.join(join) - - if predicates: - if join is not None: - sel = sg.select(STAR).from_(sel.subquery(alias)) - sel = sel.where(*predicates) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -@translate_rel.register(ops.Aggregation) -def _aggregation( - op: ops.Aggregation, *, table, metrics, by, having, predicates, sort_keys, **_ -): - selections = (by + metrics) or (STAR,) - sel = sg.select(*selections).from_(table) - - if by: - sel = sel.group_by( - *(key.this if isinstance(key, sg.exp.Alias) else key for key in by) - ) - - if predicates: - sel = sel.where(*predicates) - - if having: - sel = sel.having(*having) - - if sort_keys: - sel = sel.order_by(*sort_keys) - - return sel - - -_JOIN_TYPES = { - ops.InnerJoin: "INNER", - ops.AnyInnerJoin: "ANY", - ops.LeftJoin: "LEFT OUTER", - ops.AnyLeftJoin: "LEFT ANY", - ops.RightJoin: "RIGHT OUTER", - ops.OuterJoin: "FULL OUTER", - ops.CrossJoin: "CROSS", - ops.LeftSemiJoin: "LEFT SEMI", - ops.LeftAntiJoin: "LEFT ANTI", - ops.AsOfJoin: "LEFT ASOF", -} - - -@translate_rel.register -def _join(op: ops.Join, *, left, right, predicates, **_): - on = sg.and_(*predicates) if predicates else None - join_type = _JOIN_TYPES[type(op)] - try: - # dialect must be passed to allow clickhouse's ANY/LEFT ANY/ASOF joins - return left.join(right, join_type=join_type, on=on, dialect="clickhouse") - except AttributeError: - select_args = 
[f"{left.alias_or_name}.*"] - - # select from both the left and right side of the join if the join - # is not a filtering join (semi join or anti join); filtering joins - # only return the left side columns - if not isinstance(op, (ops.LeftSemiJoin, ops.LeftAntiJoin)): - select_args.append(f"{right.alias_or_name}.*") - return ( - sg.select(*select_args) - .from_(left) - .join(right, join_type=join_type, on=on, dialect="clickhouse") - ) - - -@translate_rel.register -def _self_ref(op: ops.SelfReference, *, table, **_): - return sg.alias(table, op.name) - - -@translate_rel.register -def _query(op: ops.SQLQueryResult, *, query, **_): - return sg.parse_one(query, read="clickhouse").subquery() - - -_SET_OP_FUNC = { - ops.Union: sg.union, - ops.Intersection: sg.intersect, - ops.Difference: sg.except_, -} - - -@translate_rel.register -def _set_op(op: ops.SetOp, *, left, right, distinct: bool = False, **_): - if isinstance(left, sg.exp.Table): - left = sg.select(STAR).from_(left) - - if isinstance(right, sg.exp.Table): - right = sg.select(STAR).from_(right) - - func = _SET_OP_FUNC[type(op)] - - left = left.args.get("this", left) - right = right.args.get("this", right) - - return func(left, right, distinct=distinct) - - -@translate_rel.register -def _limit(op: ops.Limit, *, table, n, offset, **_): - result = sg.select(STAR).from_(table) - - if n is not None: - if not isinstance(n, int): - limit = sg.select(n).from_(table).subquery() - else: - limit = n - result = result.limit(limit) - - if not isinstance(offset, int): - return result.offset( - sg.select(offset).from_(table).subquery().sql("clickhouse") - ) - - return result.offset(offset) if offset != 0 else result - - -@translate_rel.register -def _distinct(op: ops.Distinct, *, table, **_): - return sg.select(STAR).distinct().from_(table) - - -@translate_rel.register -def _sql_string_view(op: ops.SQLStringView, query: str, **_: Any): - table = sg.table(op.name) - return sg.select(STAR).from_(table).with_(table, as_=query, dialect="clickhouse") - - -@translate_rel.register -def _view(op: ops.View, *, child, name: str, **_): - # TODO: find a way to do this without creating a temporary view - backend = op.child.to_expr()._find_backend() - source = sg.select(STAR).from_(child) - backend._create_temp_view(table_name=name, source=source) - return sg.table(name) diff --git a/ibis/backends/clickhouse/compiler/values.py b/ibis/backends/clickhouse/compiler/values.py deleted file mode 100644 index 49d55195bd38..000000000000 --- a/ibis/backends/clickhouse/compiler/values.py +++ /dev/null @@ -1,1051 +0,0 @@ -from __future__ import annotations - -import calendar -import functools -import math -import operator -from functools import partial -from typing import Any - -import sqlglot as sg - -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.operations as ops -from ibis import util -from ibis.backends.base.sqlglot import NULL, STAR, AggGen, C, F, interval, make_cast -from ibis.backends.clickhouse.datatypes import ClickhouseType - - -def _aggregate(funcname, *args, where): - has_filter = where is not None - func = F[funcname + "If" * has_filter] - args += (where,) * has_filter - return func(*args) - - -agg = AggGen(aggfunc=_aggregate) -if_ = F["if"] -cast = make_cast(ClickhouseType) - - -@functools.singledispatch -def translate_val(op, **_): - """Translate a value expression into sqlglot.""" - raise com.OperationNotDefinedError(f"No translation rule for {type(op)}") - - -@translate_val.register(ops.TableColumn) -def _column(op, 
*, table, name, **_): - return sg.column(name, table=table.alias_or_name) - - -@translate_val.register(ops.Alias) -def _alias(op, *, arg, name, **_): - return arg.as_(name) - - -_interval_cast_suffixes = { - "s": "Second", - "m": "Minute", - "h": "Hour", - "D": "Day", - "W": "Week", - "M": "Month", - "Q": "Quarter", - "Y": "Year", -} - - -@translate_val.register(ops.Cast) -def _cast(op, *, arg, to, **_): - if to.is_interval(): - suffix = _interval_cast_suffixes[to.unit.short] - return F[f"toInterval{suffix}"](arg) - - result = cast(arg, to) - if (timezone := getattr(to, "timezone", None)) is not None: - return F.toTimeZone(result, timezone) - return result - - -@translate_val.register(ops.TryCast) -def _try_cast(op, *, arg, to, **_): - return F.accurateCastOrNull(arg, ClickhouseType.to_string(to)) - - -@translate_val.register(ops.Between) -def _between(op, *, arg, lower_bound, upper_bound, **_): - return sg.exp.Between(this=arg, low=lower_bound, high=upper_bound) - - -@translate_val.register(ops.Negate) -def _negate(op, *, arg, **_): - return -sg.exp.Paren(this=arg) - - -@translate_val.register(ops.Not) -def _not(op, *, arg, **_): - return sg.not_(sg.exp.Paren(this=arg)) - - -def _parenthesize(op, arg): - if isinstance(op, (ops.Binary, ops.Unary)): - return sg.exp.Paren(this=arg) - else: - # function calls don't need parens - return arg - - -@translate_val.register(ops.ArrayIndex) -def _array_index_op(op, *, arg, index, **_): - return arg[if_(index >= 0, index + 1, index)] - - -@translate_val.register(ops.ArrayRepeat) -def _array_repeat_op(op, *, arg, times, **_): - return ( - sg.select(F.arrayFlatten(F.groupArray(C.arr))) - .from_( - sg.select(arg.as_("arr")) - .from_(sg.table("numbers", db="system")) - .limit(times) - .subquery() - ) - .subquery() - ) - - -@translate_val.register(ops.ArraySlice) -def _array_slice_op(op, *, arg, start, stop, **_): - start = _parenthesize(op.start, start) - start_correct = if_(start < 0, start, start + 1) - - if stop is not None: - stop = _parenthesize(op.stop, stop) - - length = if_( - stop < 0, - stop, - if_( - start < 0, - F.greatest(0, stop - (F.length(arg) + start)), - F.greatest(0, stop - start), - ), - ) - return F.arraySlice(arg, start_correct, length) - else: - return F.arraySlice(arg, start_correct) - - -@translate_val.register(ops.CountStar) -def _count_star(op, *, where, **_): - if where is not None: - return F.countIf(where) - return sg.exp.Count(this=STAR) - - -def _quantile(func: str): - def _compile(op, *, arg, quantile, where, **_): - if where is None: - return agg.quantile(arg, quantile, where=where) - - return sg.exp.ParameterizedAgg( - this=f"{func}If", - expressions=util.promote_list(quantile), - params=[arg, where], - ) - - return _compile - - -translate_val.register(ops.Quantile)(_quantile("quantile")) -translate_val.register(ops.MultiQuantile)(_quantile("quantiles")) - - -def _agg_variance_like(func): - variants = {"sample": f"{func}Samp", "pop": f"{func}Pop"} - - def formatter(_, *, how, where, **kw): - funcname = variants[how] - return agg[funcname](*kw.values(), where=where) - - return formatter - - -@translate_val.register(ops.Correlation) -def _corr(op, *, left, right, how, where, **_): - if how == "pop": - raise ValueError("ClickHouse only implements `sample` correlation coefficient") - return agg.corr(left, right, where=where) - - -@translate_val.register(ops.Arbitrary) -def _arbitrary(op, *, arg, how, where, **_): - if how == "first": - return agg.any(arg, where=where) - elif how == "last": - return agg.anyLast(arg, 
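`_array_index_op` above shifts non-negative indices by one because ClickHouse arrays are 1-based, while negative indices already count from the end in both systems. A pure-Python check of the mapping:

    def ch_index(i: int) -> int:
        return i + 1 if i >= 0 else i  # mirrors if_(index >= 0, index + 1, index)

    arr = ["a", "b", "c"]
    for i in (0, 2, -1):
        j = ch_index(i)
        # emulate ClickHouse arrayElement: positive j is 1-based, negative counts from the end
        assert arr[j - 1 if j > 0 else j] == arr[i]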
where=where) - else: - assert how == "heavy" - return agg.anyHeavy(arg, where=where) - - -@translate_val.register(ops.Substring) -def _substring(op, *, arg, start, length, **_): - # Clickhouse is 1-indexed - suffix = (length,) * (length is not None) - if_pos = F.substring(arg, start + 1, *suffix) - if_neg = F.substring(arg, F.length(arg) + start + 1, *suffix) - return if_(start >= 0, if_pos, if_neg) - - -@translate_val.register(ops.StringFind) -def _string_find(op, *, arg, substr, start, end, **_): - if end is not None: - raise com.UnsupportedOperationError("String find doesn't support end argument") - - if start is not None: - return F.locate(arg, substr, start) - - return F.locate(arg, substr) - - -@translate_val.register(ops.RegexSearch) -def _regex_search(op, *, arg, pattern, **_): - return sg.exp.RegexpLike(this=arg, expression=pattern) - - -@translate_val.register(ops.RegexExtract) -def _regex_extract(op, *, arg, pattern, index, **_): - arg = cast(arg, dt.String(nullable=False)) - - pattern = F.concat("(", pattern, ")") - - if index is None: - index = 0 - - index += 1 - - then = F.extractGroups(arg, pattern)[index] - - return if_(F.notEmpty(then), then, NULL) - - -@translate_val.register(ops.FindInSet) -def _index_of(op, *, needle, values, **_): - return F.indexOf(F.array(*values), needle) - - -@translate_val.register(ops.Round) -def _round(op, *, arg, digits, **_): - if digits is not None: - return F.round(arg, digits) - return F.round(arg) - - -@translate_val.register(ops.Sign) -def _sign(op, *, arg, **_): - """Workaround for missing sign function.""" - return F.intDivOrZero(arg, F.abs(arg)) - - -@translate_val.register(ops.Hash) -def _hash(op, *, arg, **_): - return F.sipHash64(arg) - - -_SUPPORTED_ALGORITHMS = frozenset( - ( - "MD5", - "halfMD5", - "SHA1", - "SHA224", - "SHA256", - "intHash32", - "intHash64", - "cityHash64", - "sipHash64", - "sipHash128", - ) -) - - -@translate_val.register(ops.HashBytes) -def _hash_bytes(op, *, arg, how, **_): - if how not in _SUPPORTED_ALGORITHMS: - raise com.UnsupportedOperationError(f"Unsupported hash algorithm {how}") - - return F[how](arg) - - -@translate_val.register(ops.Log) -def _log(op, *, arg, base, **_): - if base is None: - return F.ln(arg) - elif str(base) in ("2", "10"): - return F[f"log{base}"](arg) - else: - return F.ln(arg) / F.ln(base) - - -@translate_val.register(ops.IntervalFromInteger) -def _interval_from_integer(op, *, arg, unit, **_): - dtype = op.dtype - if dtype.unit.short in ("ms", "us", "ns"): - raise com.UnsupportedOperationError( - "Clickhouse doesn't support subsecond interval resolutions" - ) - - return interval(arg, unit=dtype.resolution.upper()) - - -@translate_val.register(ops.Literal) -def _literal(op, *, value, dtype, **kw): - if value is None and dtype.nullable: - if dtype.is_null(): - return NULL - return cast(NULL, dtype) - elif dtype.is_boolean(): - return sg.exp.convert(bool(value)) - elif dtype.is_inet(): - v = str(value) - return F.toIPv6(v) if ":" in v else F.toIPv4(v) - elif dtype.is_string(): - return sg.exp.convert(str(value).replace(r"\0", r"\\0")) - elif dtype.is_macaddr(): - return sg.exp.convert(str(value)) - elif dtype.is_decimal(): - precision = dtype.precision - if precision is None or not 1 <= precision <= 76: - raise NotImplementedError( - f"Unsupported precision. Supported values: [1 : 76]. 
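The `Sign` workaround above leans on ClickHouse's `intDivOrZero`, which returns 0 when the divisor is 0, so `intDivOrZero(x, abs(x))` yields -1, 0, or 1. Restated in plain Python:

    def int_div_or_zero(a: int, b: int) -> int:
        return 0 if b == 0 else int(a / b)  # exact here, since abs(a) always divides a

    assert [int_div_or_zero(x, abs(x)) for x in (-5, 0, 7)] == [-1, 0, 1]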
Current value: {precision!r}" - ) - - if 1 <= precision <= 9: - type_name = F.toDecimal32 - elif 10 <= precision <= 18: - type_name = F.toDecimal64 - elif 19 <= precision <= 38: - type_name = F.toDecimal128 - else: - type_name = F.toDecimal256 - return type_name(value, dtype.scale) - elif dtype.is_numeric(): - if math.isnan(value): - return sg.exp.Literal(this="NaN", is_string=False) - elif math.isinf(value): - inf = sg.exp.Literal(this="inf", is_string=False) - return -inf if value < 0 else inf - return sg.exp.convert(value) - elif dtype.is_interval(): - if dtype.unit.short in ("ms", "us", "ns"): - raise com.UnsupportedOperationError( - "Clickhouse doesn't support subsecond interval resolutions" - ) - - return interval(value, unit=dtype.resolution.upper()) - elif dtype.is_timestamp(): - funcname = "parseDateTime" - - if micros := value.microsecond: - funcname += "64" - - funcname += "BestEffort" - - args = [value.isoformat()] - - if micros % 1000: - args.append(6) - elif micros // 1000: - args.append(3) - - if (timezone := dtype.timezone) is not None: - args.append(timezone) - - return F[funcname](*args) - elif dtype.is_date(): - return F.toDate(value.isoformat()) - elif dtype.is_array(): - value_type = dtype.value_type - values = [ - _literal(ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw) - for v in value - ] - return F.array(*values) - elif dtype.is_map(): - value_type = dtype.value_type - keys = [] - values = [] - - for k, v in value.items(): - keys.append(sg.exp.convert(k)) - values.append( - _literal( - ops.Literal(v, dtype=value_type), value=v, dtype=value_type, **kw - ) - ) - - return F.map(F.array(*keys), F.array(*values)) - elif dtype.is_struct(): - fields = [ - _literal(ops.Literal(v, dtype=field_type), value=v, dtype=field_type, **kw) - for field_type, v in zip(dtype.types, value.values()) - ] - return F.tuple(*fields) - else: - raise NotImplementedError(f"Unsupported type: {dtype!r}") - - -@translate_val.register(ops.SimpleCase) -@translate_val.register(ops.SearchedCase) -def _case(op, *, base=None, cases, results, default, **_): - return sg.exp.Case(this=base, ifs=list(map(if_, cases, results)), default=default) - - -@translate_val.register(ops.TableArrayView) -def _table_array_view(op, *, table, **_): - return table.args["this"].subquery() - - -@translate_val.register(ops.TimestampFromUNIX) -def _timestamp_from_unix(op, *, arg, unit, **_): - if (unit := unit.short) in {"ms", "us", "ns"}: - raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") - return F.toDateTime(arg) - - -@translate_val.register(ops.DateTruncate) -@translate_val.register(ops.TimestampTruncate) -@translate_val.register(ops.TimeTruncate) -def _truncate(op, *, arg, unit, **_): - converters = { - "Y": F.toStartOfYear, - "M": F.toStartOfMonth, - "W": F.toMonday, - "D": F.toDate, - "h": F.toStartOfHour, - "m": F.toStartOfMinute, - "s": F.toDateTime, - } - - unit = unit.short - if (converter := converters.get(unit)) is None: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {unit}") - - return converter(arg) - - -@translate_val.register(ops.TimestampBucket) -def _timestamp_bucket(op, *, arg, interval, offset, **_): - if offset is not None: - raise com.UnsupportedOperationError( - "Timestamp bucket with offset is not supported" - ) - - return F.toStartOfInterval(arg, interval) - - -@translate_val.register(ops.DateFromYMD) -def _date_from_ymd(op, *, year, month, day, **_): - return F.toDate( - F.concat( - F.toString(year), - "-", - F.leftPad(F.toString(month), 
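The precision branches in `_literal` above track ClickHouse's decimal storage classes, which hold at most 9, 18, 38, and 76 digits for Decimal32/64/128/256 respectively. The same selection, restated compactly:

    def decimal_ctor(precision: int) -> str:
        if not 1 <= precision <= 76:
            raise ValueError(f"unsupported precision: {precision!r}")
        if precision <= 9:
            return "toDecimal32"
        if precision <= 18:
            return "toDecimal64"
        if precision <= 38:
            return "toDecimal128"
        return "toDecimal256"

    assert decimal_ctor(12) == "toDecimal64"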
2, "0"), - "-", - F.leftPad(F.toString(day), 2, "0"), - ) - ) - - -@translate_val.register(ops.TimestampFromYMDHMS) -def _timestamp_from_ymdhms(op, *, year, month, day, hours, minutes, seconds, **_): - to_datetime = F.toDateTime( - F.concat( - F.toString(year), - "-", - F.leftPad(F.toString(month), 2, "0"), - "-", - F.leftPad(F.toString(day), 2, "0"), - " ", - F.leftPad(F.toString(hours), 2, "0"), - ":", - F.leftPad(F.toString(minutes), 2, "0"), - ":", - F.leftPad(F.toString(seconds), 2, "0"), - ) - ) - if timezone := op.dtype.timezone: - return F.toTimeZone(to_datetime, timezone) - return to_datetime - - -@translate_val.register(ops.ExistsSubquery) -def _exists_subquery(op, *, foreign_table, predicates, **_): - # https://github.com/ClickHouse/ClickHouse/issues/6697 - # - # this would work if clickhouse supported correlated subqueries - subq = sg.select(1).from_(foreign_table).where(sg.condition(predicates)).subquery() - return F.exists(subq) - - -@translate_val.register(ops.StringSplit) -def _string_split(op, *, arg, delimiter, **_): - return F.splitByString(delimiter, cast(arg, dt.String(nullable=False))) - - -@translate_val.register(ops.StringJoin) -def _string_join(op, *, sep, arg, **_): - return F.arrayStringConcat(F.array(*arg), sep) - - -@translate_val.register(ops.StringConcat) -def _string_concat(op, *, arg, **_): - return F.concat(*arg) - - -@translate_val.register(ops.StringSQLLike) -def _string_like(op, *, arg, pattern, **_): - return arg.like(pattern) - - -@translate_val.register(ops.StringSQLILike) -def _string_ilike(op, *, arg, pattern, **_): - return arg.ilike(pattern) - - -@translate_val.register(ops.Capitalize) -def _string_capitalize(op, *, arg, **_): - return F.concat(F.upper(F.substr(arg, 1, 1)), F.lower(F.substr(arg, 2))) - - -@translate_val.register(ops.GroupConcat) -def _group_concat(op, *, arg, sep, where, **_): - call = agg.groupArray(arg, where=where) - return if_(F.empty(call), NULL, F.arrayStringConcat(call, sep)) - - -@translate_val.register(ops.StrRight) -def _string_right(op, *, arg, nchars, **_): - nchars = _parenthesize(op.nchars, nchars) - return F.substring(arg, -nchars) - - -@translate_val.register(ops.Cot) -def _cotangent(op, *, arg, **_): - return 1.0 / F.tan(arg) - - -def _bit_agg(func: str): - def _translate(op, *, arg, where, **_): - if not (dtype := op.arg.dtype).is_unsigned_integer(): - nbits = dtype.nbytes * 8 - arg = F[f"reinterpretAsUInt{nbits}"](arg) - return agg[func](arg, where=where) - - return _translate - - -@translate_val.register(ops.ArrayColumn) -def _array_column(op, *, cols, **_): - return F.array(*cols) - - -@translate_val.register(ops.StructColumn) -def _struct_column(op, *, values, **_): - # ClickHouse struct types cannot be nullable - # (non-nested fields can be nullable) - return cast(F.tuple(*values), op.dtype.copy(nullable=False)) - - -@translate_val.register(ops.Clip) -def _clip(op, *, arg, lower, upper, **_): - if upper is not None: - arg = if_(F.isNull(arg), NULL, F.least(upper, arg)) - - if lower is not None: - arg = if_(F.isNull(arg), NULL, F.greatest(lower, arg)) - - return arg - - -@translate_val.register(ops.StructField) -def _struct_field(op, *, arg, field: str, **_): - arg_dtype = op.arg.dtype - idx = arg_dtype.names.index(field) - return cast(sg.exp.Dot(this=arg, expression=sg.exp.convert(idx + 1)), op.dtype) - - -@translate_val.register(ops.Repeat) -def _repeat(op, *, arg, times, **_): - return F.repeat(arg, F.accurateCast(times, "UInt64")) - - -@translate_val.register(ops.FloorDivide) -def _floor_divide(op, *, 
left, right, **_): - return F.floor(left / right) - - -@translate_val.register(ops.StringContains) -def _string_contains(op, haystack, needle, **_): - return F.locate(haystack, needle) > 0 - - -@translate_val.register(ops.InValues) -def _in_values(op, *, value, options, **_): - return _parenthesize(op.value, value).isin(*options) - - -@translate_val.register(ops.InColumn) -def _in_column(op, *, value, options, **_): - return value.isin(options.this if isinstance(options, sg.exp.Subquery) else options) - - -_DAYS = calendar.day_name -_NUM_WEEKDAYS = len(_DAYS) - - -@translate_val.register(ops.DayOfWeekIndex) -def _day_of_week_index(op, *, arg, **_): - weekdays = _NUM_WEEKDAYS - return (((F.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays - - -@translate_val.register(ops.DayOfWeekName) -def day_of_week_name(op, *, arg, **_): - # ClickHouse 20 doesn't support dateName - # - # ClickHouse 21 supports dateName is broken for regexen: - # https://github.com/ClickHouse/ClickHouse/issues/32777 - # - # ClickHouses 20 and 21 also have a broken case statement hence the ifnull: - # https://github.com/ClickHouse/ClickHouse/issues/32849 - # - # We test against 20 in CI, so we implement day_of_week_name as follows - num_weekdays = _NUM_WEEKDAYS - base = (((F.toDayOfWeek(arg) - 1) % num_weekdays) + num_weekdays) % num_weekdays - return sg.exp.Case( - this=base, - ifs=[if_(i, day) for i, day in enumerate(_DAYS)], - default=sg.exp.convert(""), - ) - - -@translate_val.register(ops.Greatest) -@translate_val.register(ops.Least) -@translate_val.register(ops.Coalesce) -def _vararg_func(op, *, arg, **_): - return F[op.__class__.__name__.lower()](*arg) - - -@translate_val.register(ops.Map) -def _map(op, *, keys, values, **_): - # cast here to allow lookups of nullable columns - return cast(F.tuple(keys, values), op.dtype) - - -@translate_val.register(ops.MapGet) -def _map_get(op, *, arg, key, default, **_): - return if_(F.mapContains(arg, key), arg[key], default) - - -@translate_val.register(ops.ArrayConcat) -def _array_concat(op, *, arg, **_): - return F.arrayConcat(*arg) - - -def _binary_infix(func): - def formatter(op, *, left, right, **_): - left = _parenthesize(op.left, left) - right = _parenthesize(op.right, right) - return func(left, right) - - return formatter - - -_binary_infix_ops = { - # Binary operations - ops.Add: operator.add, - ops.Subtract: operator.sub, - ops.Multiply: operator.mul, - ops.Divide: operator.truediv, - ops.Modulus: operator.mod, - # Comparisons - ops.Equals: sg.exp.Condition.eq, - ops.NotEquals: sg.exp.Condition.neq, - ops.GreaterEqual: operator.ge, - ops.Greater: operator.gt, - ops.LessEqual: operator.le, - ops.Less: operator.lt, - # Boolean comparisons - ops.And: operator.and_, - ops.Or: operator.or_, - ops.Xor: F.xor, - ops.DateAdd: operator.add, - ops.DateSub: operator.sub, - ops.DateDiff: operator.sub, - ops.TimestampAdd: operator.add, - ops.TimestampSub: operator.sub, - ops.TimestampDiff: operator.sub, -} - - -for _op, _func in _binary_infix_ops.items(): - translate_val.register(_op)(_binary_infix(_func)) - -del _op, _func - -translate_val.register(ops.BitAnd)(_bit_agg("groupBitAnd")) -translate_val.register(ops.BitOr)(_bit_agg("groupBitOr")) -translate_val.register(ops.BitXor)(_bit_agg("groupBitXor")) - -translate_val.register(ops.StandardDev)(_agg_variance_like("stddev")) -translate_val.register(ops.Variance)(_agg_variance_like("var")) -translate_val.register(ops.Covariance)(_agg_variance_like("covar")) - - -_simple_ops = { - ops.Power: "pow", - # Unary operations - 
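`day_of_week_name` above normalizes ClickHouse's 1-based, Monday-first `toDayOfWeek` to a 0-based index into `calendar.day_name`; the double modulo keeps the result non-negative even for negative remainders. The mapping it produces:

    import calendar

    for d in range(1, 8):  # toDayOfWeek: Monday = 1 .. Sunday = 7
        idx = (((d - 1) % 7) + 7) % 7
        print(d, calendar.day_name[idx])  # 1 Monday ... 7 Sunday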
ops.TypeOf: "toTypeName", - ops.IsNan: "isNaN", - ops.IsInf: "isInfinite", - ops.Abs: "abs", - ops.Ceil: "ceil", - ops.Floor: "floor", - ops.Exp: "exp", - ops.Sqrt: "sqrt", - ops.Ln: "log", - ops.Log2: "log2", - ops.Log10: "log10", - ops.Acos: "acos", - ops.Asin: "asin", - ops.Atan: "atan", - ops.Atan2: "atan2", - ops.Cos: "cos", - ops.Sin: "sin", - ops.Tan: "tan", - ops.Pi: "pi", - ops.E: "e", - ops.RandomScalar: "randCanonical", - # Unary aggregates - ops.ApproxMedian: "median", - ops.Median: "quantileExactExclusive", - # TODO: there is also a `uniq` function which is the - # recommended way to approximate cardinality - ops.ApproxCountDistinct: "uniqHLL12", - ops.Mean: "avg", - ops.Sum: "sum", - ops.Max: "max", - ops.Min: "min", - ops.Any: "max", - ops.All: "min", - ops.ArgMin: "argMin", - ops.ArgMax: "argMax", - ops.ArrayCollect: "groupArray", - ops.Count: "count", - ops.CountDistinct: "uniq", - ops.First: "any", - ops.Last: "anyLast", - # string operations - ops.StringLength: "length", - ops.Lowercase: "lower", - ops.Uppercase: "upper", - ops.Reverse: "reverse", - ops.StringReplace: "replaceAll", - ops.StartsWith: "startsWith", - ops.EndsWith: "endsWith", - ops.LPad: "leftPad", - ops.RPad: "rightPad", - ops.LStrip: "trimLeft", - ops.RStrip: "trimRight", - ops.Strip: "trimBoth", - ops.RegexReplace: "replaceRegexpAll", - ops.StringAscii: "ascii", - # Temporal operations - ops.Date: "toDate", - ops.TimestampNow: "now", - ops.ExtractYear: "toYear", - ops.ExtractMonth: "toMonth", - ops.ExtractDay: "toDayOfMonth", - ops.ExtractDayOfYear: "toDayOfYear", - ops.ExtractQuarter: "toQuarter", - ops.ExtractWeekOfYear: "toISOWeek", - ops.ExtractHour: "toHour", - ops.ExtractMinute: "toMinute", - ops.ExtractSecond: "toSecond", - # Other operations - ops.E: "e", - # for more than 2 args this should be arrayGreatest|Least(array([])) - # because clickhouse"s greatest and least doesn"t support varargs - ops.IfElse: "if", - ops.ArrayLength: "length", - ops.Unnest: "arrayJoin", - ops.Degrees: "degrees", - ops.Radians: "radians", - ops.Strftime: "formatDateTime", - ops.IsNull: "isNull", - ops.NotNull: "isNotNull", - ops.NullIf: "nullIf", - ops.MapContains: "mapContains", - ops.MapLength: "length", - ops.MapKeys: "mapKeys", - ops.MapValues: "mapValues", - ops.MapMerge: "mapUpdate", - ops.BitwiseAnd: "bitAnd", - ops.BitwiseOr: "bitOr", - ops.BitwiseXor: "bitXor", - ops.BitwiseLeftShift: "bitShiftLeft", - ops.BitwiseRightShift: "bitShiftRight", - ops.BitwiseNot: "bitNot", - ops.ArraySort: "arraySort", - ops.ArrayContains: "has", - ops.FirstValue: "first_value", - ops.LastValue: "last_value", - ops.NTile: "ntile", - ops.ArrayIntersect: "arrayIntersect", - ops.ExtractEpochSeconds: "toRelativeSecondNum", - ops.NthValue: "nth_value", - ops.MinRank: "rank", - ops.DenseRank: "dense_rank", - ops.RowNumber: "row_number", - ops.ExtractProtocol: "protocol", - ops.ExtractAuthority: "netloc", - ops.ExtractHost: "domain", - ops.ExtractPath: "path", - ops.ExtractFragment: "fragment", - ops.ArrayPosition: "indexOf", - ops.ArrayFlatten: "arrayFlatten", - ops.IntegerRange: "range", -} - - -for _op, _name in _simple_ops.items(): - assert isinstance(type(_op), type), type(_op) - if issubclass(_op, ops.Reduction): - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, *, where, **kw): - return agg[_name](*kw.values(), where=where) - - else: - - @translate_val.register(_op) - def _fmt(_, _name: str = _name, **kw): - return F[_name](*kw.values()) - - -del _fmt, _name, _op - - -@translate_val.register(ops.ArrayDistinct) 
-def _array_distinct(op, *, arg, **_): - null_element = if_(F.countEqual(arg, NULL) > 0, F.array(NULL), F.array()) - return F.arrayConcat(F.arrayDistinct(arg), null_element) - - -@translate_val.register(ops.ExtractMicrosecond) -def _extract_microsecond(op, *, arg, **_): - dtype = op.dtype - return cast( - F.toUnixTimestamp64Micro(cast(arg, op.arg.dtype.copy(scale=6))) % 1_000_000, - dtype, - ) - - -@translate_val.register(ops.ExtractMillisecond) -def _extract_millisecond(op, *, arg, **_): - dtype = op.dtype - return cast( - F.toUnixTimestamp64Milli(cast(arg, op.arg.dtype.copy(scale=3))) % 1_000, dtype - ) - - -@translate_val.register -def _sort_key(op: ops.SortKey, *, expr, ascending: bool, **_): - return sg.exp.Ordered(this=expr, desc=not ascending) - - -@translate_val.register(ops.WindowBoundary) -def _window_boundary(op, *, value, preceding, **_): - # TODO: bit of a hack to return a dict, but there's no sqlglot expression - # that corresponds to _only_ this information - return {"value": value, "side": "preceding" if preceding else "following"} - - -@translate_val.register(ops.RowsWindowFrame) -@translate_val.register(ops.RangeWindowFrame) -def _window_frame(op, *, group_by, order_by, start, end, max_lookback=None, **_): - if max_lookback is not None: - raise NotImplementedError( - "`max_lookback` is not supported in the ClickHouse backend" - ) - - if start is None: - start = {} - - start_value = start.get("value", "UNBOUNDED") - start_side = start.get("side", "PRECEDING") - - if end is None: - end = {} - - end_value = end.get("value", "UNBOUNDED") - end_side = end.get("side", "FOLLOWING") - - spec = sg.exp.WindowSpec( - kind=op.how.upper(), - start=start_value, - start_side=start_side, - end=end_value, - end_side=end_side, - over="OVER", - ) - - order = sg.exp.Order(expressions=order_by) if order_by else None - - # TODO: bit of a hack to return a partial, but similar to `WindowBoundary` - # there's no sqlglot expression that corresponds to _only_ this information - return partial(sg.exp.Window, partition_by=group_by, order=order, spec=spec) - - -@translate_val.register(ops.WindowFunction) -def _window(op: ops.WindowFunction, *, func, frame, **_: Any): - # frame is a partial call to sg.exp.Window - return frame(this=func) - - -def shift_like(op_class, func): - @translate_val.register(op_class) - def formatter(op, *, arg, offset, default, **_): - args = [arg] - - if default is not None: - if offset is None: - offset = 1 - - args.append(offset) - args.append(default) - elif offset is not None: - args.append(offset) - - return func(*args) - - return formatter - - -shift_like(ops.Lag, F.lagInFrame) -shift_like(ops.Lead, F.leadInFrame) - - -@translate_val.register(ops.ExtractFile) -def _extract_file(op, *, arg, **_): - return F.cutFragment(F.pathFull(arg)) - - -@translate_val.register(ops.ExtractQuery) -def _extract_query(op, *, arg, key, **_): - if key is not None: - return F.extractURLParameter(arg, key) - else: - return F.queryString(arg) - - -@translate_val.register(ops.ArrayStringJoin) -def _array_string_join(op, *, arg, sep, **_): - return F.arrayStringConcat(arg, sep) - - -@translate_val.register(ops.Argument) -def _argument(op, **_): - return sg.to_identifier(op.param) - - -@translate_val.register(ops.ArrayMap) -def _array_map(op, *, arg, param, body, **_): - func = sg.exp.Lambda(this=body, expressions=[param]) - return F.arrayMap(func, arg) - - -@translate_val.register(ops.ArrayFilter) -def _array_filter(op, *, arg, param, body, **_): - func = sg.exp.Lambda(this=body, 
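The window-frame translation above assembles a `WindowSpec` and defers the final `Window` node via `functools.partial`. Mirroring those constructor calls outside the compiler (illustrative names; rendered SQL may vary by sqlglot version):

    import sqlglot as sg

    spec = sg.exp.WindowSpec(
        kind="ROWS",
        start="UNBOUNDED",
        start_side="PRECEDING",
        end="UNBOUNDED",
        end_side="FOLLOWING",
        over="OVER",
    )
    win = sg.exp.Window(
        this=sg.func("sum", sg.column("x")),
        partition_by=[sg.column("g")],
        spec=spec,
    )
    print(win.sql("clickhouse"))
    # roughly: SUM(x) OVER (PARTITION BY g ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)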
expressions=[param]) - return F.arrayFilter(func, arg) - - -@translate_val.register(ops.ArrayRemove) -def _array_remove(op, *, arg, other, **_): - x = sg.to_identifier("x") - body = x.neq(other) - return F.arrayFilter(sg.exp.Lambda(this=body, expressions=[x]), arg) - - -@translate_val.register(ops.ArrayUnion) -def _array_union(op, *, left, right, **_): - arg = F.arrayConcat(left, right) - null_element = if_(F.countEqual(arg, NULL) > 0, F.array(NULL), F.array()) - return F.arrayConcat(F.arrayDistinct(arg), null_element) - - -@translate_val.register(ops.ArrayZip) -def _array_zip(op: ops.ArrayZip, *, arg, **_: Any) -> str: - return F.arrayZip(*arg) - - -@translate_val.register(ops.CountDistinctStar) -def _count_distinct_star(op: ops.CountDistinctStar, *, where, **_: Any) -> str: - columns = F.tuple(*map(sg.column, op.arg.schema.names)) - - if where is not None: - return F.countDistinctIf(columns, where) - else: - return F.countDistinct(columns) - - -@translate_val.register(ops.ScalarUDF) -def _scalar_udf(op, **kw) -> str: - return F[op.__full_name__](*kw.values()) - - -@translate_val.register(ops.AggUDF) -def _agg_udf(op, *, where, **kw) -> str: - return agg[op.__full_name__](*kw.values(), where=where) - - -@translate_val.register(ops.DateDelta) -@translate_val.register(ops.TimestampDelta) -def _delta(op, *, part, left, right, **_): - return sg.exp.DateDiff(this=left, expression=right, unit=part) - - -@translate_val.register(ops.TimestampRange) -def _timestamp_range(op, *, start, stop, step, **_): - unit = op.step.dtype.unit.name.lower() - - if not isinstance(op.step, ops.Literal): - raise com.UnsupportedOperationError( - "ClickHouse doesn't support non-literal step values" - ) - - step_value = op.step.value - - offset = sg.to_identifier("offset") - - # e.g., offset -> dateAdd(DAY, offset, start) - func = sg.exp.Lambda( - this=F.dateAdd(sg.to_identifier(unit), offset, start), expressions=[offset] - ) - - if step_value == 0: - return F.array() - - result = F.arrayMap( - func, F.range(0, F.timestampDiff(unit, start, stop), step_value) - ) - return result - - -@translate_val.register(ops.RegexSplit) -def _regex_split(op, *, arg, pattern, **_): - return F.splitByRegexp(pattern, cast(arg, dt.String(nullable=False))) diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql index 0fc7c1f4cbe2..8ec21cad88cf 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_string_column_like/out2.sql @@ -1,7 +1,3 @@ SELECT - ( - t0.string_col LIKE 'foo%' - ) OR ( - t0.string_col LIKE '%bar' - ) AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))" + t0.string_col LIKE 'foo%' OR t0.string_col LIKE '%bar' AS "Or(StringSQLLike(string_col, 'foo%'), StringSQLLike(string_col, '%bar'))" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql index c2b64e683d01..fafc564ab456 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_functions/test_translate_math_functions/ceil/out.sql @@ -1,3 +1,3 @@ SELECT - CEIL(t0.double_col) AS 
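Both the new `visit_ArrayUnion` and the old `_array_union` above re-append a single NULL after `arrayDistinct`, which implies ClickHouse's `arrayDistinct` does not preserve NULL elements. A list-based analogue of that logic:

    def array_union(left: list, right: list) -> list:
        concat = left + right
        distinct = list(dict.fromkeys(v for v in concat if v is not None))  # arrayDistinct
        return distinct + ([None] if None in concat else [])  # countEqual(arg, NULL) > 0

    print(array_union([1, None, 2], [2, 3]))  # [1, 2, 3, None]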
"Ceil(double_col)" + CAST(CEIL(t0.double_col) AS Nullable(Int64)) AS "Ceil(double_col)" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql index c2f4023b696c..3c61319c349c 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda1/out.sql @@ -1,5 +1,3 @@ SELECT - ( - LN(t0.int_col) - ) + t0.double_col AS "Add(Log(int_col), double_col)" + LN(t0.int_col) + t0.double_col AS "Add(Log(int_col), double_col)" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql index ab420235e83d..08d6cd257a99 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_operators/test_binary_infix_parenthesization/lambda2/out.sql @@ -1,7 +1,5 @@ SELECT - t0.tinyint_col + ( - -( - t0.int_col + t0.double_col - ) + t0.tinyint_col + -( + t0.int_col + t0.double_col ) AS "Add(tinyint_col, Negate(Add(int_col, double_col)))" FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql index c405dd7be487..a62f4b1f78bc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql @@ -1,5 +1,5 @@ SELECT t0.id IN (SELECT arrayJoin(t1.ids) AS ids - FROM way_view AS t1) AS "InColumn(id, ids)" + FROM way_view AS t1) AS "InSubquery(id)" FROM node_view AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql new file mode 100644 index 000000000000..a13c5f564c3e --- /dev/null +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_complex_join/out.sql @@ -0,0 +1,18 @@ +SELECT + t5.a, + t5.b, + t5.c, + t5.d, + t5.c / ( + t5.a - t5.b + ) AS e +FROM ( + SELECT + t2.a, + t2.b, + t3.c, + t3.d + FROM s AS t2 + INNER JOIN t AS t3 + ON t2.a = t3.c +) AS t5 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql index 6320f23aa1a2..1bd6720ed390 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out1.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.string_col IN ('foo', 'bar') \ No newline at end of file diff --git 
a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql index 9eb5f653d7cc..85fd1cae375d 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_isin_notin_in_select/out2.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE NOT ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql index 4a756ab86ec9..728987548b00 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_join_self_reference/out.sql @@ -1,5 +1,17 @@ SELECT - t0.* -FROM functional_alltypes AS t0 -INNER JOIN functional_alltypes AS t1 - ON t0.id = t1.id \ No newline at end of file + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month +FROM functional_alltypes AS t1 +INNER JOIN functional_alltypes AS t3 + ON t1.id = t3.id \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql index 79de229147a0..2ae649a0ea76 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out1.sql @@ -1,12 +1,17 @@ SELECT - t0.key, + t1.key, SUM(( ( - t0.value + 1 + t1.value + 1 ) + 2 ) + 3) AS abc -FROM t0 AS t0 -WHERE - t0.value = 42 +FROM ( + SELECT + t0.key, + t0.value + FROM t0 AS t0 + WHERE + t0.value = 42 +) AS t1 GROUP BY - t0.key \ No newline at end of file + t1.key \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql index 0596895c1ccf..d22a599a88a7 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_named_from_filter_groupby/out2.sql @@ -1,12 +1,17 @@ SELECT - t0.key, + t1.key, SUM(( ( - t0.value + 1 + t1.value + 1 ) + 2 ) + 3) AS foo -FROM t0 AS t0 -WHERE - t0.value = 42 +FROM ( + SELECT + t0.key, + t0.value + FROM t0 AS t0 + WHERE + t0.value = 42 +) AS t1 GROUP BY - t0.key \ No newline at end of file + t1.key \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql index b1f45cbca8ef..99d5c76e03f3 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_self_reference_simple/out.sql @@ -1,3 +1,3 @@ SELECT * -FROM functional_alltypes AS 
functional_alltypes_ref \ No newline at end of file +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql index f879a888124a..a1bdbc4f3fc3 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -ANY JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +ANY JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql index 88c96b29443c..651c9ca46694 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-any_left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -LEFT ANY JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT ANY JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql index 700f214f0382..ebcca144d254 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -INNER JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +INNER JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql index 9e158d9dd8a1..5ae2ee1998b1 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-awardID-left_join/out.sql @@ -1,5 +1,26 @@ 
SELECT - t0.* -FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t1 - ON t0.playerID = t1.awardID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT OUTER JOIN awards_players AS t3 + ON t2.playerID = t3.awardID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql index 363aaebf890c..43bddb923f0a 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -ANY JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +ANY JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql index 3ba9f0d4e06f..5586b8b01ee0 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-any_left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -LEFT ANY JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT ANY JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql index 5d0d8dc31e6e..f611516b394e 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-inner_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -INNER JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +INNER JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql 
b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql index cc098eca7bfb..c820c7e05b88 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_joins/playerID-playerID-left_join/out.sql @@ -1,5 +1,26 @@ SELECT - t0.* -FROM batting AS t0 -LEFT OUTER JOIN awards_players AS t1 - ON t0.playerID = t1.playerID \ No newline at end of file + t2.playerID, + t2.yearID, + t2.stint, + t2.teamID, + t2.lgID, + t2.G, + t2.AB, + t2.R, + t2.H, + t2.X2B, + t2.X3B, + t2.HR, + t2.RBI, + t2.SB, + t2.CS, + t2.BB, + t2.SO, + t2.IBB, + t2.HBP, + t2.SH, + t2.SF, + t2.GIDP +FROM batting AS t2 +LEFT OUTER JOIN awards_players AS t3 + ON t2.playerID = t3.playerID \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql index c960b3b70a1a..86d975c44589 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_simple_scalar_aggregates/out.sql @@ -1,5 +1,21 @@ SELECT - SUM(t0.float_col) AS "Sum(float_col)" -FROM functional_alltypes AS t0 -WHERE - t0.int_col > 0 \ No newline at end of file + SUM(t1.float_col) AS "Sum(float_col)" +FROM ( + SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month + FROM functional_alltypes AS t0 + WHERE + t0.int_col > 0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql index 0bc4ec1e4852..d2bf6243fdea 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_table_column_unbox/out.sql @@ -1,12 +1,28 @@ SELECT - t1.string_col + t2.string_col FROM ( SELECT - t0.string_col, - SUM(t0.float_col) AS total - FROM functional_alltypes AS t0 - WHERE - t0.int_col > 0 + t1.string_col, + SUM(t1.float_col) AS total + FROM ( + SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month + FROM functional_alltypes AS t0 + WHERE + t0.int_col > 0 + ) AS t1 GROUP BY - t0.string_col -) AS t1 \ No newline at end of file + t1.string_col +) AS t2 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql index 6806def413a1..cb651e1f7369 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_simple_comparisons/out.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.float_col > 0 AND 
t0.int_col < ( diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql index 7982cdf7c584..a27f8a736dc5 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_where_with_between/out.sql @@ -1,5 +1,17 @@ SELECT - * + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month FROM functional_alltypes AS t0 WHERE t0.int_col > 0 AND t0.float_col BETWEEN 0 AND 1 \ No newline at end of file diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py index 207fcef8d393..b74ce39ff621 100644 --- a/ibis/backends/clickhouse/tests/test_select.py +++ b/ibis/backends/clickhouse/tests/test_select.py @@ -403,3 +403,12 @@ def test_array_join_in_subquery(snapshot): out = ibis.clickhouse.compile(expr) snapshot.assert_match(out, "out.sql") + + +def test_complex_join(snapshot): + t1 = ibis.table({"a": "int", "b": "int"}, name="s") + t2 = ibis.table({"c": "int", "d": "int"}, name="t") + t3 = t1.join(t2, t1.a == t2.c) + q = t3.mutate(e=t3.c / (t3.a - t3.b)) + out = ibis.clickhouse.compile(q) + snapshot.assert_match(out, "out.sql") diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index dd9e5474d58b..a7459052119f 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -538,7 +538,6 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( keep=( - "duckdb", "exasol", "mssql", "mysql", diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 75ab280e3a63..b1df75561970 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -6,16 +6,19 @@ import contextlib import os import warnings +from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any +from urllib.parse import parse_qs, urlparse import duckdb +import pandas as pd import pyarrow as pa import pyarrow_hotfix # noqa: F401 -import sqlalchemy as sa import sqlglot as sg -import toolz +import sqlglot.expressions as sge +import ibis import ibis.common.exceptions as exc import ibis.expr.datatypes as dt import ibis.expr.operations as ops @@ -23,21 +26,21 @@ import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanCreateSchema -from ibis.backends.base.sql.alchemy import AlchemyCrossSchemaBackend -from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported -from ibis.backends.base.sqlglot import C, F -from ibis.backends.duckdb.compiler import DuckDBSQLCompiler -from ibis.backends.duckdb.datatypes import DuckDBType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import STAR, C, F +from ibis.backends.base.sqlglot.datatypes import DuckDBType +from ibis.backends.duckdb.compiler import DuckDBCompiler +from ibis.backends.duckdb.datatypes import DuckDBPandasData from ibis.expr.operations.udf import InputType -from ibis.formats.pandas import PandasData if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping, MutableMapping, Sequence - import pandas as pd import torch from fsspec import AbstractFileSystem + from ibis.backends.base.sql import BaseBackend + def normalize_filenames(source_list): # Promote to 
list @@ -46,22 +49,6 @@ def normalize_filenames(source_list): return list(map(util.normalize_filename, source_list)) -def _format_kwargs(kwargs: Mapping[str, Any]): - bindparams, pieces = [], [] - for name, value in kwargs.items(): - bindparam = sa.bindparam(name, value) - if isinstance(paramtype := bindparam.type, sa.String): - # special case strings to avoid double escaping backslashes - pieces.append(f"{name} = '{value!s}'") - elif not isinstance(paramtype, sa.types.NullType): - bindparams.append(bindparam) - pieces.append(f"{name} = :{name}") - else: # fallback to string strategy - pieces.append(f"{name} = {value!r}") - - return sa.text(", ".join(pieces)).bindparams(*bindparams) - - _UDF_INPUT_TYPE_MAPPING = { InputType.PYARROW: duckdb.functional.ARROW, InputType.PYTHON: duckdb.functional.NATIVE, @@ -69,75 +56,294 @@ def _format_kwargs(kwargs: Mapping[str, Any]): class _Settings: - def __init__(self, con): + def __init__(self, con: duckdb.DuckDBPyConnection) -> None: self.con = con - def __getitem__(self, key): - try: - with self.con.begin() as con: - return con.exec_driver_sql( - f"select value from duckdb_settings() where name = '{key}'" - ).one() - except sa.exc.NoResultFound: - raise KeyError(key) + def __getitem__(self, key: str) -> Any: + maybe_value = self.con.execute( + f"select value from duckdb_settings() where name = '{key}'" + ).fetchone() + if maybe_value is not None: + return maybe_value[0] + raise KeyError(key) def __setitem__(self, key, value): - with self.con.begin() as con: - con.exec_driver_sql(f"SET {key}='{value}'") + self.con.execute(f"SET {key} = '{value}'") def __repr__(self): - with self.con.begin() as con: - kv = con.exec_driver_sql( - "select map(array_agg(name), array_agg(value)) from duckdb_settings()" - ).scalar() + ((kv,),) = self.con.execute( + "select map(array_agg(name), array_agg(value)) from duckdb_settings()" + ).fetch() return repr(dict(zip(kv["key"], kv["value"]))) -class Backend(AlchemyCrossSchemaBackend, CanCreateSchema): +class Backend(SQLGlotBackend, CanCreateSchema): name = "duckdb" - compiler = DuckDBSQLCompiler - supports_create_or_replace = True + compiler = DuckDBCompiler() + + def _define_udf_translation_rules(self, expr): + """No-op: UDF translation rules are defined in the compiler.""" @property def settings(self) -> _Settings: - return _Settings(self) + return _Settings(self.con) @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.func.current_database())) + with self._safe_raw_sql(sg.select(self.compiler.f.current_database())) as cur: + [(db,)] = cur.fetchall() + return db - def list_databases(self, like: str | None = None) -> list[str]: - s = sa.table( - "schemata", - sa.column("catalog_name", sa.TEXT()), - schema="information_schema", + @property + def current_schema(self) -> str: + with self._safe_raw_sql(sg.select(self.compiler.f.current_schema())) as cur: + [(schema,)] = cur.fetchall() + return schema + + def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name) + return self.con.execute(query, **kwargs) + + def _to_sqlglot( + self, expr: ir.Expr, limit: str | None = None, params=None, **_: Any + ): + sql = super()._to_sqlglot(expr, limit=limit, params=params) + + table_expr = expr.as_table() + geocols = frozenset( + name for name, typ in table_expr.schema().items() if typ.is_geospatial() + ) + + if not geocols: + return sql + + return sg.select( + *( + self.compiler.f.st_aswkb( + sg.column(col, 
quoted=self.compiler.quoted) + ).as_(col) + if col in geocols + else col + for col in table_expr.columns + ) + ).from_(sql.subquery()) + + def create_table( + self, + name: str, + obj: pd.DataFrame | pa.Table | ir.Table | None = None, + *, + schema: ibis.Schema | None = None, + database: str | None = None, + temp: bool = False, + overwrite: bool = False, + ): + """Create a table in DuckDB. + + Parameters + ---------- + name + Name of the table to create + obj + The data with which to populate the table; optional, but at least + one of `obj` or `schema` must be specified + schema + The schema of the table to create; optional, but at least one of + `obj` or `schema` must be specified + database + The name of the database in which to create the table; if not + passed, the current database is used. + temp + Create a temporary table + overwrite + If `True`, replace the table if it already exists, otherwise fail + if the table exists + """ + if obj is None and schema is None: + raise ValueError("Either `obj` or `schema` must be specified") + + properties = [] + + if temp: + properties.append(sge.TemporaryProperty()) + + if obj is not None: + if not isinstance(obj, ir.Expr): + table = ibis.memtable(obj) + else: + table = obj + + self._run_pre_execute_hooks(table) + + query = self._to_sqlglot(table) + else: + query = None + + column_defs = [ + sge.ColumnDef( + this=sg.to_identifier(colname, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + constraints=( + None + if typ.nullable + else [sge.ColumnConstraint(kind=sge.NotNullColumnConstraint())] + ), + ) + for colname, typ in (schema or table.schema()).items() + ] + + if overwrite: + temp_name = util.gen_name("duckdb_table") + else: + temp_name = name + + table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted) + target = sge.Schema(this=table, expressions=column_defs) + + create_stmt = sge.Create( + kind="TABLE", + this=target, + properties=sge.Properties(expressions=properties), + ) + + this = sg.table(name, catalog=database, quoted=self.compiler.quoted) + with self._safe_raw_sql(create_stmt) as cur: + if query is not None: + insert_stmt = sge.Insert(this=table, expression=query).sql(self.name) + cur.execute(insert_stmt).fetchall() + + if overwrite: + cur.execute( + sge.Drop(kind="TABLE", this=this, exists=True).sql(self.name) + ).fetchall() + cur.execute( + f"ALTER TABLE IF EXISTS {table.sql(self.name)} RENAME TO {this.sql(self.name)}" + ).fetchall() + + return self.table(name, schema=database) + + def _load_into_cache(self, name, expr): + self.create_table(name, expr, schema=expr.schema(), temp=True) + + def _clean_up_cached_table(self, op): + self.drop_table(op.name) + + def table( + self, name: str, schema: str | None = None, database: str | None = None + ) -> ir.Table: + """Construct a table expression. + + Parameters + ---------- + name + Table name + schema + Schema name + database + Database name + + Returns + ------- + Table + Table expression + """ + table_schema = self.get_schema(name, schema=schema, database=database) + # load geospatial only if geo columns + if any(typ.is_geospatial() for typ in table_schema.types): + self.load_extension("spatial") + return ops.DatabaseTable( + name, + schema=table_schema, + source=self, + namespace=ops.Namespace(database=database, schema=schema), + ).to_expr() + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + """Compute the schema of a `table`. 
+ + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. + schema + Schema name + database + Database name + + Returns + ------- + sch.Schema + Ibis schema + """ + conditions = [sg.column("table_name").eq(sge.convert(table_name))] + + if database is not None: + conditions.append(sg.column("table_catalog").eq(sge.convert(database))) + + if schema is not None: + conditions.append(sg.column("table_schema").eq(sge.convert(schema))) + + query = ( + sg.select( + "column_name", + "data_type", + sg.column("is_nullable").eq(sge.convert("YES")).as_("nullable"), + ) + .from_(sg.table("columns", db="information_schema")) + .where(sg.and_(*conditions)) + .order_by("ordinal_position") ) - query = sa.select(sa.distinct(s.c.catalog_name)) - with self.begin() as con: - results = list(con.execute(query).scalars()) - return self._filter_with_like(results, like=like) + with self._safe_raw_sql(query) as cur: + meta = cur.fetch_arrow_table() + + if not meta: + raise exc.IbisError(f"Table not found: {table_name!r}") + + names = meta["column_name"].to_pylist() + types = meta["data_type"].to_pylist() + nullables = meta["nullable"].to_pylist() + + return sch.Schema( + { + name: DuckDBType.from_string(typ, nullable=nullable) + for name, typ, nullable in zip(names, types, nullables) + } + ) + + @contextlib.contextmanager + def _safe_raw_sql(self, *args, **kwargs): + yield self.raw_sql(*args, **kwargs) + + def list_databases(self, like: str | None = None) -> list[str]: + col = "catalog_name" + query = sg.select(sge.Distinct(expressions=[sg.column(col)])).from_( + sg.table("schemata", db="information_schema") + ) + with self._safe_raw_sql(query) as cur: + result = cur.fetch_arrow_table() + dbs = result[col] + return self._filter_with_like(dbs.to_pylist(), like) def list_schemas( self, like: str | None = None, database: str | None = None ) -> list[str]: - # override duckdb because all databases are always visible - text = """\ -SELECT schema_name -FROM information_schema.schemata -WHERE catalog_name = :database""" - query = sa.text(text).bindparams( - database=database if database is not None else self.current_database + col = "schema_name" + query = sg.select(sge.Distinct(expressions=[sg.column(col)])).from_( + sg.table("schemata", db="information_schema") ) - with self.begin() as con: - schemas = list(con.execute(query).scalars()) - return self._filter_with_like(schemas, like=like) + if database is not None: + query = query.where(sg.column("catalog_name").eq(sge.convert(database))) - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.func.current_schema())) + with self._safe_raw_sql(query) as cur: + out = cur.fetch_arrow_table() + return self._filter_with_like(out[col].to_pylist(), like=like) @staticmethod def _convert_kwargs(kwargs: MutableMapping) -> None: @@ -156,47 +362,6 @@ def version(self) -> str: return importlib.metadata.version("duckdb") - @staticmethod - def _new_sa_metadata(): - meta = sa.MetaData() - - # _new_sa_metadata is invoked whenever `_get_sqla_table` is called, so - # it's safe to store columns as keys, that is, columns from different - # tables with the same name won't collide - complex_type_info_cache = {} - - @sa.event.listens_for(meta, "column_reflect") - def column_reflect(inspector, table, column_info): - import duckdb_engine.datatypes as ddt - - # duckdb_engine as of 0.7.2 doesn't expose the inner types of any - # complex types so we have to extract it from duckdb directly - 
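
The hook being deleted here falls back to DuckDB's own `DESCRIBE` output to recover the nested types that duckdb_engine reflection erased. A minimal standalone sketch of that probing using only the plain `duckdb` API (the table and column names are made up for illustration):

```python
import duckdb

con = duckdb.connect()
con.execute("CREATE TABLE t (xs INTEGER[], s STRUCT(a INTEGER, b VARCHAR))")

# DESCRIBE reports (column_name, column_type, null, key, default, extra);
# column_type carries the full nested type, e.g. STRUCT(a INTEGER, b VARCHAR),
# which is the string that DuckDBType.from_string parses on the ibis side
for name, typ, *_ in con.execute("DESCRIBE SELECT * FROM t").fetchall():
    print(name, typ)
```
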
ddt_struct_type = getattr(ddt, "Struct", sa.types.NullType) - ddt_map_type = getattr(ddt, "Map", sa.types.NullType) - if isinstance( - column_info["type"], (sa.ARRAY, ddt_struct_type, ddt_map_type) - ): - engine = inspector.engine - colname = column_info["name"] - if (coltype := complex_type_info_cache.get(colname)) is None: - quote = engine.dialect.identifier_preparer.quote - quoted_colname = quote(colname) - quoted_tablename = quote(table.name) - with engine.connect() as con: - # The .connection property is used to avoid creating a - # nested transaction - con.connection.execute( - f"DESCRIBE SELECT {quoted_colname} FROM {quoted_tablename}" - ) - _, typ, *_ = con.connection.fetchone() - complex_type_info_cache[colname] = coltype = DuckDBType.from_string( - typ - ) - - column_info["type"] = DuckDBType.from_ibis(coltype) - - return meta - def do_connect( self, database: str | Path = ":memory:", @@ -246,54 +411,65 @@ def do_connect( Path(temp_directory).mkdir(parents=True, exist_ok=True) config["temp_directory"] = str(temp_directory) - engine = sa.create_engine( - f"duckdb:///{database}", - connect_args=dict(read_only=read_only, config=config), - poolclass=sa.pool.StaticPool, - ) + self.con = duckdb.connect(str(database), config=config, read_only=read_only) - @sa.event.listens_for(engine, "connect") - def configure_connection(dbapi_connection, connection_record): - if extensions is not None: - self._sa_load_extensions(dbapi_connection, extensions) - dbapi_connection.execute("SET TimeZone = 'UTC'") + # Load any pre-specified extensions + if extensions is not None: + self._load_extensions(extensions) - self._record_batch_readers_consumed = {} + # Default timezone + with self._safe_raw_sql("SET TimeZone = 'UTC'"): + pass - # TODO(cpcloud): remove this when duckdb is >0.8.1 - # this is here to workaround https://github.com/duckdb/duckdb/issues/8735 - with contextlib.suppress(duckdb.InvalidInputException): - duckdb.execute("SELECT ?", (1,)) - - engine.dialect._backslash_escapes = False - super().do_connect(engine) + self._record_batch_readers_consumed = {} + self._temp_views: set[str] = set() - @staticmethod - def _sa_load_extensions( - dbapi_con, extensions: list[str], force_install: bool = False + def _load_extensions( + self, extensions: list[str], force_install: bool = False ) -> None: - query = """ - WITH exts AS ( - SELECT extension_name AS name, aliases FROM duckdb_extensions() - WHERE installed AND loaded + f = self.compiler.f + query = ( + sg.select(f.unnest(f.list_append(C.aliases, C.extension_name))) + .from_(f.duckdb_extensions()) + .where(sg.and_(C.installed, C.loaded)) ) - SELECT name FROM exts - UNION (SELECT UNNEST(aliases) AS name FROM exts) + with self._safe_raw_sql(query) as cur: + installed = map(itemgetter(0), cur.fetchall()) + # Install and load all other extensions + todo = frozenset(extensions).difference(installed) + for extension in todo: + cur.install_extension(extension, force_install=force_install) + cur.load_extension(extension) + + def _from_url(self, url: str, **kwargs) -> BaseBackend: + """Connect to a backend using a URL `url`. + + Parameters + ---------- + url + URL with which to connect to a backend. 
+        kwargs
+            Additional keyword arguments
+
+        Returns
+        -------
+        BaseBackend
+            A backend instance
         """
-        installed = (name for (name,) in dbapi_con.sql(query).fetchall())
-        # Install and load all other extensions
-        todo = set(extensions).difference(installed)
-        for extension in todo:
-            dbapi_con.install_extension(extension, force_install=force_install)
-            dbapi_con.load_extension(extension)
+        url = urlparse(url)
+        database = url.path[1:] or ":memory:"
+        query_params = parse_qs(url.query)
+
+        for name, value in query_params.items():
+            if len(value) > 1:
+                kwargs[name] = value
+            elif len(value) == 1:
+                kwargs[name] = value[0]
+            else:
+                raise exc.IbisError(f"Invalid URL parameter: {name}")

-    def _load_extensions(
-        self, extensions: list[str], force_install: bool = False
-    ) -> None:
-        with self.begin() as con:
-            self._sa_load_extensions(
-                con.connection, extensions, force_install=force_install
-            )
+        self._convert_kwargs(kwargs)
+        return self.connect(database=database, **kwargs)

     def load_extension(self, extension: str, force_install: bool = False) -> None:
         """Install and load a duckdb extension by name or path.
@@ -314,10 +490,9 @@ def create_schema(
             raise exc.UnsupportedOperationError(
                 "DuckDB cannot create a schema in another database."
             )
-        name = self._quote(name)
-        if_not_exists = "IF NOT EXISTS " * force
-        with self.begin() as con:
-            con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}")
+
+        name = sg.to_identifier(name, quoted=True)
+        with self._safe_raw_sql(sge.Create(this=name, kind="SCHEMA", exists=force)):
+            pass

     def drop_schema(
         self, name: str, database: str | None = None, force: bool = False
@@ -326,10 +501,9 @@ def drop_schema(
             raise exc.UnsupportedOperationError(
                 "DuckDB cannot drop a schema in another database."
             )
-        name = self._quote(name)
-        if_exists = "IF EXISTS " * force
-        with self.begin() as con:
-            con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}")
+
+        name = sg.to_identifier(name, quoted=True)
+        with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", exists=force)):
+            pass

     def register(
         self,
@@ -366,7 +540,7 @@ def register(
         else:
             try:
                 return self.read_in_memory(source, table_name=table_name, **kwargs)
-            except sa.exc.ProgrammingError:
+            except (duckdb.InvalidInputException, NameError):
                 self._register_failure()

         if first.startswith(("parquet://", "parq://")) or first.endswith(
@@ -397,12 +571,6 @@ def _register_failure(self):
             f"please call one of {msg} directly"
         )

-    def _compile_temp_view(self, table_name, source):
-        raw_source = source.compile(
-            dialect=self.con.dialect, compile_kwargs=dict(literal_binds=True)
-        )
-        return f'CREATE OR REPLACE TEMPORARY VIEW "{table_name}" AS {raw_source}'
-
     @util.experimental
     def read_json(
         self,
@@ -433,15 +601,18 @@ def read_json(
         if not table_name:
             table_name = util.gen_name("read_json")

-        source = sa.select(sa.literal_column("*")).select_from(
-            sa.func.read_json_auto(
-                sa.func.list_value(*normalize_filenames(source_list)),
-                _format_kwargs(kwargs),
-            )
+        options = [
+            sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items()
+        ]
+
+        self._create_temp_view(
+            table_name,
+            sg.select(STAR).from_(
+                self.compiler.f.read_json_auto(
+                    normalize_filenames(source_list), *options
+                )
+            ),
         )
-        view = self._compile_temp_view(table_name, source)
-        with self.begin() as con:
-            con.exec_driver_sql(view)

         return self.table(table_name)

@@ -485,13 +656,32 @@ def read_csv(
         kwargs.setdefault("header", True)
         kwargs["auto_detect"] = kwargs.pop("auto_detect", "columns" not in kwargs)

-        source = sa.select(sa.literal_column("*")).select_from(
-
sa.func.read_csv(sa.func.list_value(*source_list), _format_kwargs(kwargs)) + # TODO: clean this up + # We want to _usually_ quote arguments but if we quote `columns` it messes + # up DuckDB's struct parsing. + options = [ + sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() + ] + + if (columns := kwargs.pop("columns", None)) is not None: + options.append( + sg.to_identifier("columns").eq( + sge.Struct( + expressions=[ + sge.Slice( + this=sge.convert(key), expression=sge.convert(value) + ) + for key, value in columns.items() + ] + ) + ) + ) + + self._create_temp_view( + table_name, + sg.select(STAR).from_(self.compiler.f.read_csv(source_list, *options)), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) def read_geo( @@ -529,17 +719,24 @@ def read_geo( self.load_extension("spatial") source = util.normalize_filename(source) - if source.startswith(("http://", "https://", "s3://")): self._load_extensions(["httpfs"]) - source_expr = sa.select(sa.literal_column("*")).select_from( - sa.func.st_read(source, _format_kwargs(kwargs)) + source_expr = sg.select(STAR).from_( + self.compiler.f.st_read( + source, + *(sg.to_identifier(key).eq(val) for key, val in kwargs.items()), + ) ) - view = self._compile_temp_view(table_name, source_expr) - with self.begin() as con: - con.exec_driver_sql(view) + view = sge.Create( + kind="VIEW", + this=sg.table(table_name, quoted=self.compiler.quoted), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + expression=source_expr, + ) + with self._safe_raw_sql(view): + pass return self.table(table_name) def read_parquet( @@ -576,11 +773,8 @@ def read_parquet( # pyarrow dataset try: self._read_parquet_duckdb_native(source_list, table_name, **kwargs) - except sa.exc.OperationalError as e: - if isinstance(e.orig, duckdb.IOException): - self._read_parquet_pyarrow_dataset(source_list, table_name, **kwargs) - else: - raise e + except duckdb.IOException: + self._read_parquet_pyarrow_dataset(source_list, table_name, **kwargs) return self.table(table_name) @@ -593,14 +787,13 @@ def _read_parquet_duckdb_native( ): self._load_extensions(["httpfs"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.read_parquet( - sa.func.list_value(*source_list), _format_kwargs(kwargs) - ) + options = [ + sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() + ] + self._create_temp_view( + table_name, + sg.select(STAR).from_(self.compiler.f.read_parquet(source_list, *options)), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) def _read_parquet_pyarrow_dataset( self, source_list: str | Iterable[str], table_name: str, **kwargs: Any @@ -612,12 +805,11 @@ def _read_parquet_pyarrow_dataset( # We don't create a view since DuckDB special cases Arrow Datasets # so if we also create a view we end up with both a "lazy table" # and a view with the same name - with self.begin() as con: - # DuckDB normally auto-detects Arrow Datasets that are defined - # in local variables but the `dataset` variable won't be local - # by the time we execute against this so we register it - # explicitly. - con.connection.register(table_name, dataset) + self.con.register(table_name, dataset) + # DuckDB normally auto-detects Arrow Datasets that are defined + # in local variables but the `dataset` variable won't be local + # by the time we execute against this so we register it + # explicitly. 
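
For reference, the special-casing described in this comment is observable with the raw `duckdb` API; the in-memory dataset and the `my_parquet` name below are illustrative stand-ins for the dataset built from `source_list`:

```python
import duckdb
import pyarrow as pa
import pyarrow.dataset as ds

# stands in for the pyarrow dataset built from the parquet sources
dataset = ds.dataset(pa.table({"a": [1, 2, 3]}))

con = duckdb.connect()
# registering the dataset exposes it as a "lazy table"; no view is created
# on top of it, matching the backend code above
con.register("my_parquet", dataset)
print(con.execute("SELECT sum(a) FROM my_parquet").fetchall())  # [(6,)]
```
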
def read_in_memory( self, @@ -640,8 +832,7 @@ def read_in_memory( The just-registered table """ table_name = table_name or util.gen_name("read_in_memory") - with self.begin() as con: - con.connection.register(table_name, source) + self.con.register(table_name, source) if isinstance(source, pa.RecordBatchReader): # Ensure the reader isn't marked as started, in case the name is @@ -735,28 +926,26 @@ def list_tables( >>> con.list_tables(schema="my_schema") ['baz'] """ - database = ( - F.current_database() if database is None else sg.exp.convert(database) - ) - schema = F.current_schema() if schema is None else sg.exp.convert(schema) + database = F.current_database() if database is None else sge.convert(database) + schema = F.current_schema() if schema is None else sge.convert(schema) + col = "table_name" sql = ( - sg.select(C.table_name) + sg.select(col) .from_(sg.table("tables", db="information_schema")) .distinct() .where( C.table_catalog.eq(database).or_( - C.table_catalog.eq(sg.exp.convert("temp")) + C.table_catalog.eq(sge.convert("temp")) ), C.table_schema.eq(schema), ) .sql(self.name, pretty=True) ) - with self.begin() as con: - out = con.exec_driver_sql(sql).cursor.fetch_arrow_table() + out = self.con.execute(sql).fetch_arrow_table() - return self._filter_with_like(out["table_name"].to_pylist(), like) + return self._filter_with_like(out[col].to_pylist(), like) def read_postgres( self, uri: str, table_name: str | None = None, schema: str = "public" @@ -782,12 +971,13 @@ def read_postgres( "`table_name` is required when registering a postgres table" ) self._load_extensions(["postgres_scanner"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.postgres_scan_pushdown(uri, schema, table_name) + + self._create_temp_view( + table_name, + sg.select(STAR).from_( + self.compiler.f.postgres_scan_pushdown(uri, schema, table_name) + ), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) @@ -836,12 +1026,14 @@ def read_sqlite(self, path: str | Path, table_name: str | None = None) -> ir.Tab raise ValueError("`table_name` is required when registering a sqlite table") self._load_extensions(["sqlite"]) - source = sa.select(sa.literal_column("*")).select_from( - sa.func.sqlite_scan(str(path), table_name) + self._create_temp_view( + table_name, + sg.select(STAR).from_( + self.compiler.f.sqlite_scan( + sg.to_identifier(str(path), quoted=True), table_name + ) + ), ) - view = self._compile_temp_view(table_name, source) - with self.begin() as con: - con.exec_driver_sql(view) return self.table(table_name) @@ -868,8 +1060,7 @@ def attach( if read_only: code += " (READ_ONLY)" - with self.begin() as con: - con.exec_driver_sql(code) + self.con.execute(code).fetchall() def detach(self, name: str) -> None: """Detach a database from the current DuckDB session. @@ -880,8 +1071,7 @@ def detach(self, name: str) -> None: The name of the database to detach. 
""" name = sg.to_identifier(name).sql(self.name) - with self.begin() as con: - con.exec_driver_sql(f"DETACH {name}") + self.con.execute(f"DETACH {name}").fetchall() def attach_sqlite( self, path: str | Path, overwrite: bool = False, all_varchar: bool = False @@ -916,10 +1106,11 @@ def attach_sqlite( >>> con.list_tables() ['t'] """ - self._load_extensions(["sqlite"]) - with self.begin() as con: - con.execute(sa.text(f"SET GLOBAL sqlite_all_varchar={all_varchar}")) - con.execute(sa.text(f"CALL sqlite_attach('{path}', overwrite={overwrite})")) + self.load_extension("sqlite") + with self._safe_raw_sql(f"SET GLOBAL sqlite_all_varchar={all_varchar}") as cur: + cur.execute( + f"CALL sqlite_attach('{path}', overwrite={overwrite})" + ).fetchall() def register_filesystem(self, filesystem: AbstractFileSystem): """Register an `fsspec` filesystem object with DuckDB. @@ -956,8 +1147,7 @@ def register_filesystem(self, filesystem: AbstractFileSystem): name string band string """ - with self.begin() as con: - con.connection.register_filesystem(filesystem) + self.con.register_filesystem(filesystem) def _run_pre_execute_hooks(self, expr: ir.Expr) -> None: # Warn for any tables depending on RecordBatchReaders that have already @@ -1007,18 +1197,20 @@ def to_pyarrow_batches( ::: """ self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - sql = query_ast.compile() + table = expr.as_table() + sql = self.compile(table, limit=limit, params=params) - def batch_producer(con): - with con.begin() as c, contextlib.closing(c.execute(sql)) as cur: - yield from cur.cursor.fetch_record_batch(rows_per_batch=chunk_size) + def batch_producer(cur): + yield from cur.fetch_record_batch(rows_per_batch=chunk_size) + # TODO: check that this is still handled correctly # batch_producer keeps the `self.con` member alive long enough to # exhaust the record batch reader, even if the backend or connection # have gone out of scope in the caller + result = self.raw_sql(sql) + return pa.RecordBatchReader.from_batches( - expr.as_table().schema().to_pyarrow(), batch_producer(self.con) + expr.as_table().schema().to_pyarrow(), batch_producer(result) ) def to_pyarrow( @@ -1030,20 +1222,11 @@ def to_pyarrow( **_: Any, ) -> pa.Table: self._run_pre_execute_hooks(expr) - query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params) - - # We use `.sql` instead of `.execute` below for performance - in - # certain cases duckdb query -> arrow table can be significantly faster - # in this configuration. Currently `.sql` doesn't support parametrized - # queries, so we need to compile with literal_binds for now. 
- sql = str( - query_ast.compile().compile( - dialect=self.con.dialect, compile_kwargs={"literal_binds": True} - ) - ) + table = expr.as_table() + sql = self.compile(table, limit=limit, params=params) - with self.begin() as con: - table = con.connection.sql(sql).to_arrow_table() + with self._safe_raw_sql(sql) as cur: + table = cur.fetch_arrow_table() return expr.__pyarrow_result__(table) @@ -1076,7 +1259,7 @@ def to_torch( """ compiled = self.compile(expr, limit=limit, params=params, **kwargs) with self._safe_raw_sql(compiled) as cur: - return cur.connection.connection.torch() + return cur.torch() @util.experimental def to_parquet( @@ -1132,8 +1315,8 @@ def to_parquet( query = self._to_sql(expr, params=params) args = ["FORMAT 'parquet'", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())] copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})" - with self.begin() as con: - con.exec_driver_sql(copy_cmd) + with self._safe_raw_sql(copy_cmd): + pass @util.experimental def to_csv( @@ -1171,16 +1354,16 @@ def to_csv( *(f"{k.upper()} {v!r}" for k, v in kwargs.items()), ] copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})" - with self.begin() as con: - con.exec_driver_sql(copy_cmd) + with self._safe_raw_sql(copy_cmd): + pass - def fetch_from_cursor( + def _fetch_from_cursor( self, cursor: duckdb.DuckDBPyConnection, schema: sch.Schema ) -> pd.DataFrame: import pandas as pd import pyarrow.types as pat - table = cursor.cursor.fetch_arrow_table() + table = cursor.fetch_arrow_table() df = pd.DataFrame( { @@ -1198,10 +1381,7 @@ def fetch_from_cursor( for name, col in zip(table.column_names, table.columns) } ) - df = PandasData.convert_table(df, schema) - if not df.empty and geospatial_supported: - return self._to_geodataframe(df, schema) - return df + return DuckDBPandasData.convert_table(df, schema) # TODO(gforsyth): this may not need to be specialized in the future @staticmethod @@ -1224,15 +1404,19 @@ def _to_geodataframe(df, schema): return df def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: - with self.begin() as con: - rows = con.exec_driver_sql(f"DESCRIBE {query}") + with self._safe_raw_sql(f"DESCRIBE {query}") as cur: + rows = cur.fetch_arrow_table() - for name, type, null in toolz.pluck( - ["column_name", "column_type", "null"], rows.mappings() - ): - nullable = null.lower() == "yes" - ibis_type = DuckDBType.from_string(type, nullable=nullable) - yield name, ibis_type + rows = rows.to_pydict() + + for name, typ, null in zip( + rows["column_name"], rows["column_type"], rows["null"] + ): + yield name, DuckDBType.from_string(typ, nullable=null == "YES") + + def _register_in_memory_tables(self, expr: ir.Expr) -> None: + for memtable in expr.op().find(ops.InMemoryTable): + self._register_in_memory_table(memtable) def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: schema = op.schema @@ -1245,34 +1429,24 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: # only register if we haven't already done so if (name := op.name) not in self.list_tables(): table = op.data.to_pyarrow(schema) - - # register creates a transaction, and we can't nest transactions so - # we create a function to encapsulate the whole shebang - def _register(name, table): - with self.begin() as con: - con.connection.register(name, table) - - _register(name, table) - - def _get_temp_view_definition( - self, name: str, definition: sa.sql.compiler.Compiled - ) -> str: - yield f"CREATE OR REPLACE TEMPORARY VIEW {name} AS {definition}" + table = getattr(table, 
"obj", table) + self.con.register(name, table) def _register_udfs(self, expr: ir.Expr) -> None: import ibis.expr.operations as ops - with self.con.connect() as con: - for udf_node in expr.op().find(ops.ScalarUDF): - compile_func = getattr( - self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" - ) - with contextlib.suppress(duckdb.InvalidInputException): - con.connection.remove_function(udf_node.__class__.__name__) + con = self.con + + for udf_node in expr.op().find(ops.ScalarUDF): + compile_func = getattr( + self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" + ) + with contextlib.suppress(duckdb.InvalidInputException): + con.remove_function(udf_node.__class__.__name__) - registration_func = compile_func(udf_node) - if registration_func is not None: - registration_func(con) + registration_func = compile_func(udf_node) + if registration_func is not None: + registration_func(con) def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: func = udf_node.__func__ @@ -1284,7 +1458,7 @@ def _compile_udf(self, udf_node: ops.ScalarUDF) -> None: output_type = DuckDBType.to_string(udf_node.dtype) def register_udf(con): - return con.connection.create_function( + return con.create_function( name, func, input_types, @@ -1297,42 +1471,59 @@ def register_udf(con): _compile_python_udf = _compile_udf _compile_pyarrow_udf = _compile_udf + def _compile_builtin_udf(self, udf_node: ops.ScalarUDF) -> None: + """No op.""" + def _compile_pandas_udf(self, _: ops.ScalarUDF) -> None: raise NotImplementedError("duckdb doesn't support pandas UDFs") - def _get_compiled_statement(self, view: sa.Table, definition: sa.sql.Selectable): + def _get_compiled_statement(self, view, definition): # TODO: remove this once duckdb supports CTAS prepared statements return super()._get_compiled_statement( view, definition, compile_kwargs={"literal_binds": True} ) - def _insert_dataframe( - self, table_name: str, df: pd.DataFrame, overwrite: bool - ) -> None: - columns = list(df.columns) - t = sa.table(table_name, *map(sa.column, columns)) - - table_name = self._quote(table_name) - - # the table name df here matters, and *must* match the input variable's - # name because duckdb will look up this name in the outer scope of the - # insert call and pull in that variable's data to scan - source = sa.table("df", *map(sa.column, columns)) - - with self.begin() as con: - if overwrite: - con.execute(t.delete()) - con.execute(t.insert().from_select(columns, sa.select(source))) - - def table( + def insert( self, - name: str, + table_name: str, + obj: pd.DataFrame | ir.Table | list | dict, database: str | None = None, - schema: str | None = None, - ) -> ir.Table: - expr = super().table(name=name, database=database, schema=schema) - # load geospatial only if geo columns - if any(typ.is_geospatial() for typ in expr.op().schema.types): - self.load_extension("spatial") + overwrite: bool = False, + ) -> None: + """Insert data into a table. - return expr + Parameters + ---------- + table_name + The name of the table to which data needs will be inserted + obj + The source data or expression to insert + database + Name of the attached database that the table is located in. 
+ overwrite + If `True` then replace existing contents of table + + Raises + ------ + NotImplementedError + If inserting data from a different database + ValueError + If the type of `obj` isn't supported + """ + table = sg.table(table_name, db=database) + if overwrite: + with self._safe_raw_sql(f"TRUNCATE TABLE {table.sql('duckdb')}"): + pass + + if isinstance(obj, ir.Table): + self._run_pre_execute_hooks(obj) + query = sge.insert( + expression=self.compile(obj), into=table, dialect="duckdb" + ) + with self._safe_raw_sql(query): + pass + else: + self.con.append( + table_name, + obj if isinstance(obj, pd.DataFrame) else pd.DataFrame(obj), + ) diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index eaac09d0b0a7..8429f1e79621 100644 --- a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -1,79 +1,408 @@ from __future__ import annotations -import sqlalchemy as sa -from sqlalchemy.ext.compiler import compiles +import math +from functools import partial, reduce, singledispatchmethod -import ibis.backends.base.sql.alchemy.datatypes as sat +import sqlglot as sg +import sqlglot.expressions as sge +from public import public + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.base.sql.alchemy.query_builder import _AlchemyTableSetFormatter -from ibis.backends.duckdb.datatypes import DuckDBType -from ibis.backends.duckdb.registry import operation_registry +from ibis.backends.base.sqlglot.compiler import NULL, STAR, SQLGlotCompiler +from ibis.backends.base.sqlglot.datatypes import DuckDBType + +_INTERVAL_SUFFIXES = { + "ms": "milliseconds", + "us": "microseconds", + "s": "seconds", + "m": "minutes", + "h": "hours", + "D": "days", + "M": "months", + "Y": "years", +} -class DuckDBSQLExprTranslator(AlchemyExprTranslator): - _registry = operation_registry - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _dialect_name = "duckdb" +@public +class DuckDBCompiler(SQLGlotCompiler): + __slots__ = () + dialect = "duckdb" type_mapper = DuckDBType + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr -@compiles(sat.UInt8, "duckdb") -def compile_uint8(element, compiler, **kw): - return "UTINYINT" + @singledispatchmethod + def visit_node(self, op, **kwargs): + return super().visit_node(op, **kwargs) + @visit_node.register(ops.ArrayDistinct) + def visit_ArrayDistinct(self, op, *, arg): + return self.if_( + arg.is_(NULL), + NULL, + self.f.list_distinct(arg) + + self.if_( + self.f.list_count(arg) < self.f.len(arg), + self.f.array(NULL), + self.f.array(), + ), + ) -@compiles(sat.UInt16, "duckdb") -def compile_uint16(element, compiler, **kw): - return "USMALLINT" + @visit_node.register(ops.ArrayIndex) + def visit_ArrayIndex(self, op, *, arg, index): + return self.f.list_extract(arg, index + self.cast(index >= 0, op.index.dtype)) + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + func = sge.Lambda(this=arg, expressions=[sg.to_identifier("_")]) + return self.f.flatten(self.f.list_apply(self.f.range(times), func)) -@compiles(sat.UInt32, "duckdb") -def compile_uint32(element, compiler, **kw): - return "UINTEGER" + @visit_node.register(ops.Sample) + def visit_Sample( + 
self, op, *, parent, fraction: float, method: str, seed: int | None, **_ + ): + sample = sge.TableSample( + this=parent, + method="bernoulli" if method == "row" else "system", + percent=sge.convert(fraction * 100.0), + seed=None if seed is None else sge.convert(seed), + ) + return sg.select(STAR).from_(sample) + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + arg_length = self.f.len(arg) -@compiles(sat.UInt64, "duckdb") -def compile_uint(element, compiler, **kw): - return "UBIGINT" + if start is None: + start = 0 + else: + start = self.f.least(arg_length, self._neg_idx_to_pos(arg, start)) + if stop is None: + stop = arg_length + else: + stop = self._neg_idx_to_pos(arg, stop) -@compiles(sat.ArrayType, "duckdb") -def compile_array(element, compiler, **kw): - if isinstance(value_type := element.value_type, sa.types.NullType): - # duckdb infers empty arrays with no other context as array - typ = "INTEGER" - else: - typ = compiler.process(value_type, **kw) - return f"{typ}[]" + return self.f.list_slice(arg, start + 1, stop) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, body, param): + lamduh = sge.Lambda(this=body, expressions=[sg.to_identifier(param)]) + return self.f.list_apply(arg, lamduh) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, body, param): + lamduh = sge.Lambda(this=body, expressions=[sg.to_identifier(param)]) + return self.f.list_filter(arg, lamduh) + + @visit_node.register(ops.ArrayIntersect) + def visit_ArrayIntersect(self, op, *, left, right): + param = sg.to_identifier("x") + body = self.f.list_contains(right, param) + lamduh = sge.Lambda(this=body, expressions=[param]) + return self.f.list_filter(left, lamduh) + + @visit_node.register(ops.ArrayRemove) + def visit_ArrayRemove(self, op, *, arg, other): + param = sg.to_identifier("x") + body = param.neq(other) + lamduh = sge.Lambda(this=body, expressions=[param]) + return self.f.list_filter(arg, lamduh) + + @visit_node.register(ops.ArrayUnion) + def visit_ArrayUnion(self, op, *, left, right): + arg = self.f.list_concat(left, right) + return self.if_( + arg.is_(NULL), + NULL, + self.f.list_distinct(arg) + + self.if_( + self.f.list_count(arg) < self.f.len(arg), + self.f.array(NULL), + self.f.array(), + ), + ) + + @visit_node.register(ops.ArrayZip) + def visit_ArrayZip(self, op, *, arg): + i = sg.to_identifier("i") + body = sge.Struct.from_arg_list( + [ + sge.Slice(this=k, expression=v[i]) + for k, v in zip(map(sge.convert, op.dtype.value_type.names), arg) + ] + ) + func = sge.Lambda(this=body, expressions=[i]) + return self.f.list_apply( + self.f.range( + 1, + # DuckDB Range excludes upper bound + self.f.greatest(*map(self.f.len, arg)) + 1, + ), + func, + ) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.f.ifnull( + self.f.list_extract(self.f.element_at(arg, key), 1), default + ) + + @visit_node.register(ops.MapContains) + def visit_MapContains(self, op, *, arg, key): + return self.f.len(self.f.element_at(arg, key)).neq(0) + + @visit_node.register(ops.ToJSONMap) + @visit_node.register(ops.ToJSONArray) + def visit_ToJSONMap(self, op, *, arg): + return sge.TryCast(this=arg, to=self.type_mapper.from_ibis(op.dtype)) + + @visit_node.register(ops.ArrayConcat) + def visit_ArrayConcat(self, op, *, arg): + # TODO(cpcloud): map ArrayConcat to this in sqlglot instead of here + return reduce(self.f.list_concat, arg) + + @visit_node.register(ops.IntervalFromInteger) + def 
visit_IntervalFromInteger(self, op, *, arg, unit): + if unit.short == "ns": + raise com.UnsupportedOperationError( + f"{self.dialect} doesn't support nanosecond interval resolutions" + ) + + if unit.singular == "week": + return self.f.to_days(arg * 7) + return self.f[f"to_{unit.plural}"](arg) + + @visit_node.register(ops.FindInSet) + def visit_FindInSet(self, op, *, needle, values): + return self.f.list_indexof(self.f.array(*values), needle) + + @visit_node.register(ops.CountDistinctStar) + def visit_CountDistinctStar(self, op, *, where, arg): + # use a tuple because duckdb doesn't accept COUNT(DISTINCT a, b, c, ...) + # + # this turns the expression into COUNT(DISTINCT (a, b, c, ...)) + row = sge.Tuple( + expressions=list( + map(partial(sg.column, quoted=self.quoted), op.arg.schema.keys()) + ) + ) + return self.agg.count(sge.Distinct(expressions=[row]), where=where) + @visit_node.register(ops.StringJoin) + def visit_StringJoin(self, op, *, arg, sep): + return self.f.list_aggr(self.f.array(*arg), "string_agg", sep) -rewrites = DuckDBSQLExprTranslator.rewrites + @visit_node.register(ops.ExtractMillisecond) + def visit_ExtractMillisecond(self, op, *, arg): + return self.f.mod(self.f.extract("ms", arg), 1_000) + # DuckDB extracts subminute microseconds and milliseconds + # so we have to finesse it a little bit + @visit_node.register(ops.ExtractMicrosecond) + def visit_ExtractMicrosecond(self, op, *, arg): + return self.f.mod(self.f.extract("us", arg), 1_000_000) -@rewrites(ops.Any) -@rewrites(ops.All) -@rewrites(ops.StringContains) -def _no_op(expr): - return expr + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + unit = unit.short + if unit == "ms": + return self.f.epoch_ms(arg) + elif unit == "s": + return sge.UnixToTime(this=arg) + else: + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds, **_ + ): + args = [year, month, day, hours, minutes, seconds] + + func = "make_timestamp" + if (timezone := op.dtype.timezone) is not None: + func += "tz" + args.append(timezone) + + return self.f[func](*args) + + @visit_node.register(ops.Cast) + def visit_Cast(self, op, *, arg, to): + if to.is_interval(): + func = self.f[f"to_{_INTERVAL_SUFFIXES[to.unit.short]}"] + return func(sg.cast(arg, to=self.type_mapper.from_ibis(dt.int32))) + elif to.is_timestamp() and op.arg.dtype.is_integer(): + return self.f.to_timestamp(arg) + + return self.cast(arg, to) + + @visit_node.register(ops.Literal) + def visit_Literal(self, op, *, value, dtype, **kw): + if value is None: + return super().visit_node(op, value=value, dtype=dtype, **kw) + elif dtype.is_interval(): + if dtype.unit.short == "ns": + raise com.UnsupportedOperationError( + f"{self.dialect} doesn't support nanosecond interval resolutions" + ) + return sge.Interval( + this=sge.convert(str(value)), unit=dtype.resolution.upper() + ) + elif dtype.is_uuid(): + return self.cast(str(value), dtype) + elif dtype.is_binary(): + return self.cast("".join(map("\\x{:02x}".format, value)), dtype) + elif dtype.is_numeric(): + # cast non finite values to float because that's the behavior of + # duckdb when a mixed decimal/float operation is performed + # + # float will be upcast to double if necessary by duckdb + if not math.isfinite(value): + return self.cast( + str(value), to=dt.float32 if dtype.is_decimal() else dtype + ) + return self.cast(value, dtype) + elif 
dtype.is_time(): + return self.f.make_time( + value.hour, value.minute, value.second + value.microsecond / 1e6 + ) + elif dtype.is_timestamp(): + args = [ + value.year, + value.month, + value.day, + value.hour, + value.minute, + value.second + value.microsecond / 1e6, + ] -class DuckDBTableSetFormatter(_AlchemyTableSetFormatter): - def _format_sample(self, op, table): - if op.method == "row": - method = sa.func.bernoulli + funcname = "make_timestamp" + + if (tz := dtype.timezone) is not None: + funcname += "tz" + args.append(tz) + + return self.f[funcname](*args) else: - method = sa.func.system - return table.tablesample( - sampling=method(sa.literal_column(f"{op.fraction * 100} PERCENT")), - seed=(None if op.seed is None else sa.literal_column(str(op.seed))), + return super().visit_node(op, value=value, dtype=dtype, **kw) + + @visit_node.register(ops.Capitalize) + def visit_Capitalize(self, op, *, arg): + return self.f.concat( + self.f.upper(self.f.substr(arg, 1, 1)), self.f.lower(self.f.substr(arg, 2)) ) + def _neg_idx_to_pos(self, array, idx): + arg_length = self.f.array_size(array) + return self.if_( + idx >= 0, + idx, + # Need to have the greatest here to handle the case where + # abs(neg_index) > arg_length + # e.g. where the magnitude of the negative index is greater than the + # length of the array + # You cannot index a[:-3] if a = [1, 2] + arg_length + self.f.greatest(idx, -arg_length), + ) + + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + f"{self.dialect} only implements `pop` correlation coefficient" + ) + + # TODO: rewrite rule? + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) + + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + + return self.agg.corr(left, right, where=where) + + @visit_node.register(ops.GeoConvert) + def visit_GeoConvert(self, op, *, arg, source, target): + # 4th argument is to specify that the result is always_xy so that it + # matches the behavior of the equivalent geopandas functionality + return self.f.st_transform(arg, source, target, True) + + +_SIMPLE_OPS = { + ops.ArrayPosition: "list_indexof", + ops.BitAnd: "bit_and", + ops.BitOr: "bit_or", + ops.BitXor: "bit_xor", + ops.EndsWith: "suffix", + ops.Hash: "hash", + ops.IntegerRange: "range", + ops.TimestampRange: "range", + ops.LPad: "lpad", + ops.MapKeys: "map_keys", + ops.MapLength: "cardinality", + ops.MapMerge: "map_concat", + ops.MapValues: "map_values", + ops.Mode: "mode", + ops.RPad: "rpad", + ops.StringAscii: "ascii", + ops.TimeFromHMS: "make_time", + ops.TypeOf: "typeof", + ops.GeoPoint: "st_point", + ops.GeoAsText: "st_astext", + ops.GeoArea: "st_area", + ops.GeoBuffer: "st_buffer", + ops.GeoCentroid: "st_centroid", + ops.GeoContains: "st_contains", + ops.GeoCovers: "st_covers", + ops.GeoCoveredBy: "st_coveredby", + ops.GeoCrosses: "st_crosses", + ops.GeoDifference: "st_difference", + ops.GeoDisjoint: "st_disjoint", + ops.GeoDistance: "st_distance", + ops.GeoDWithin: "st_dwithin", + ops.GeoEndPoint: "st_endpoint", + ops.GeoEnvelope: "st_envelope", + ops.GeoEquals: "st_equals", + ops.GeoGeometryType: "st_geometrytype", + ops.GeoIntersection: "st_intersection", + ops.GeoIntersects: "st_intersects", + ops.GeoIsValid: "st_isvalid", + ops.GeoLength: "st_length", + ops.GeoNPoints: "st_npoints", + ops.GeoOverlaps: "st_overlaps", + ops.GeoStartPoint: 
"st_startpoint", + ops.GeoTouches: "st_touches", + ops.GeoUnion: "st_union", + ops.GeoUnaryUnion: "st_union_agg", + ops.GeoWithin: "st_within", + ops.GeoX: "st_x", + ops.GeoY: "st_y", +} + + +for _op, _name in _SIMPLE_OPS.items(): + assert isinstance(type(_op), type), type(_op) + if issubclass(_op, ops.Reduction): + + @DuckDBCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, where, **kw): + return self.agg[_name](*kw.values(), where=where) + + else: + + @DuckDBCompiler.visit_node.register(_op) + def _fmt(self, op, *, _name: str = _name, **kw): + return self.f[_name](*kw.values()) + + setattr(DuckDBCompiler, f"visit_{_op.__name__}", _fmt) + -class DuckDBSQLCompiler(AlchemyCompiler): - cheap_in_memory_tables = True - translator_class = DuckDBSQLExprTranslator - table_set_formatter_class = DuckDBTableSetFormatter +del _op, _name, _fmt diff --git a/ibis/backends/duckdb/datatypes.py b/ibis/backends/duckdb/datatypes.py index 603051e53daa..a4277ca82760 100644 --- a/ibis/backends/duckdb/datatypes.py +++ b/ibis/backends/duckdb/datatypes.py @@ -1,74 +1,11 @@ from __future__ import annotations -import duckdb_engine.datatypes as ducktypes -import sqlalchemy.dialects.postgresql as psql +import numpy as np -import ibis.expr.datatypes as dt -from ibis.backends.base.sql.alchemy.datatypes import AlchemyType -from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType +from ibis.formats.pandas import PandasData -try: - from geoalchemy2 import Geometry - class Geometry_WKB(Geometry): - as_binary = "ST_AsWKB" - -except ImportError: - - class Geometry_WKB: - ... - - -_from_duckdb_types = { - psql.BYTEA: dt.Binary, - psql.UUID: dt.UUID, - ducktypes.TinyInteger: dt.Int8, - ducktypes.SmallInteger: dt.Int16, - ducktypes.Integer: dt.Int32, - ducktypes.BigInteger: dt.Int64, - ducktypes.HugeInteger: dt.Decimal(38, 0), - ducktypes.UInt8: dt.UInt8, - ducktypes.UTinyInteger: dt.UInt8, - ducktypes.UInt16: dt.UInt16, - ducktypes.USmallInteger: dt.UInt16, - ducktypes.UInt32: dt.UInt32, - ducktypes.UInteger: dt.UInt32, - ducktypes.UInt64: dt.UInt64, - ducktypes.UBigInteger: dt.UInt64, -} - -_to_duckdb_types = { - dt.UUID: psql.UUID, - dt.Int8: ducktypes.TinyInteger, - dt.Int16: ducktypes.SmallInteger, - dt.Int32: ducktypes.Integer, - dt.Int64: ducktypes.BigInteger, - dt.UInt8: ducktypes.UTinyInteger, - dt.UInt16: ducktypes.USmallInteger, - dt.UInt32: ducktypes.UInteger, - dt.UInt64: ducktypes.UBigInteger, - # Handle projections with geometry columns - dt.Geometry: Geometry_WKB, -} - - -class DuckDBType(AlchemyType): - dialect = "duckdb" - - @classmethod - def to_ibis(cls, typ, nullable=True): - if dtype := _from_duckdb_types.get(type(typ)): - return dtype(nullable=nullable) - else: - return super().to_ibis(typ, nullable=nullable) - - @classmethod - def from_ibis(cls, dtype): - if typ := _to_duckdb_types.get(type(dtype)): - return typ - else: - return super().from_ibis(dtype) - - @classmethod - def from_string(cls, type_string, nullable=True): - return SqlglotDuckdbType.from_string(type_string, nullable=nullable) +class DuckDBPandasData(PandasData): + @staticmethod + def convert_Array(s, dtype, pandas_type): + return s.replace(np.nan, None) diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py deleted file mode 100644 index 7b675acd369c..000000000000 --- a/ibis/backends/duckdb/registry.py +++ /dev/null @@ -1,585 +0,0 @@ -from __future__ import annotations - -import operator -from functools import partial -from typing import TYPE_CHECKING, Any - 
-import numpy as np -import sqlalchemy as sa -from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql.functions import GenericFunction - -import ibis.expr.operations as ops -from ibis.backends.base.sql import alchemy -from ibis.backends.base.sql.alchemy import unary -from ibis.backends.base.sql.alchemy.registry import ( - _table_column, - array_filter, - array_map, - geospatial_functions, - reduction, - try_cast, -) -from ibis.backends.duckdb.datatypes import Geometry_WKB -from ibis.backends.postgres.registry import ( - _array_index, - _array_slice, - fixed_arity, - operation_registry, -) -from ibis.common.exceptions import UnsupportedOperationError - -if TYPE_CHECKING: - from collections.abc import Mapping - - from ibis.backends.base.sql.alchemy.datatypes import StructType - -operation_registry = { - op: operation_registry[op] - for op in operation_registry.keys() - geospatial_functions.keys() -} - - -def _round(t, op): - arg, digits = op.args - sa_arg = t.translate(arg) - - if digits is None: - return sa.func.round(sa_arg) - - return sa.func.round(sa_arg, t.translate(digits)) - - -_LOG_BASE_FUNCS = { - 2: sa.func.log2, - 10: sa.func.log, -} - - -def _centroid(t, op): - arg = t.translate(op.arg) - return sa.func.st_centroid(arg, type_=Geometry_WKB) - - -def _geo_end_point(t, op): - arg = t.translate(op.arg) - return sa.func.st_endpoint(arg, type_=Geometry_WKB) - - -def _geo_start_point(t, op): - arg = t.translate(op.arg) - return sa.func.st_startpoint(arg, type_=Geometry_WKB) - - -def _envelope(t, op): - arg = t.translate(op.arg) - return sa.func.st_envelope(arg, type_=Geometry_WKB) - - -def _geo_buffer(t, op): - arg = t.translate(op.arg) - radius = t.translate(op.radius) - return sa.func.st_buffer(arg, radius, type_=Geometry_WKB) - - -def _geo_unary_union(t, op): - arg = t.translate(op.arg) - return sa.func.st_union_agg(arg, type_=Geometry_WKB) - - -def _geo_point(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_point(left, right, type_=Geometry_WKB) - - -def _geo_difference(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_difference(left, right, type_=Geometry_WKB) - - -def _geo_intersection(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_intersection(left, right, type_=Geometry_WKB) - - -def _geo_union(t, op): - left = t.translate(op.left) - right = t.translate(op.right) - return sa.func.st_union(left, right, type_=Geometry_WKB) - - -def _geo_convert(t, op): - arg = t.translate(op.arg) - source = op.source - target = op.target - - # sa.true() setting always_xy=True - return sa.func.st_transform(arg, source, target, sa.true(), type_=Geometry_WKB) - - -def _generic_log(arg, base, *, type_): - return sa.func.ln(arg, type_=type_) / sa.func.ln(base, type_=type_) - - -def _log(t, op): - arg, base = op.args - sqla_type = t.get_sqla_type(op.dtype) - sa_arg = t.translate(arg) - if base is not None: - sa_base = t.translate(base) - try: - base_value = sa_base.value - except AttributeError: - return _generic_log(sa_arg, sa_base, type_=sqla_type) - else: - func = _LOG_BASE_FUNCS.get(base_value, _generic_log) - return func(sa_arg, type_=sqla_type) - return sa.func.ln(sa_arg, type_=sqla_type) - - -def _timestamp_from_unix(t, op): - arg, unit = op.args - arg = t.translate(arg) - - if unit.short == "ms": - return sa.func.epoch_ms(arg) - elif unit.short == "s": - return sa.func.to_timestamp(arg) - else: - raise UnsupportedOperationError(f"{unit!r} unit is not supported!") 
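
Both the deleted rule above and its sqlglot replacement (`visit_TimestampFromUNIX`) support exactly two units. A quick illustration in raw DuckDB, with arbitrary example epoch values:

```python
import duckdb

con = duckdb.connect()
# seconds map to to_timestamp, milliseconds to epoch_ms; any other unit
# raised UnsupportedOperationError both before and after the port
print(con.execute("SELECT to_timestamp(1700000000)").fetchone())
print(con.execute("SELECT epoch_ms(1700000000000)").fetchone())
# both values name the same instant, 2023-11-14 22:13:20 UTC
```
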
- - -def _timestamp_bucket(t, op): - arg = t.translate(op.arg) - interval = t.translate(op.interval) - - origin = sa.literal_column("'epoch'::TIMESTAMP") - - if op.offset is not None: - origin += t.translate(op.offset) - return sa.func.time_bucket(interval, arg, origin) - - -class struct_pack(GenericFunction): - def __init__(self, values: Mapping[str, Any], *, type: StructType) -> None: - super().__init__() - self.values = values - self.type = type - - -@compiles(struct_pack, "duckdb") -def compiles_struct_pack(element, compiler, **kw): - quote = compiler.preparer.quote - args = ", ".join( - f"{quote(key)} := {compiler.process(value, **kw)}" - for key, value in element.values.items() - ) - return f"struct_pack({args})" - - -def _literal(t, op): - dtype = op.dtype - value = op.value - - if value is None: - return ( - sa.null() if dtype.is_null() else sa.cast(sa.null(), t.get_sqla_type(dtype)) - ) - - sqla_type = t.get_sqla_type(dtype) - - if dtype.is_interval(): - return getattr(sa.func, f"to_{dtype.unit.plural}")(value) - elif dtype.is_array(): - values = value.tolist() if isinstance(value, np.ndarray) else value - return sa.cast(sa.func.list_value(*values), sqla_type) - elif dtype.is_floating(): - if not np.isfinite(value): - if np.isnan(value): - value = "NaN" - else: - assert np.isinf(value), "value is neither finite, nan nor infinite" - prefix = "-" * (value < 0) - value = f"{prefix}Inf" - return sa.cast(sa.literal(value), sqla_type) - elif dtype.is_struct(): - return struct_pack( - { - key: t.translate(ops.Literal(val, dtype=dtype[key])) - for key, val in value.items() - }, - type=sqla_type, - ) - elif dtype.is_string(): - return sa.literal(value) - elif dtype.is_map(): - return sa.func.map( - sa.func.list_value(*value.keys()), sa.func.list_value(*value.values()) - ) - elif dtype.is_timestamp(): - return sa.cast(sa.literal(value.isoformat()), t.get_sqla_type(dtype)) - elif dtype.is_date(): - return sa.func.make_date(value.year, value.month, value.day) - elif dtype.is_time(): - return sa.func.make_time( - value.hour, value.minute, value.second + value.microsecond / 1e6 - ) - else: - return sa.cast(sa.literal(value), sqla_type) - - -if_ = getattr(sa.func, "if") - - -def _neg_idx_to_pos(array, idx): - arg_length = sa.func.array_length(array) - return if_(idx < 0, arg_length + sa.func.greatest(idx, -arg_length), idx) - - -def _regex_extract(string, pattern, index): - return sa.func.regexp_extract( - string, - pattern, - # DuckDB requires the index to be a constant, so we compile - # the value and inline it by using sa.text - sa.text(str(index.compile(compile_kwargs=dict(literal_binds=True)))), - ) - - -def _json_get_item(left, path): - # Workaround for https://github.com/duckdb/duckdb/issues/5063 - # In some situations duckdb silently does the wrong thing if - # the path is parametrized. 
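
The workaround the deleted comment describes is easiest to see in isolation: the path is inlined as a SQL literal rather than bound as a parameter. A minimal sketch with an illustrative JSON value:

```python
import duckdb

con = duckdb.connect()
con.install_extension("json")  # no-op on builds that bundle the extension
con.load_extension("json")

# the path 'a' is inlined as a literal, mirroring the literal_binds
# compilation used above
print(con.execute("""SELECT '{"a": [1, 2]}'::JSON -> 'a' AS v""").fetchone())
# roughly ('[1,2]',)
```
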
-    sa_path = sa.text(str(path.compile(compile_kwargs=dict(literal_binds=True))))
-    return left.op("->")(sa_path)
-
-
-def _strftime(t, op):
-    if not isinstance(op.format_str, ops.Literal):
-        raise UnsupportedOperationError(
-            f"DuckDB format_str must be a literal `str`; got {type(op.format_str)}"
-        )
-    return sa.func.strftime(t.translate(op.arg), t.translate(op.format_str))
-
-
-def _strptime(t, op):
-    if not isinstance(op.format_str, ops.Literal):
-        raise UnsupportedOperationError(
-            f"DuckDB format_str must be a literal `str`; got {type(op.format_str)}"
-        )
-    return sa.cast(
-        sa.func.strptime(t.translate(op.arg), t.translate(op.format_str)),
-        t.get_sqla_type(op.dtype),
-    )
-
-
-def _arbitrary(t, op):
-    if (how := op.how) == "heavy":
-        raise UnsupportedOperationError(
-            f"how={how!r} not supported in the DuckDB backend"
-        )
-    return t._reduction(getattr(sa.func, how), op)
-
-
-def _string_agg(t, op):
-    if not isinstance(op.sep, ops.Literal):
-        raise UnsupportedOperationError(
-            "Separator argument to group_concat operation must be a constant"
-        )
-    agg = sa.func.string_agg(t.translate(op.arg), sa.text(repr(op.sep.value)))
-    if (where := op.where) is not None:
-        return agg.filter(t.translate(where))
-    return agg
-
-
-def _struct_column(t, op):
-    return struct_pack(
-        dict(zip(op.names, map(t.translate, op.values))),
-        type=t.get_sqla_type(op.dtype),
-    )
-
-
-@compiles(array_map, "duckdb")
-def compiles_list_apply(element, compiler, **kw):
-    *args, signature, result = map(partial(compiler.process, **kw), element.clauses)
-    return f"list_apply({', '.join(args)}, {signature} -> {result})"
-
-
-def _array_map(t, op):
-    return array_map(
-        t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body)
-    )
-
-
-@compiles(array_filter, "duckdb")
-def compiles_list_filter(element, compiler, **kw):
-    *args, signature, result = map(partial(compiler.process, **kw), element.clauses)
-    return f"list_filter({', '.join(args)}, {signature} -> {result})"
-
-
-def _array_filter(t, op):
-    return array_filter(
-        t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body)
-    )
-
-
-def _array_intersect(t, op):
-    name = "x"
-    parameter = ops.Argument(
-        name=name, shape=op.left.shape, dtype=op.left.dtype.value_type
-    )
-    return t.translate(
-        ops.ArrayFilter(
-            op.left, param=parameter.param, body=ops.ArrayContains(op.right, parameter)
-        )
-    )
-
-
-def _array_zip(t, op):
-    args = tuple(map(t.translate, op.arg))
-
-    i = sa.literal_column("i", type_=sa.INTEGER)
-    dtype = op.dtype
-    return array_map(
-        sa.func.range(1, sa.func.greatest(*map(sa.func.array_length, args)) + 1),
-        i,
-        struct_pack(
-            {
-                name: sa.func.list_extract(arg, i)
-                for name, arg in zip(dtype.value_type.names, args)
-            },
-            type=t.get_sqla_type(dtype),
-        ),
-    )
-
-
-@compiles(try_cast, "duckdb")
-def compiles_try_cast(element, compiler, **kw):
-    return "TRY_CAST({} AS {})".format(
-        compiler.process(element.clauses.clauses[0], **kw),
-        compiler.visit_typeclause(element),
-    )
-
-
-def _try_cast(t, op):
-    arg = t.translate(op.arg)
-    to = t.get_sqla_type(op.to)
-    return try_cast(arg, type_=to)
-
-
-_temporal_delta = fixed_arity(
-    lambda part, start, end: sa.func.date_diff(part, end, start), 3
-)
-
-
-def _to_json_collection(t, op):
-    typ = t.get_sqla_type(op.dtype)
-    return try_cast(t.translate(op.arg), typ, type_=typ)
-
-
-def _array_remove(t, op):
-    arg = op.arg
-    param = ops.Argument(name="x", shape=arg.shape, dtype=arg.dtype.value_type)
-    return _array_filter(
-        t,
-        ops.ArrayFilter(arg, param=param.param, body=ops.NotEquals(param, op.other)),
-    )
-
-
-operation_registry.update(
-    {
-        ops.ArrayColumn: (
-            lambda t, op: sa.cast(
-                sa.func.list_value(*map(t.translate, op.cols)),
-                t.get_sqla_type(op.dtype),
-            )
-        ),
-        ops.TryCast: _try_cast,
-        ops.ArrayRepeat: fixed_arity(
-            lambda arg, times: sa.func.flatten(
-                sa.func.array(
-                    sa.select(arg).select_from(sa.func.range(times)).scalar_subquery()
-                )
-            ),
-            2,
-        ),
-        ops.ArrayLength: unary(sa.func.array_length),
-        ops.ArraySlice: _array_slice(
-            index_converter=_neg_idx_to_pos,
-            array_length=sa.func.array_length,
-            func=sa.func.list_slice,
-        ),
-        ops.ArrayIndex: _array_index(
-            index_converter=_neg_idx_to_pos, func=sa.func.list_extract
-        ),
-        ops.ArrayMap: _array_map,
-        ops.ArrayFilter: _array_filter,
-        ops.ArrayContains: fixed_arity(sa.func.list_has, 2),
-        ops.ArrayPosition: fixed_arity(
-            lambda lst, el: sa.func.list_indexof(lst, el) - 1, 2
-        ),
-        ops.ArrayDistinct: fixed_arity(
-            lambda arg: if_(
-                arg.is_(sa.null()),
-                sa.null(),
-                # append a null if the input array has a null
-                sa.func.list_distinct(arg)
-                + if_(
-                    # list_count doesn't count nulls
-                    sa.func.list_count(arg) < sa.func.array_length(arg),
-                    sa.func.list_value(sa.null()),
-                    sa.func.list_value(),
-                ),
-            ),
-            1,
-        ),
-        ops.ArraySort: fixed_arity(sa.func.list_sort, 1),
-        ops.ArrayRemove: _array_remove,
-        ops.ArrayUnion: lambda t, op: t.translate(
-            ops.ArrayDistinct(ops.ArrayConcat((op.left, op.right)))
-        ),
-        ops.ArrayZip: _array_zip,
-        ops.DayOfWeekName: unary(sa.func.dayname),
-        ops.Literal: _literal,
-        ops.Log2: unary(sa.func.log2),
-        ops.Ln: unary(sa.func.ln),
-        ops.Log: _log,
-        ops.IsNan: unary(sa.func.isnan),
-        ops.Modulus: fixed_arity(operator.mod, 2),
-        ops.Round: _round,
-        ops.StructField: (
-            lambda t, op: sa.func.struct_extract(
-                t.translate(op.arg),
-                sa.text(repr(op.field)),
-                type_=t.get_sqla_type(op.dtype),
-            )
-        ),
-        ops.TableColumn: _table_column,
-        ops.TimestampFromUNIX: _timestamp_from_unix,
-        ops.TimestampBucket: _timestamp_bucket,
-        ops.TimestampNow: fixed_arity(
-            # duckdb 0.6.0 changes now to be a timestamp with time zone force
-            # it back to the original for backwards compatibility
-            lambda *_: sa.cast(sa.func.now(), sa.TIMESTAMP),
-            0,
-        ),
-        ops.RegexExtract: fixed_arity(_regex_extract, 3),
-        ops.RegexReplace: fixed_arity(
-            lambda *args: sa.func.regexp_replace(*args, sa.text("'g'")), 3
-        ),
-        ops.RegexSearch: fixed_arity(sa.func.regexp_matches, 2),
-        ops.StringContains: fixed_arity(sa.func.contains, 2),
-        ops.ApproxMedian: reduction(
-            # without inline text, duckdb fails with
-            # RuntimeError: INTERNAL Error: Invalid PhysicalType for GetTypeIdSize
-            lambda arg: sa.func.approx_quantile(arg, sa.text(str(0.5)))
-        ),
-        ops.ApproxCountDistinct: reduction(sa.func.approx_count_distinct),
-        ops.Mode: reduction(sa.func.mode),
-        ops.Strftime: _strftime,
-        ops.Arbitrary: _arbitrary,
-        ops.GroupConcat: _string_agg,
-        ops.StructColumn: _struct_column,
-        ops.ArgMin: reduction(sa.func.min_by),
-        ops.ArgMax: reduction(sa.func.max_by),
-        ops.BitwiseXor: fixed_arity(sa.func.xor, 2),
-        ops.JSONGetItem: fixed_arity(_json_get_item, 2),
-        ops.RowID: lambda *_: sa.literal_column("rowid"),
-        ops.StringToTimestamp: _strptime,
-        ops.Quantile: lambda t, op: (
-            reduction(sa.func.quantile_cont)(t, op)
-            if op.arg.dtype.is_numeric()
-            else reduction(sa.func.quantile_disc)(t, op)
-        ),
-        ops.MultiQuantile: lambda t, op: (
-            reduction(sa.func.quantile_cont)(t, op)
-            if op.arg.dtype.is_numeric()
-            else reduction(sa.func.quantile_disc)(t, op)
-        ),
-        ops.TypeOf: unary(sa.func.typeof),
-        ops.IntervalAdd: fixed_arity(operator.add, 2),
-        ops.IntervalSubtract: fixed_arity(operator.sub, 2),
-        ops.Capitalize: alchemy.sqlalchemy_operation_registry[ops.Capitalize],
-        ops.ArrayStringJoin: fixed_arity(
-            lambda sep, arr: sa.func.array_aggr(arr, sa.text("'string_agg'"), sep), 2
-        ),
-        ops.StartsWith: fixed_arity(sa.func.prefix, 2),
-        ops.EndsWith: fixed_arity(sa.func.suffix, 2),
-        ops.Argument: lambda _, op: sa.literal_column(op.param),
-        ops.Unnest: unary(sa.func.unnest),
-        ops.MapGet: fixed_arity(
-            lambda arg, key, default: sa.func.coalesce(
-                sa.func.list_extract(sa.func.element_at(arg, key), 1), default
-            ),
-            3,
-        ),
-        ops.Map: fixed_arity(sa.func.map, 2),
-        ops.MapContains: fixed_arity(
-            lambda arg, key: sa.func.array_length(sa.func.element_at(arg, key)) != 0, 2
-        ),
-        ops.MapLength: unary(sa.func.cardinality),
-        ops.MapKeys: unary(sa.func.map_keys),
-        ops.MapValues: unary(sa.func.map_values),
-        ops.MapMerge: fixed_arity(sa.func.map_concat, 2),
-        ops.Hash: unary(sa.func.hash),
-        ops.Median: reduction(sa.func.median),
-        ops.First: reduction(sa.func.first),
-        ops.Last: reduction(sa.func.last),
-        ops.ArrayIntersect: _array_intersect,
-        ops.TimeDelta: _temporal_delta,
-        ops.DateDelta: _temporal_delta,
-        ops.TimestampDelta: _temporal_delta,
-        ops.ToJSONMap: _to_json_collection,
-        ops.ToJSONArray: _to_json_collection,
-        ops.ArrayFlatten: unary(sa.func.flatten),
-        ops.IntegerRange: fixed_arity(sa.func.range, 3),
-        # geospatial
-        ops.GeoPoint: _geo_point,
-        ops.GeoAsText: unary(sa.func.ST_AsText),
-        ops.GeoArea: unary(sa.func.ST_Area),
-        ops.GeoBuffer: _geo_buffer,
-        ops.GeoCentroid: _centroid,
-        ops.GeoContains: fixed_arity(sa.func.ST_Contains, 2),
-        ops.GeoCovers: fixed_arity(sa.func.ST_Covers, 2),
-        ops.GeoCoveredBy: fixed_arity(sa.func.ST_CoveredBy, 2),
-        ops.GeoCrosses: fixed_arity(sa.func.ST_Crosses, 2),
-        ops.GeoDifference: _geo_difference,
-        ops.GeoDisjoint: fixed_arity(sa.func.ST_Disjoint, 2),
-        ops.GeoDistance: fixed_arity(sa.func.ST_Distance, 2),
-        ops.GeoDWithin: fixed_arity(sa.func.ST_DWithin, 3),
-        ops.GeoEndPoint: _geo_end_point,
-        ops.GeoEnvelope: _envelope,
-        ops.GeoEquals: fixed_arity(sa.func.ST_Equals, 2),
-        ops.GeoGeometryType: unary(sa.func.ST_GeometryType),
-        ops.GeoIntersection: _geo_intersection,
-        ops.GeoIntersects: fixed_arity(sa.func.ST_Intersects, 2),
-        ops.GeoIsValid: unary(sa.func.ST_IsValid),
-        ops.GeoLength: unary(sa.func.ST_Length),
-        ops.GeoNPoints: unary(sa.func.ST_NPoints),
-        ops.GeoOverlaps: fixed_arity(sa.func.ST_Overlaps, 2),
-        ops.GeoStartPoint: _geo_start_point,
-        ops.GeoTouches: fixed_arity(sa.func.ST_Touches, 2),
-        ops.GeoUnion: _geo_union,
-        ops.GeoUnaryUnion: _geo_unary_union,
-        ops.GeoWithin: fixed_arity(sa.func.ST_Within, 2),
-        ops.GeoX: unary(sa.func.ST_X),
-        ops.GeoY: unary(sa.func.ST_Y),
-        ops.GeoConvert: _geo_convert,
-        # other ops
-        ops.TimestampRange: fixed_arity(sa.func.range, 3),
-        ops.RegexSplit: fixed_arity(sa.func.str_split_regex, 2),
-    }
-)
-
-
-_invalid_operations = {
-    # ibis.expr.operations.strings
-    ops.Translate,
-}
-
-operation_registry = {
-    k: v for k, v in operation_registry.items() if k not in _invalid_operations
-}
diff --git a/ibis/backends/duckdb/tests/conftest.py b/ibis/backends/duckdb/tests/conftest.py
index c16b69ffb5ce..cffe154a5157 100644
--- a/ibis/backends/duckdb/tests/conftest.py
+++ b/ibis/backends/duckdb/tests/conftest.py
@@ -7,10 +7,11 @@
 import ibis
 from ibis.backends.conftest import TEST_TABLES
 from ibis.backends.tests.base import BackendTest
-from ibis.conftest import SANDBOXED
+from ibis.conftest import SANDBOXED, WINDOWS
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
+    from typing import Any
 
     from ibis.backends.base import BaseBackend
@@ -36,7 +37,7 @@ class TestConf(BackendTest):
     supports_map = True
 
-    deps = "duckdb", "duckdb_engine"
+    deps = ("duckdb",)
 
     stateful = False
     supports_tpch = True
@@ -48,8 +49,6 @@ def preload(self):
 
     @property
     def ddl_script(self) -> Iterator[str]:
-        from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported
-
         parquet_dir = self.data_dir / "parquet"
         geojson_dir = self.data_dir / "geojson"
         for table in TEST_TABLES:
@@ -59,7 +58,7 @@ def ddl_script(self) -> Iterator[str]:
                 SELECT * FROM read_parquet('{parquet_dir / f'{table}.parquet'}')
                 """
             )
-        if geospatial_supported and not SANDBOXED:
+        if not SANDBOXED:
             for table in TEST_TABLES_GEO:
                 yield (
                     f"""
@@ -81,14 +80,25 @@ def ddl_script(self) -> Iterator[str]:
     @staticmethod
     def connect(*, tmpdir, worker_id, **kw) -> BaseBackend:
         # use an extension directory per test worker to prevent simultaneous
-        # downloads
-        extension_directory = tmpdir.getbasetemp().joinpath("duckdb_extensions")
-        extension_directory.mkdir(exist_ok=True)
-        return ibis.duckdb.connect(extension_directory=extension_directory, **kw)
+        # downloads on windows
+        #
+        # avoid enabling on linux because this adds a lot of time to parallel
+        # test runs due to each worker getting its own extensions directory
+        if WINDOWS:
+            extension_directory = tmpdir.getbasetemp().joinpath("duckdb_extensions")
+            extension_directory.mkdir(exist_ok=True)
+            kw["extension_directory"] = extension_directory
+        return ibis.duckdb.connect(**kw)
 
     def load_tpch(self) -> None:
-        with self.connection.begin() as con:
-            con.exec_driver_sql("CALL dbgen(sf=0.1)")
+        """Load the TPC-H dataset."""
+        with self.connection._safe_raw_sql("CALL dbgen(sf=0.17)"):
+            pass
+
+    def _load_data(self, **_: Any) -> None:
+        """Load test data into a backend."""
+        with self.connection._safe_raw_sql(";\n".join(self.ddl_script)):
+            pass
 
 
 @pytest.fixture(scope="session")
diff --git a/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql b/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql
new file mode 100644
index 000000000000..67f2cfc5ea25
--- /dev/null
+++ b/ibis/backends/duckdb/tests/snapshots/test_client/test_to_other_sql/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  *
+FROM "functional_alltypes"
\ No newline at end of file
diff --git a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql
index 677936b16a31..e68c65813913 100644
--- a/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql
+++ b/ibis/backends/duckdb/tests/snapshots/test_geospatial/test_geospatial_dwithin/out.sql
@@ -1,3 +1,3 @@
 SELECT
-  ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS REAL(53))) AS tmp
+  ST_DWITHIN(t0.geom, t0.geom, CAST(3.0 AS DOUBLE)) AS tmp
 FROM t AS t0
\ No newline at end of file
diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py
index 05525ab55c96..08cee6fb0954 100644
--- a/ibis/backends/duckdb/tests/test_client.py
+++ b/ibis/backends/duckdb/tests/test_client.py
@@ -3,7 +3,6 @@
 import duckdb
 import pyarrow as pa
 import pytest
-import sqlalchemy as sa
 from pytest import param
 
 import ibis
@@ -25,7 +24,7 @@ def ext_directory(tmpdir_factory):
 @pytest.mark.xfail(
     LINUX and SANDBOXED,
     reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
-    raises=sa.exc.OperationalError,
+    raises=duckdb.IOException,
 )
 @pytest.mark.xdist_group(name="duckdb-extensions")
 def test_connect_extensions(ext_directory):
@@ -76,11 +75,11 @@ def test_cross_db(tmpdir):
 
     con2.attach(path1, name="test1", read_only=True)
 
-    t1_from_con2 = con2.table("t1", schema="test1.main")
+    t1_from_con2 = con2.table("t1", schema="main", database="test1")
     assert t1_from_con2.schema() == t2.schema()
     assert t1_from_con2.execute().equals(t2.execute())
 
-    foo_t1_from_con2 = con2.table("t1", schema="test1.foo")
+    foo_t1_from_con2 = con2.table("t1", schema="foo", database="test1")
     assert foo_t1_from_con2.schema() == t2.schema()
     assert foo_t1_from_con2.execute().equals(t2.execute())
 
@@ -115,24 +114,26 @@ def test_attach_detach(tmpdir):
     con2.detach(name)
     assert name not in con2.list_databases()
 
-    with pytest.raises(sa.exc.ProgrammingError):
+    with pytest.raises(duckdb.BinderException):
         con2.detach(name)
 
 
 @pytest.mark.parametrize(
-    "scale",
+    ("scale", "expected_scale"),
     [
-        None,
-        param(0, id="seconds"),
-        param(3, id="millis"),
-        param(6, id="micros"),
-        param(9, id="nanos"),
+        param(None, 6, id="default"),
+        param(0, 0, id="seconds"),
+        param(3, 3, id="millis"),
+        param(6, 6, id="micros"),
+        param(9, 9, id="nanos"),
     ],
 )
-def test_create_table_with_timestamp_scales(con, scale):
+def test_create_table_with_timestamp_scales(con, scale, expected_scale):
     schema = ibis.schema(dict(ts=dt.Timestamp(scale=scale)))
-    t = con.create_table(gen_name("duckdb_timestamp_scale"), schema=schema, temp=True)
-    assert t.schema() == schema
+    expected = ibis.schema(dict(ts=dt.Timestamp(scale=expected_scale)))
+    name = gen_name("duckdb_timestamp_scale")
+    t = con.create_table(name, schema=schema, temp=True)
+    assert t.schema() == expected
 
 
 def test_config_options(con):
@@ -153,8 +154,46 @@ def test_config_options(con):
 
 
 def test_config_options_bad_option(con):
-    with pytest.raises(sa.exc.ProgrammingError):
+    with pytest.raises(duckdb.CatalogException):
         con.settings["not_a_valid_option"] = "oopsie"
 
     with pytest.raises(KeyError):
         con.settings["i_didnt_set_this"]
+
+
+def test_insert(con):
+    import pandas as pd
+
+    name = ibis.util.guid()
+
+    t = con.create_table(name, schema=ibis.schema({"a": "int64"}))
+    con.insert(name, obj=pd.DataFrame({"a": [1, 2]}))
+    assert t.count().execute() == 2
+
+    con.insert(name, obj=pd.DataFrame({"a": [1, 2]}))
+    assert t.count().execute() == 4
+
+    con.insert(name, obj=pd.DataFrame({"a": [1, 2]}), overwrite=True)
+    assert t.count().execute() == 2
+
+    con.insert(name, t)
+    assert t.count().execute() == 4
+
+    con.insert(name, [{"a": 1}, {"a": 2}], overwrite=True)
+    assert t.count().execute() == 2
+
+    con.insert(name, [(1,), (2,)])
+    assert t.count().execute() == 4
+
+    con.insert(name, {"a": [1, 2]}, overwrite=True)
+    assert t.count().execute() == 2
+
+
+@pytest.mark.xfail(reason="snowflake backend not yet rewritten")
+def test_to_other_sql(con, snapshot):
+    pytest.importorskip("snowflake.connector")
+
+    t = con.table("functional_alltypes")
+
+    sql = ibis.to_sql(t, dialect="snowflake")
+    snapshot.assert_match(sql, "out.sql")
diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py
index d2e3bfc97c8f..6805e487d78f 100644
--- a/ibis/backends/duckdb/tests/test_datatypes.py
+++ b/ibis/backends/duckdb/tests/test_datatypes.py
@@ -1,15 +1,11 @@
 from __future__ import annotations
 
-import duckdb_engine
 import pytest
-import sqlalchemy as sa
-from packaging.version import parse as vparse
 from pytest import param
 
-import ibis.backends.base.sql.alchemy.datatypes as sat
 import ibis.common.exceptions as exc
 import ibis.expr.datatypes as dt
-from ibis.backends.duckdb.datatypes import DuckDBType
+from ibis.backends.base.sqlglot.datatypes import DuckDBType
 
 
 @pytest.mark.parametrize(
@@ -93,39 +89,3 @@ def test_parse_quoted_struct_field():
     assert DuckDBType.from_string('STRUCT("a" INTEGER, "a b c" INTEGER)') == dt.Struct(
         {"a": dt.int32, "a b c": dt.int32}
     )
-
-
-def test_generate_quoted_struct():
-    typ = sat.StructType(
-        {"in come": sa.VARCHAR(), "my count": sa.BIGINT(), "thing": sa.INTEGER()}
-    )
-    result = typ.compile(dialect=duckdb_engine.Dialect())
-    expected = 'STRUCT("in come" VARCHAR, "my count" BIGINT, thing INTEGER)'
-    assert result == expected
-
-
-@pytest.mark.xfail(
-    condition=vparse(duckdb_engine.__version__) < vparse("0.9.2"),
-    raises=AssertionError,
-    reason="mapping from UINTEGER query metadata fixed in 0.9.2",
-)
-def test_read_uint8_from_parquet(tmp_path):
-    import numpy as np
-
-    import ibis
-
-    con = ibis.duckdb.connect()
-
-    # There is an incorrect mapping in duckdb-engine from UInteger -> UInt8
-    # In order to get something that reads as a UInt8, we cast to UInt32 (UInteger)
-    t = ibis.memtable({"a": np.array([1, 2, 3, 4], dtype="uint32")})
-    assert t.a.type() == dt.uint32
-
-    parqpath = tmp_path / "uint.parquet"
-
-    con.to_parquet(t, parqpath)
-
-    # If this doesn't fail, then things are working
-    t2 = con.read_parquet(parqpath)
-
-    assert t2.schema() == t.schema()
diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py
index 0568ed7712b6..94989b422e21 100644
--- a/ibis/backends/duckdb/tests/test_register.py
+++ b/ibis/backends/duckdb/tests/test_register.py
@@ -11,7 +11,6 @@
 import pandas.testing as tm
 import pyarrow as pa
 import pytest
-import sqlalchemy as sa
 
 import ibis
 import ibis.common.exceptions as exc
@@ -112,7 +111,7 @@ def test_read_geo_from_url(con, monkeypatch):
     loaded_exts = []
     monkeypatch.setattr(con, "_load_extensions", lambda x, **kw: loaded_exts.extend(x))
 
-    with pytest.raises((sa.exc.OperationalError, sa.exc.ProgrammingError)):
+    with pytest.raises(duckdb.IOException):
         # The read will fail, either because the URL is bogus (which it is) or
         # because the current connection doesn't have the spatial extension
        # installed and so the call to `st_read` will raise a catalog error.
@@ -143,16 +142,15 @@ def test_temp_directory(tmp_path):
 
     # 1. in-memory + no temp_directory specified
     con = ibis.duckdb.connect()
-    with con.begin() as c:
-        value = c.exec_driver_sql(query).scalar()
-        assert value  # we don't care what the specific value is
+
+    value = con.raw_sql(query).fetchone()[0]
+    assert value  # we don't care what the specific value is
 
     temp_directory = Path(tempfile.gettempdir()) / "duckdb"
 
     # 2. in-memory + temp_directory specified
     con = ibis.duckdb.connect(temp_directory=temp_directory)
-    with con.begin() as c:
-        value = c.exec_driver_sql(query).scalar()
+    value = con.raw_sql(query).fetchone()[0]
     assert value == str(temp_directory)
 
     # 3. on-disk + no temp_directory specified
@@ -161,8 +159,7 @@ def test_temp_directory(tmp_path):
 
     # 4. on-disk + temp_directory specified
     con = ibis.duckdb.connect(tmp_path / "test2.ddb", temp_directory=temp_directory)
-    with con.begin() as c:
-        value = c.exec_driver_sql(query).scalar()
+    value = con.raw_sql(query).fetchone()[0]
     assert value == str(temp_directory)
 
 
@@ -362,7 +359,7 @@ def test_set_temp_dir(tmp_path):
         "nix on linux cannot download duckdb extensions or data due to sandboxing; "
         "duckdb will try to automatically install and load read_parquet"
     ),
-    raises=(duckdb.IOException, sa.exc.DBAPIError),
+    raises=duckdb.IOException,
 )
 def test_s3_403_fallback(con, httpserver, monkeypatch):
     # monkeypatch to avoid downloading extensions in tests
diff --git a/ibis/backends/duckdb/tests/test_udf.py b/ibis/backends/duckdb/tests/test_udf.py
index fa0968fc2765..cff40de2ca2f 100644
--- a/ibis/backends/duckdb/tests/test_udf.py
+++ b/ibis/backends/duckdb/tests/test_udf.py
@@ -51,9 +51,7 @@ def test_builtin_scalar(con, func):
     a, b = "duck", "luck"
     expr = func(a, b)
 
-    with con.begin() as c:
-        expected = c.exec_driver_sql(f"SELECT {func.__name__}({a!r}, {b!r})").scalar()
-
+    expected = con.raw_sql(f"SELECT {func.__name__}({a!r}, {b!r})").df().squeeze()
     assert con.execute(expr) == expected
 
 
@@ -80,10 +78,11 @@ def test_builtin_agg(con, func):
     data = ibis.memtable({"a": raw_data})
     expr = func(data.a)
 
-    with con.begin() as c:
-        expected = c.exec_driver_sql(
-            f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)"
-        ).scalar()
+    expected = (
+        con.raw_sql(f"SELECT {func.__name__}(a) FROM UNNEST({raw_data!r}) _ (a)")
+        .df()
+        .squeeze()
+    )
 
     assert con.execute(expr) == expected
 
diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql
index c29c79e8682a..e472a5727fab 100644
--- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql
+++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql
@@ -1,62 +1,55 @@
 SELECT
-  t5.street,
-  t5.key
+  t5.street AS street,
+  t5.key AS key,
+  t5.key_right AS key_right
 FROM (
   SELECT
-    t4.street,
-    ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
+    t1.street AS street,
+    ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key,
+    t3.key AS key_right
   FROM (
     SELECT
-      t1.street,
-      t1.key
+      t0.street AS street,
+      ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
+    FROM data AS t0
+  ) AS t1
+  INNER JOIN (
+    SELECT
+      t1.key AS key
     FROM (
      SELECT
-        t0.*,
+        t0.street AS street,
         ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
      FROM data AS t0
    ) AS t1
-    INNER JOIN (
-      SELECT
-        t1.key
-      FROM (
-        SELECT
-          t0.*,
-          ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
-        FROM data AS t0
-      ) AS t1
-    ) AS t2
-    ON t1.key = t2.key
-  ) AS t4
+  ) AS t3
+  ON t1.key = t3.key
 ) AS t5
 INNER JOIN (
   SELECT
-    t5.key
+    t5.key AS key
   FROM (
     SELECT
-      t4.street,
-      ROW_NUMBER() OVER (ORDER BY t4.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
+      t1.street AS street,
+      ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key,
+      t3.key AS key_right
    FROM (
      SELECT
-        t1.street,
-        t1.key
+        t0.street AS street,
+        ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
+      FROM data AS t0
+    ) AS t1
+    INNER JOIN (
+      SELECT
+        t1.key AS key
      FROM (
        SELECT
-          t0.*,
+          t0.street AS street,
           ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
        FROM data AS t0
      ) AS t1
-      INNER JOIN (
-        SELECT
-          t1.key
-        FROM (
-          SELECT
-            t0.*,
-            ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key
-          FROM data AS t0
-        ) AS t1
-      ) AS t2
-      ON t1.key = t2.key
-    ) AS t4
+    ) AS t3
+    ON t1.key = t3.key
   ) AS t5
-) AS t6
-  ON t5.key = t6.key
\ No newline at end of file
+) AS t7
+  ON t5.key = t7.key
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql
new file mode 100644
index 000000000000..f7cf54e9de51
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql
@@ -0,0 +1,55 @@
+SELECT
+  "t5"."street" AS "street",
+  "t5"."key" AS "key",
+  "t5"."key_right" AS "key_right"
+FROM (
+  SELECT
+    "t1"."street" AS "street",
+    ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
+    "t2"."key" AS "key_right"
+  FROM (
+    SELECT
+      "t0"."street" AS "street",
+      ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+    FROM "data" AS "t0"
+  ) AS "t1"
+  INNER JOIN (
+    SELECT
+      "t1"."key" AS "key"
+    FROM (
+      SELECT
+        "t0"."street" AS "street",
+        ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+      FROM "data" AS "t0"
+    ) AS "t1"
+  ) AS "t2"
+    ON "t1"."key" = "t2"."key"
+) AS "t5"
+INNER JOIN (
+  SELECT
+    "t5"."key" AS "key"
+  FROM (
+    SELECT
+      "t1"."street" AS "street",
+      ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
+      "t2"."key" AS "key_right"
+    FROM (
+      SELECT
+        "t0"."street" AS "street",
+        ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+      FROM "data" AS "t0"
+    ) AS "t1"
+    INNER JOIN (
+      SELECT
+        "t1"."key" AS "key"
+      FROM (
+        SELECT
+          "t0"."street" AS "street",
+          ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+        FROM "data" AS "t0"
+      ) AS "t1"
+    ) AS "t2"
+      ON "t1"."key" = "t2"."key"
+  ) AS "t5"
+) AS "t6"
+  ON "t5"."key" = "t6"."key"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql
index fb8c40cd69ba..2a699a186d7d 100644
--- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql
+++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql
@@ -1,32 +1,55 @@
-WITH t0 AS (
-  SELECT
-    t5.street AS street,
-    ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key
-  FROM data AS t5
-), t1 AS (
-  SELECT
-    t0.key AS key
-  FROM t0
-), t2 AS (
-  SELECT
-    t0.street AS street,
-    t0.key AS key
-  FROM t0
-  JOIN t1
-    ON t0.key = t1.key
-), t3 AS (
+SELECT
+  t5.street AS street,
+  t5.key AS key,
+  t5.key_right AS key_right
+FROM (
   SELECT
-    t2.street AS street,
-    ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key
-  FROM t2
-), t4 AS (
+    t1.street AS street,
+    ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key,
+    t3.key AS key_right
+  FROM (
+    SELECT
+      t0.street AS street,
+      ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
+    FROM data AS t0
+  ) AS t1
+  INNER JOIN (
+    SELECT
+      t1.key AS key
+    FROM (
+      SELECT
+        t0.street AS street,
+        ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
+      FROM data AS t0
+    ) AS t1
+  ) AS t3
+  ON t1.key = t3.key
+) AS t5
+INNER JOIN (
   SELECT
-    t3.key AS key
-  FROM t3
-)
-SELECT
-  t3.street,
-  t3.key
-FROM t3
-JOIN t4
-  ON t3.key = t4.key
\ No newline at end of file
+    t5.key AS key
+  FROM (
+    SELECT
+      t1.street AS street,
+      ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key,
+      t3.key AS key_right
+    FROM (
+      SELECT
+        t0.street AS street,
+        ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
+      FROM data AS t0
+    ) AS t1
+    INNER JOIN (
+      SELECT
+        t1.key AS key
+      FROM (
+        SELECT
+          t0.street AS street,
+          ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key
+        FROM data AS t0
+      ) AS t1
+    ) AS t3
+    ON t1.key = t3.key
+  ) AS t5
+) AS t7
+  ON t5.key = t7.key
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql
index f3f2e94c391a..d63129cc6985 100644
--- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql
+++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql
@@ -1,32 +1,55 @@
-WITH t0 AS (
-  SELECT
-    t5."street" AS "street",
-    ROW_NUMBER() OVER (ORDER BY t5."street" ASC) - 1 AS "key"
-  FROM "data" AS t5
-), t1 AS (
-  SELECT
-    t0."key" AS "key"
-  FROM t0
-), t2 AS (
-  SELECT
-    t0."street" AS "street",
-    t0."key" AS "key"
-  FROM t0
-  JOIN t1
-    ON t0."key" = t1."key"
-), t3 AS (
+SELECT
+  "t5"."street" AS "street",
+  "t5"."key" AS "key",
+  "t5"."key_right" AS "key_right"
+FROM (
   SELECT
-    t2."street" AS "street",
-    ROW_NUMBER() OVER (ORDER BY t2."street" ASC) - 1 AS "key"
-  FROM t2
-), t4 AS (
+    "t1"."street" AS "street",
+    ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
+    "t3"."key" AS "key_right"
+  FROM (
+    SELECT
+      "t0"."street" AS "street",
+      ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+    FROM "data" AS "t0"
+  ) AS "t1"
+  INNER JOIN (
+    SELECT
+      "t1"."key" AS "key"
+    FROM (
+      SELECT
+        "t0"."street" AS "street",
+        ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+      FROM "data" AS "t0"
+    ) AS "t1"
+  ) AS "t3"
+  ON "t1"."key" = "t3"."key"
+) AS "t5"
+INNER JOIN (
   SELECT
-    t3."key" AS "key"
-  FROM t3
-)
-SELECT
-  t3."street",
-  t3."key"
-FROM t3
-JOIN t4
-  ON t3."key" = t4."key"
\ No newline at end of file
+    "t5"."key" AS "key"
+  FROM (
+    SELECT
+      "t1"."street" AS "street",
+      ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key",
+      "t3"."key" AS "key_right"
+    FROM (
+      SELECT
+        "t0"."street" AS "street",
+        ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+      FROM "data" AS "t0"
+    ) AS "t1"
+    INNER JOIN (
+      SELECT
+        "t1"."key" AS "key"
+      FROM (
+        SELECT
+          "t0"."street" AS "street",
+          ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key"
+        FROM "data" AS "t0"
+      ) AS "t1"
+    ) AS "t3"
+    ON "t1"."key" = "t3"."key"
+  ) AS "t5"
+) AS "t7"
+  ON "t5"."key" = "t7"."key"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql
new file mode 100644
index 000000000000..f26c12ac78c8
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/clickhouse/out.sql
@@ -0,0 +1,5 @@
+SELECT
+  t0.id,
+  t0.bool_col
+FROM functional_alltypes AS t0
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql
new file mode 100644
index 000000000000..f26c12ac78c8
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/duckdb/out.sql
@@ -0,0 +1,5 @@
+SELECT
+  t0.id,
+  t0.bool_col
+FROM functional_alltypes AS t0
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql
new file mode 100644
index 000000000000..f26c12ac78c8
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/clickhouse/out.sql
@@ -0,0 +1,5 @@
+SELECT
+  t0.id,
+  t0.bool_col
+FROM functional_alltypes AS t0
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql
new file mode 100644
index 000000000000..f26c12ac78c8
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/duckdb/out.sql
@@ -0,0 +1,5 @@
+SELECT
+  t0.id,
+  t0.bool_col
+FROM functional_alltypes AS t0
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql
new file mode 100644
index 000000000000..eb7db2731364
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/clickhouse/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  SUM(t0.bigint_col) AS "Sum(bigint_col)"
+FROM functional_alltypes AS t0
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql
new file mode 100644
index 000000000000..eb7db2731364
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/duckdb/out.sql
@@ -0,0 +1,3 @@
+SELECT
+  SUM(t0.bigint_col) AS "Sum(bigint_col)"
+FROM functional_alltypes AS t0
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql
new file mode 100644
index 000000000000..88b2af3a2cc3
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/clickhouse/out.sql
@@ -0,0 +1,10 @@
+SELECT
+  *
+FROM (
+  SELECT
+    t0.id,
+    t0.bool_col
+  FROM functional_alltypes AS t0
+  LIMIT 10
+) AS t2
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql
new file mode 100644
index 000000000000..88b2af3a2cc3
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/duckdb/out.sql
@@ -0,0 +1,10 @@
+SELECT
+  *
+FROM (
+  SELECT
+    t0.id,
+    t0.bool_col
+  FROM functional_alltypes AS t0
+  LIMIT 10
+) AS t2
+LIMIT 11
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql
new file mode 100644
index 000000000000..c454d75c63aa
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_join/test_complex_join_agg/out.sql
@@ -0,0 +1,17 @@
+SELECT
+  t3.key1 AS key1,
+  AVG(t3.value1 - t3.value2) AS avg_diff
+FROM (
+  SELECT
+    t0.value1 AS value1,
+    t0.key1 AS key1,
+    t0.key2 AS key2,
+    t1.value2 AS value2,
+    t1.key1 AS key1_right,
+    t1.key4 AS key4
+  FROM table1 AS t0
+  LEFT OUTER JOIN table2 AS t1
+    ON t0.key1 = t1.key1
+) AS t3
+GROUP BY
+  1
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql
index edaf83ff95ff..6dfef25abe9f 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql
@@ -1,49 +1,40 @@
 SELECT
-  t4.key
+  t2.key AS key,
+  t3.key AS key_right,
+  t6.key_right AS key_right_right
 FROM (
   SELECT
-    t1.key
+    t0.key AS key
+  FROM leaf AS t0
+  WHERE
+    TRUE
+) AS t2
+INNER JOIN (
+  SELECT
+    t0.key AS key
+  FROM leaf AS t0
+  WHERE
+    TRUE
+) AS t3
+  ON t2.key = t3.key
+INNER JOIN (
+  SELECT
+    t2.key AS key,
+    t3.key AS key_right
   FROM (
     SELECT
-      *
+      t0.key AS key
     FROM leaf AS t0
     WHERE
      TRUE
-  ) AS t1
-  INNER JOIN (
-    SELECT
-      t1.key
-    FROM (
-      SELECT
-        *
-      FROM leaf AS t0
-      WHERE
-        TRUE
-    ) AS t1
  ) AS t2
-  ON t1.key = t2.key
-) AS t4
-INNER JOIN (
-  SELECT
-    t1.key
-  FROM (
+  INNER JOIN (
    SELECT
-      *
+      t0.key AS key
    FROM leaf AS t0
    WHERE
      TRUE
-  ) AS t1
-  INNER JOIN (
-    SELECT
-      t1.key
-    FROM (
-      SELECT
-        *
-      FROM leaf AS t0
-      WHERE
-        TRUE
-    ) AS t1
-  ) AS t2
-  ON t1.key = t2.key
-) AS t5
-  ON t4.key = t5.key
\ No newline at end of file
+  ) AS t3
+    ON t2.key = t3.key
+) AS t6
+  ON t6.key = t6.key
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql
new file mode 100644
index 000000000000..96acd49caaad
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql
@@ -0,0 +1,48 @@
+SELECT
+  "t1"."key" AS "key",
+  "t2"."key" AS "key_right",
+  "t4"."key_right" AS "key_right_right"
+FROM (
+  SELECT
+    *
+  FROM "leaf" AS "t0"
+  WHERE
+    TRUE
+) AS "t1"
+INNER JOIN (
+  SELECT
+    "t1"."key" AS "key"
+  FROM (
+    SELECT
+      *
+    FROM "leaf" AS "t0"
+    WHERE
+      TRUE
+  ) AS "t1"
+) AS "t2"
+  ON "t1"."key" = "t2"."key"
+INNER JOIN (
+  SELECT
+    "t1"."key" AS "key",
+    "t2"."key" AS "key_right"
+  FROM (
+    SELECT
+      *
+    FROM "leaf" AS "t0"
+    WHERE
+      TRUE
+  ) AS "t1"
+  INNER JOIN (
+    SELECT
+      "t1"."key" AS "key"
+    FROM (
+      SELECT
+        *
+      FROM "leaf" AS "t0"
+      WHERE
+        TRUE
+      ) AS "t1"
+  ) AS "t2"
+    ON "t1"."key" = "t2"."key"
+) AS "t4"
+  ON "t1"."key" = "t1"."key"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql
index 66a947699796..fb2ee62190b5 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql
@@ -1,22 +1,40 @@
-WITH t0 AS (
+SELECT
+  t1.key AS key,
+  t2.key AS key_right,
+  t5.key_right AS key_right_right
+FROM (
   SELECT
-    t4.key AS key
-  FROM leaf AS t4
+    t0.key AS key
+  FROM leaf AS t0
   WHERE
-    CAST(TRUE AS BOOLEAN)
-), t1 AS (
+    TRUE
+) AS t1
+INNER JOIN (
   SELECT
     t0.key AS key
-  FROM t0
-), t2 AS (
+  FROM leaf AS t0
+  WHERE
+    TRUE
+) AS t2
+  ON t1.key = t2.key
+INNER JOIN (
   SELECT
-    t0.key AS key
-  FROM t0
-  JOIN t1
-    ON t0.key = t1.key
-)
-SELECT
-  t2.key
-FROM t2
-JOIN t2 AS t3
-  ON t2.key = t3.key
\ No newline at end of file
+    t1.key AS key,
+    t2.key AS key_right
+  FROM (
+    SELECT
+      t0.key AS key
+    FROM leaf AS t0
+    WHERE
+      TRUE
+  ) AS t1
+  INNER JOIN (
+    SELECT
+      t0.key AS key
+    FROM leaf AS t0
+    WHERE
+      TRUE
+  ) AS t2
+    ON t1.key = t2.key
+) AS t5
+  ON t1.key = t5.key
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql
index 60738db25e2d..eb9acf0a45fe 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql
@@ -1,22 +1,40 @@
-WITH t0 AS (
+SELECT
+  "t1"."key" AS "key",
+  "t2"."key" AS "key_right",
+  "t5"."key_right" AS "key_right_right"
+FROM (
   SELECT
-    t4."key" AS "key"
-  FROM "leaf" AS t4
+    "t0"."key" AS "key"
+  FROM "leaf" AS "t0"
   WHERE
     TRUE
-), t1 AS (
+) AS "t1"
+INNER JOIN (
   SELECT
-    t0."key" AS "key"
-  FROM t0
-), t2 AS (
+    "t0"."key" AS "key"
+  FROM "leaf" AS "t0"
+  WHERE
+    TRUE
+) AS "t2"
+  ON "t1"."key" = "t2"."key"
+INNER JOIN (
   SELECT
-    t0."key" AS "key"
-  FROM t0
-  JOIN t1
-    ON t0."key" = t1."key"
-)
-SELECT
-  t2."key"
-FROM t2
-JOIN t2 AS t3
-  ON t2."key" = t3."key"
\ No newline at end of file
+    "t1"."key" AS "key",
+    "t2"."key" AS "key_right"
+  FROM (
+    SELECT
+      "t0"."key" AS "key"
+    FROM "leaf" AS "t0"
+    WHERE
+      TRUE
+  ) AS "t1"
+  INNER JOIN (
+    SELECT
+      "t0"."key" AS "key"
+    FROM "leaf" AS "t0"
+    WHERE
+      TRUE
+  ) AS "t2"
+    ON "t1"."key" = "t2"."key"
+) AS "t5"
+  ON "t1"."key" = "t5"."key"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql
new file mode 100644
index 000000000000..d3969647c9ea
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/datafusion/out.sql
@@ -0,0 +1,22 @@
+SELECT
+  CASE "t0"."continent"
+    WHEN 'NA'
+    THEN 'North America'
+    WHEN 'SA'
+    THEN 'South America'
+    WHEN 'EU'
+    THEN 'Europe'
+    WHEN 'AF'
+    THEN 'Africa'
+    WHEN 'AS'
+    THEN 'Asia'
+    WHEN 'OC'
+    THEN 'Oceania'
+    WHEN 'AN'
+    THEN 'Antarctica'
+    ELSE 'Unknown continent'
+  END AS "cont",
+  SUM("t0"."population") AS "total_pop"
+FROM "countries" AS "t0"
+GROUP BY
+  1
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql
index 922316952999..d3969647c9ea 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/snowflake/out.sql
@@ -1,5 +1,5 @@
 SELECT
-  CASE t0."continent"
+  CASE "t0"."continent"
     WHEN 'NA'
     THEN 'North America'
     WHEN 'SA'
@@ -16,7 +16,7 @@ SELECT
     THEN 'Antarctica'
     ELSE 'Unknown continent'
   END AS "cont",
-  SUM(t0."population") AS "total_pop"
-FROM "countries" AS t0
+  SUM("t0"."population") AS "total_pop"
+FROM "countries" AS "t0"
 GROUP BY
   1
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql
index fa221469c7dd..cd122964c87e 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/clickhouse/out.sql
@@ -1,13 +1,9 @@
 SELECT
   t0.x IN (
     SELECT
-      t1.x
-    FROM (
-      SELECT
-        *
-      FROM t AS t0
-      WHERE
-        t0.x > 2
-    ) AS t1
-  ) AS "InColumn(x, x)"
+      t0.x
+    FROM t AS t0
+    WHERE
+      t0.x > 2
+  ) AS "InSubquery(x)"
 FROM t AS t0
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql
new file mode 100644
index 000000000000..b5362bf67adc
--- /dev/null
+++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/datafusion/out.sql
@@ -0,0 +1,15 @@
+SELECT
+  "t0"."x" IN ((
+    SELECT
+      "t1"."x" AS "x"
+    FROM (
+      SELECT
+        *
+      FROM "t" AS "t0"
+      WHERE
+        (
+          "t0"."x" > 2
+        )
+    ) AS "t1"
+  )) AS "InSubquery(x)"
+FROM "t" AS "t0"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql
index 218ccb1d5c46..dd1d25118977 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/duckdb/out.sql
@@ -1,13 +1,9 @@
 SELECT
   t0.x IN (
     SELECT
-      t1.x
-    FROM (
-      SELECT
-        t0.x AS x
-      FROM t AS t0
-      WHERE
-        t0.x > CAST(2 AS TINYINT)
-    ) AS t1
-  ) AS "InColumn(x, x)"
+      t0.x
+    FROM t AS t0
+    WHERE
+      t0.x > CAST(2 AS TINYINT)
+  ) AS "InSubquery(x)"
 FROM t AS t0
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql
index 683a03b084ec..92e386ef62a4 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/snowflake/out.sql
@@ -1,13 +1,9 @@
 SELECT
-  t0."x" IN (
+  "t0"."x" IN ((
    SELECT
-      t1."x"
-    FROM (
-      SELECT
-        t0."x" AS "x"
-      FROM "t" AS t0
-      WHERE
-        t0."x" > 2
-    ) AS t1
-  ) AS "InColumn(x, x)"
-FROM "t" AS t0
\ No newline at end of file
+      "t0"."x" AS "x"
+    FROM "t" AS "t0"
+    WHERE
+      "t0"."x" > 2
+  )) AS "InSubquery(x)"
+FROM "t" AS "t0"
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql
b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql
index d30fed08ac79..8962d00fdabe 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql
@@ -1,94 +1,96 @@
 SELECT
-  t11.field_of_study,
-  t11.diff
+  t10.field_of_study,
+  t10.diff
 FROM (
   SELECT
-    *
+    t5.field_of_study,
+    t5.diff
   FROM (
     SELECT
-      *
+      t4.field_of_study,
+      any(t4.diff) AS diff
     FROM (
       SELECT
-        t4.field_of_study,
-        any(t4.diff) AS diff
+        t3.field_of_study,
+        t3.years,
+        t3.degrees,
+        t3.earliest_degrees,
+        t3.latest_degrees,
+        t3.latest_degrees - t3.earliest_degrees AS diff
      FROM (
        SELECT
-          t3.*,
-          t3.latest_degrees - t3.earliest_degrees AS diff
+          t2.field_of_study,
+          t2.years,
+          t2.degrees,
+          any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
+          anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
        FROM (
          SELECT
-            t2.*,
-            any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
-            anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
+            t1.field_of_study,
+            CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
+            CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
          FROM (
            SELECT
-              t1.field_of_study,
-              CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
-              CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
-            FROM (
-              SELECT
-                t0.field_of_study,
-                arrayJoin(
-                  [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
-                ) AS __pivoted__
-              FROM humanities AS t0
-            ) AS t1
-          ) AS t2
-        ) AS t3
-      ) AS t4
-      GROUP BY
-        t4.field_of_study
-    ) AS t5
-    ORDER BY
-      t5.diff DESC
-  ) AS t6
+            t0.field_of_study,
+            arrayJoin(
+              [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
+            ) AS __pivoted__
+          FROM humanities AS t0
+        ) AS t1
+      ) AS t2
+    ) AS t3
+  ) AS t4
+  GROUP BY
+    t4.field_of_study
+  ) AS t5
+  ORDER BY
+    t5.diff DESC
   LIMIT 10
   UNION ALL
   SELECT
-    *
+    t5.field_of_study,
+    t5.diff
  FROM (
    SELECT
-      *
+      t4.field_of_study,
+      any(t4.diff) AS diff
    FROM (
      SELECT
-        *
+        t3.field_of_study,
+        t3.years,
+        t3.degrees,
+        t3.earliest_degrees,
+        t3.latest_degrees,
+        t3.latest_degrees - t3.earliest_degrees AS diff
      FROM (
        SELECT
-          t4.field_of_study,
-          any(t4.diff) AS diff
+          t2.field_of_study,
+          t2.years,
+          t2.degrees,
+          any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
+          anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
        FROM (
          SELECT
-            t3.*,
-            t3.latest_degrees - t3.earliest_degrees AS diff
+            t1.field_of_study,
+            CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
+            CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
          FROM (
            SELECT
-              t2.*,
-              any(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
-              anyLast(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
-            FROM (
-              SELECT
-                t1.field_of_study,
-                CAST(t1.__pivoted__.1 AS Nullable(String)) AS years,
-                CAST(t1.__pivoted__.2 AS Nullable(Int64)) AS degrees
-              FROM (
-                SELECT
-                  t0.field_of_study,
-                  arrayJoin(
-                    [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
-                  ) AS __pivoted__
-                FROM humanities AS t0
-              ) AS t1
-            ) AS t2
-          ) AS t3
-        ) AS t4
-        GROUP BY
-          t4.field_of_study
-      ) AS t5
-      WHERE
-        t5.diff < 0
-    ) AS t7
-    ORDER BY
-      t7.diff ASC
-  ) AS t9
+            t0.field_of_study,
+            arrayJoin(
+              [CAST(tuple('1970-71', t0."1970-71") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1975-76', t0."1975-76") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1980-81', t0."1980-81") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1985-86', t0."1985-86") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1990-91', t0."1990-91") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('1995-96', t0."1995-96") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2000-01', t0."2000-01") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2005-06', t0."2005-06") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2010-11', t0."2010-11") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2011-12', t0."2011-12") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2012-13', t0."2012-13") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2013-14', t0."2013-14") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2014-15', t0."2014-15") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2015-16', t0."2015-16") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2016-17', t0."2016-17") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2017-18', t0."2017-18") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2018-19', t0."2018-19") AS Tuple(years Nullable(String), degrees Nullable(Int64))), CAST(tuple('2019-20', t0."2019-20") AS Tuple(years Nullable(String), degrees Nullable(Int64)))]
+            ) AS __pivoted__
+          FROM humanities AS t0
+        ) AS t1
+      ) AS t2
+    ) AS t3
+  ) AS t4
+  GROUP BY
+    t4.field_of_study
+  ) AS t5
+  WHERE
+    t5.diff < 0
+  ORDER BY
+    t5.diff ASC
   LIMIT 10
-) AS t11
\ No newline at end of file
+) AS t10
\ No newline at end of file
diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql
index 8335befe6765..6eaa105c4a49 100644
--- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql
+++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql
@@ -1,75 +1,96 @@
-WITH t0 AS (
-  SELECT
-    t7.field_of_study AS field_of_study,
-    UNNEST(
-      CAST([{'years': '1970-71', 'degrees': t7."1970-71"}, {'years': '1975-76', 'degrees': t7."1975-76"}, {'years': '1980-81', 'degrees': t7."1980-81"}, {'years': '1985-86', 'degrees': t7."1985-86"}, {'years': '1990-91', 'degrees': t7."1990-91"}, {'years': '1995-96', 'degrees': t7."1995-96"}, {'years': '2000-01', 'degrees': t7."2000-01"}, {'years': '2005-06', 'degrees': t7."2005-06"}, {'years': '2010-11', 'degrees': t7."2010-11"}, {'years': '2011-12', 'degrees': t7."2011-12"}, {'years': '2012-13', 'degrees': t7."2012-13"}, {'years': '2013-14', 'degrees': t7."2013-14"}, {'years': '2014-15', 'degrees': t7."2014-15"}, {'years': '2015-16', 'degrees': t7."2015-16"}, {'years': '2016-17', 'degrees': t7."2016-17"}, {'years': '2017-18', 'degrees': t7."2017-18"}, {'years': '2018-19', 'degrees': t7."2018-19"}, {'years': '2019-20', 'degrees': t7."2019-20"}] AS STRUCT(years TEXT, degrees BIGINT)[])
-    ) AS __pivoted__
-  FROM humanities AS t7
-), t1 AS (
-  SELECT
-    t0.field_of_study AS field_of_study,
-    STRUCT_EXTRACT(t0.__pivoted__, 'years') AS years,
-    STRUCT_EXTRACT(t0.__pivoted__, 'degrees') AS degrees
-  FROM t0
-), t2 AS (
-  SELECT
-    t1.field_of_study AS field_of_study,
-    t1.years AS years,
-    t1.degrees AS degrees,
-    FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
-    LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
-  FROM t1
-), t3 AS (
-  SELECT
-    t2.field_of_study AS field_of_study,
-    t2.years AS years,
-    t2.degrees AS degrees,
-    t2.earliest_degrees AS earliest_degrees,
-    t2.latest_degrees AS latest_degrees,
-    t2.latest_degrees - t2.earliest_degrees AS diff
-  FROM t2
-), t4 AS (
-  SELECT
-    t3.field_of_study AS field_of_study,
-    FIRST(t3.diff) AS diff
-  FROM t3
-  GROUP BY
-    1
-), anon_1 AS (
+SELECT
+  t10.field_of_study,
+  t10.diff
+FROM (
   SELECT
-    t4.field_of_study AS field_of_study,
-    t4.diff AS diff
-  FROM t4
+    t5.field_of_study,
+    t5.diff
+  FROM (
+    SELECT
+      t4.field_of_study,
+      FIRST(t4.diff) AS diff
+    FROM (
+      SELECT
+        t3.field_of_study,
+        t3.years,
+        t3.degrees,
+        t3.earliest_degrees,
+        t3.latest_degrees,
+        t3.latest_degrees - t3.earliest_degrees AS diff
+      FROM (
+        SELECT
+          t2.field_of_study,
+          t2.years,
+          t2.degrees,
+          FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees,
+          LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees
+        FROM (
+          SELECT
+            t1.field_of_study,
+            t1.__pivoted__['years'] AS years,
+            t1.__pivoted__['degrees'] AS degrees
degrees + FROM ( + SELECT + t0.field_of_study, + UNNEST( + [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 + ) AS t5 ORDER BY - t4.diff DESC + t5.diff DESC LIMIT 10 -), t5 AS ( + UNION ALL SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + t5.field_of_study, + t5.diff + FROM ( + SELECT + t4.field_of_study, + FIRST(t4.diff) AS diff + FROM ( + SELECT + t3.field_of_study, + t3.years, + t3.degrees, + t3.earliest_degrees, + t3.latest_degrees, + t3.latest_degrees - t3.earliest_degrees AS diff + FROM ( + SELECT + t2.field_of_study, + t2.years, + t2.degrees, + FIRST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, + LAST(t2.degrees) OVER (PARTITION BY t2.field_of_study ORDER BY t2.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees + FROM ( + SELECT + t1.field_of_study, + t1.__pivoted__['years'] AS years, + t1.__pivoted__['degrees'] AS degrees + FROM ( + SELECT + t0.field_of_study, + UNNEST( + [{'years': '1970-71', 'degrees': t0."1970-71"}, {'years': '1975-76', 'degrees': t0."1975-76"}, {'years': '1980-81', 'degrees': t0."1980-81"}, {'years': '1985-86', 'degrees': t0."1985-86"}, {'years': '1990-91', 'degrees': t0."1990-91"}, {'years': '1995-96', 'degrees': t0."1995-96"}, {'years': '2000-01', 'degrees': t0."2000-01"}, {'years': '2005-06', 'degrees': t0."2005-06"}, {'years': '2010-11', 'degrees': t0."2010-11"}, {'years': '2011-12', 'degrees': t0."2011-12"}, {'years': '2012-13', 'degrees': t0."2012-13"}, {'years': '2013-14', 'degrees': t0."2013-14"}, {'years': '2014-15', 'degrees': t0."2014-15"}, {'years': '2015-16', 'degrees': t0."2015-16"}, {'years': '2016-17', 'degrees': t0."2016-17"}, {'years': '2017-18', 'degrees': t0."2017-18"}, {'years': '2018-19', 'degrees': t0."2018-19"}, {'years': '2019-20', 'degrees': t0."2019-20"}] + ) AS __pivoted__ + FROM humanities AS t0 + ) AS t1 + ) AS t2 + ) AS t3 + ) AS t4 + GROUP BY + 1 + ) AS t5 WHERE - t4.diff < CAST(0 AS TINYINT) -), anon_2 AS ( - SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff - FROM t5 + t5.diff < CAST(0 AS TINYINT) ORDER BY t5.diff ASC LIMIT 10 -) -SELECT - t6.field_of_study, - t6.diff -FROM ( - SELECT - anon_1.field_of_study AS field_of_study, - anon_1.diff AS diff - FROM anon_1 - UNION ALL - SELECT - anon_2.field_of_study AS field_of_study, - anon_2.diff AS diff - FROM anon_2 -) AS t6 \ No newline at end of file +) AS t10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/conftest.py b/ibis/backends/tests/sql/conftest.py index b16b9fa48147..04667e60e033 100644 --- 
a/ibis/backends/tests/sql/conftest.py +++ b/ibis/backends/tests/sql/conftest.py @@ -3,11 +3,7 @@ import pytest import ibis - -pytest.importorskip("duckdb") - - -from ibis.tests.expr.mocks import MockBackend # noqa: E402 +from ibis.tests.expr.mocks import MockBackend @pytest.fixture(scope="module") @@ -70,13 +66,9 @@ def bar_t(con): return con.table("bar_t") -def get_query(expr): - ast = Compiler.to_ast(expr, QueryContext(compiler=Compiler)) - return ast.queries[0] - - def to_sql(expr, *args, **kwargs) -> str: - return get_query(expr).compile(*args, **kwargs) + pytest.importorskip("duckdb") + return str(ibis.to_sql(expr, *args, dialect="duckdb", **kwargs)) @pytest.fixture(scope="module") diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql index f93e098292e7..4a8b9ef4cb44 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_and_non_agg_filter/out.sql @@ -1,9 +1,19 @@ -SELECT t0.* -FROM my_table t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM my_table t0 - WHERE t0.`a` < 100 -)) AND - (t0.`b` = 'a') \ No newline at end of file +SELECT + t0.a, + t0.b +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) + AND t0.b = 'a' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql index 631a18bd4253..37c5668f835b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter/out.sql @@ -1,11 +1,18 @@ -WITH t0 AS ( - SELECT t1.*, t1.`b` * 2 AS `b2` - FROM my_table t1 -) -SELECT t0.`a`, t0.`b2` -FROM t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM t0 -)) \ No newline at end of file +SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql index 631a18bd4253..37c5668f835b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_agg_filter_with_alias/out.sql @@ -1,11 +1,18 @@ -WITH t0 AS ( - SELECT t1.*, t1.`b` * 2 AS `b2` - FROM my_table t1 -) -SELECT t0.`a`, t0.`b2` -FROM t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM t0 -)) \ No newline at end of file +SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 +FROM my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b * CAST(2 AS TINYINT) AS b2 + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql 
b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql index 91e2486414bc..d38aa10366c4 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_distinct/out.sql @@ -1,2 +1,7 @@ -SELECT DISTINCT t0.`string_col` -FROM functional_alltypes t0 \ No newline at end of file +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql index 28d25fb5beba..1e1635a607bf 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_default_name/out.sql @@ -1,2 +1,3 @@ -SELECT t0.`int_col` + 4 AS `Add(int_col, 4)` -FROM int_col_table t0 \ No newline at end of file +SELECT + t0.int_col + CAST(4 AS TINYINT) AS "Add(int_col, 4)" +FROM int_col_table AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql index 54f6988bbf2a..0237f96353c1 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_column_expr_retains_name/out.sql @@ -1,2 +1,3 @@ -SELECT t0.`int_col` + 4 AS `foo` -FROM int_col_table t0 \ No newline at end of file +SELECT + t0.int_col + CAST(4 AS TINYINT) AS foo +FROM int_col_table AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py index c42eaa8fc2d6..58b21a2c9c09 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/decompiled.py @@ -19,9 +19,6 @@ "month": "int32", }, ) +f = functional_alltypes.filter(functional_alltypes.bigint_col > 0) -result = ( - functional_alltypes.filter(functional_alltypes.bigint_col > 0) - .group_by(functional_alltypes.string_col) - .aggregate(functional_alltypes.int_col.nunique().name("nunique")) -) +result = f.aggregate([f.int_col.nunique().name("nunique")], by=[f.string_col]) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql index 9ed4f1de1890..1203afe54f42 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_count_distinct/out.sql @@ -1,4 +1,24 @@ -SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `nunique` -FROM functional_alltypes t0 -WHERE t0.`bigint_col` > 0 -GROUP BY 1 \ No newline at end of file +SELECT + t1.string_col, + COUNT(DISTINCT t1.int_col) AS nunique +FROM ( + SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month + FROM functional_alltypes AS t0 + WHERE + t0.bigint_col > CAST(0 AS TINYINT) +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py index db9a8d152cb6..54b395185659 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/decompiled.py @@ -20,19 +20,12 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -difference = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .difference( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) +difference = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).difference( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True ) -proj = difference.select([difference.key, difference.value]) -result = proj.select(proj.key) +result = difference.select(difference.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql index df350603146b..65eba6a3a4c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_difference_project_column/out.sql @@ -1,21 +1,17 @@ -SELECT t0.`key` +SELECT + t3.key FROM ( - SELECT t1.`key`, t1.`value` - FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 0 - ) - SELECT * - FROM t3 - EXCEPT - SELECT * - FROM t2 - ) t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) + EXCEPT + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py index 07cf4d8e959d..9da80d9792e9 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/decompiled.py @@ -2,10 +2,8 @@ t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) +f = t.filter(t.b == "m") +agg = f.aggregate([f.a.sum().name("sum"), f.a.max()], by=[f.b]) +f1 = agg.filter(agg["Max(a)"] == 2) -result = ( - t.filter(t.b == "m") - .group_by(t.b) - .having(t.a.max() == 2) - .aggregate(t.a.sum().name("sum")) -) +result = f1.select(f1.b, f1.sum) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql index 68f73aceff66..0043337a96ee 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_from_filter/out.sql @@ -1,5 +1,21 @@ -SELECT t0.`b`, sum(t0.`a`) AS `sum` -FROM t t0 -WHERE t0.`b` = 'm' -GROUP BY 1 -HAVING max(t0.`a`) = 2 \ No newline at end of file +SELECT + t2.b, + t2.sum +FROM ( + SELECT + t1.b, + SUM(t1.a) AS sum, + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b + FROM t AS t0 + WHERE + t0.b = 'm' + ) AS t1 + GROUP BY + 1 +) AS t2 +WHERE + t2."Max(a)" = CAST(2 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql index ebea652d49ca..6691834ce1b5 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_having_size/out.sql @@ -1,4 +1,14 @@ -SELECT t0.`string_col`, count(1) AS `CountStar(functional_alltypes)` -FROM functional_alltypes t0 -GROUP BY 1 -HAVING max(t0.`double_col`) = 1 \ No newline at end of file +SELECT + t1.string_col, + t1."CountStar()" +FROM ( + SELECT + t0.string_col, + COUNT(*) AS "CountStar()", + MAX(t0.double_col) AS "Max(double_col)" + FROM functional_alltypes AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."Max(double_col)" = CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py index 5118ece2e5f4..99cadb58de06 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/decompiled.py @@ -20,19 +20,12 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -intersection = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .intersect( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) +intersection = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).intersect( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True ) -proj = intersection.select([intersection.key, intersection.value]) -result = proj.select(proj.key) +result = intersection.select(intersection.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql index e849cd866523..7c3cbc2fdf12 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_intersect_project_column/out.sql @@ -1,21 +1,17 @@ -SELECT t0.`key` +SELECT + t3.key FROM ( - SELECT t1.`key`, t1.`value` - FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 
0 - ) - SELECT * - FROM t3 - INTERSECT - SELECT * - FROM t2 - ) t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) + INTERSECT + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py index 38bfb40e35aa..223cfa46792a 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/decompiled.py @@ -20,9 +20,10 @@ }, ) -result = functional_alltypes.group_by(functional_alltypes.string_col).aggregate( +result = functional_alltypes.aggregate( [ functional_alltypes.int_col.nunique().name("int_card"), functional_alltypes.smallint_col.nunique().name("smallint_card"), - ] + ], + by=[functional_alltypes.string_col], ) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql index 97baaa4d2137..cbb6ac1079a3 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_multiple_count_distinct/out.sql @@ -1,4 +1,7 @@ -SELECT t0.`string_col`, count(DISTINCT t0.`int_col`) AS `int_card`, - count(DISTINCT t0.`smallint_col`) AS `smallint_card` -FROM functional_alltypes t0 -GROUP BY 1 \ No newline at end of file +SELECT + t0.string_col, + COUNT(DISTINCT t0.int_col) AS int_card, + COUNT(DISTINCT t0.smallint_col) AS smallint_card +FROM functional_alltypes AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql index c295debd740a..45fa51a79cba 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_pushdown_with_or/out.sql @@ -1,5 +1,19 @@ -SELECT t0.* -FROM functional_alltypes t0 -WHERE (t0.`double_col` > 3.14) AND - (locate('foo', t0.`string_col`) - 1 >= 0) AND - (((t0.`int_col` - 1) = 0) OR (t0.`float_col` <= 1.34)) \ No newline at end of file +SELECT + t0.double_col, + t0.string_col, + t0.int_col, + t0.float_col +FROM functional_alltypes AS t0 +WHERE + t0.double_col > CAST(3.14 AS DOUBLE) + AND CONTAINS(t0.string_col, 'foo') + AND ( + ( + ( + t0.int_col - CAST(1 AS TINYINT) + ) = CAST(0 AS TINYINT) + ) + OR ( + t0.float_col <= CAST(1.34 AS DOUBLE) + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql index d0e1564bcf77..c1410ecdac47 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_simple_agg_filter/out.sql @@ -1,8 +1,18 @@ -SELECT t0.* -FROM my_table t0 -WHERE (t0.`a` < 100) AND - (t0.`a` = ( - SELECT max(t0.`a`) AS `Max(a)` - FROM my_table t0 - WHERE t0.`a` < 100 -)) \ No newline at end of file +SELECT + t0.a, + t0.b +FROM 
my_table AS t0 +WHERE + t0.a < CAST(100 AS TINYINT) + AND t0.a = ( + SELECT + MAX(t1.a) AS "Max(a)" + FROM ( + SELECT + t0.a, + t0.b + FROM my_table AS t0 + WHERE + t0.a < CAST(100 AS TINYINT) + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py index a18437fa4d10..aef3bd85e809 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/decompiled.py @@ -11,9 +11,7 @@ }, ) param = ibis.param("timestamp") -proj = alltypes.select( - [alltypes.float_col, alltypes.timestamp_col, alltypes.int_col, alltypes.string_col] -).filter(alltypes.timestamp_col < param.name("my_param")) -agg = proj.group_by(proj.string_col).aggregate(proj.float_col.sum().name("foo")) +f = alltypes.filter(alltypes.timestamp_col < param.name("my_param")) +agg = f.aggregate([f.float_col.sum().name("foo")], by=[f.string_col]) result = agg.foo.count() diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql index 40f4523b6453..7ceaa3d33e79 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_subquery_where_location/out.sql @@ -1,11 +1,19 @@ -WITH t0 AS ( - SELECT t2.`float_col`, t2.`timestamp_col`, t2.`int_col`, t2.`string_col` - FROM alltypes t2 - WHERE t2.`timestamp_col` < '2014-01-01T00:00:00' -) -SELECT count(t1.`foo`) AS `Count(foo)` +SELECT + COUNT(t2.foo) AS "Count(foo)" FROM ( - SELECT t0.`string_col`, sum(t0.`float_col`) AS `foo` - FROM t0 - GROUP BY 1 -) t1 \ No newline at end of file + SELECT + t1.string_col, + SUM(t1.float_col) AS foo + FROM ( + SELECT + t0.float_col, + t0.timestamp_col, + t0.int_col, + t0.string_col + FROM alltypes AS t0 + WHERE + t0.timestamp_col < MAKE_TIMESTAMP(2014, 1, 1, 0, 0, 0.0) + ) AS t1 + GROUP BY + 1 +) AS t2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py index 4c39ab24b66c..d5fec08f28bd 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -difference = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .difference( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = difference.select([difference.key, difference.value]) +result = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).difference( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql 
b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql index dc7592273ce4..4d00f47c36b0 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_difference/out.sql @@ -1,18 +1,18 @@ -SELECT t0.`key`, t0.`value` +SELECT + t3.key, + t3.value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) EXCEPT - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py index 3b35a0e24ef2..76016cf2fc5f 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/decompiled.py @@ -21,5 +21,5 @@ ) result = functional_alltypes.select( - [functional_alltypes.string_col, functional_alltypes.int_col] + functional_alltypes.string_col, functional_alltypes.int_col ).distinct() diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql index b0aa492cb968..483b4fef6f49 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_distinct/out.sql @@ -1,2 +1,8 @@ -SELECT DISTINCT t0.`string_col`, t0.`int_col` -FROM functional_alltypes t0 \ No newline at end of file +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col, + t0.int_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py index bcacc9049a93..143198197ad6 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/decompiled.py @@ -1,12 +1,14 @@ import ibis -s = ibis.table(name="s", schema={"b": "string"}) lit = ibis.timestamp("2018-01-01 00:00:00") +s = ibis.table(name="s", schema={"b": "string"}) t = ibis.table(name="t", schema={"a": "int64", "b": "string", "c": "timestamp"}) -proj = t.select([t.a, t.b, t.c.name("C")]) -proj1 = proj.filter(proj.C == lit) -proj2 = proj1.select([proj1.a, proj1.b, lit.name("the_date")]) -proj3 = proj2.inner_join(s, proj2.b == s.b).select(proj2.a) +f = t.filter(t.c == lit) +joinchain = ( + f.select(f.a, f.b, lit.name("the_date")) + .inner_join(s, f.select(f.a, f.b, lit.name("the_date")).b == s.b) + .select(f.select(f.a, f.b, lit.name("the_date")).a) +) -result = proj3.filter(proj3.a < 1.0) +result = joinchain.filter(joinchain.a < 1.0) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql 
b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql index fc58d3195c63..bf3eedb03752 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_drop_with_filter/out.sql @@ -1,21 +1,19 @@ -WITH t0 AS ( - SELECT t4.`a`, t4.`b`, t4.`c` AS `C` - FROM t t4 -), -t1 AS ( - SELECT t0.* - FROM t0 - WHERE t0.`C` = '2018-01-01T00:00:00' -), -t2 AS ( - SELECT t1.`a`, t1.`b`, '2018-01-01T00:00:00' AS `the_date` - FROM t1 -) -SELECT t3.* +SELECT + t6.a FROM ( - SELECT t2.`a` - FROM t2 - INNER JOIN s t4 - ON t2.`b` = t4.`b` -) t3 -WHERE t3.`a` < 1.0 \ No newline at end of file + SELECT + t4.a + FROM ( + SELECT + t0.a, + t0.b, + MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) AS the_date + FROM t AS t0 + WHERE + t0.c = MAKE_TIMESTAMP(2018, 1, 1, 0, 0, 0.0) + ) AS t4 + INNER JOIN s AS t2 + ON t4.b = t2.b +) AS t6 +WHERE + t6.a < CAST(1.0 AS DOUBLE) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py index 1f9d05ae454a..17014ebe6802 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -intersection = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .intersect( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = intersection.select([intersection.key, intersection.value]) +result = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).intersect( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql index dd58f56759e2..72a05ef24e92 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_table_intersect/out.sql @@ -1,18 +1,18 @@ -SELECT t0.`key`, t0.`value` +SELECT + t3.key, + t3.value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) INTERSECT - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py index 
d0e56bad6b4f..ea48ae1fd416 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/decompiled.py @@ -20,18 +20,11 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -union = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .union( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit), - distinct=True, - ) -) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) -result = union.select([union.key, union.value]) +result = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).union( + f1.select(f1.string_col.name("key"), f1.double_col.name("value")), distinct=True +) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql index d585d437cbe9..0bf62fa423da 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union/out.sql @@ -1,18 +1,18 @@ -SELECT t0.`key`, t0.`value` +SELECT + t3.key, + t3.value FROM ( - WITH t1 AS ( - SELECT t3.`string_col` AS `key`, t3.`double_col` AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` <= 0 - ), - t2 AS ( - SELECT t3.`string_col` AS `key`, CAST(t3.`float_col` AS double) AS `value` - FROM functional_alltypes t3 - WHERE t3.`int_col` > 0 - ) - SELECT * - FROM t2 + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) UNION - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py index 3731f3449a83..8ad01cf538e5 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/decompiled.py @@ -2,7 +2,6 @@ t = ibis.table(name="t", schema={"a": "int64", "b": "string"}) -proj = t.order_by(t.b.asc()) -union = proj.union(proj) +s = t.order_by(t.b.asc()) -result = union.select([union.a, union.b]) +result = s.union(s) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql index 3951fb9c74f2..dda59184ba53 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_order_by/out.sql @@ -1,13 +1,18 @@ -SELECT t0.`a`, t0.`b` +SELECT + t2.a, + t2.b FROM ( - WITH t1 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`b` ASC - ) - SELECT * - FROM t1 + SELECT + t0.a, + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC UNION ALL - SELECT * - FROM t1 -) t0 \ No newline at end of file + SELECT + t0.a, + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC +) AS t2 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py index 66cfa539b334..7c33ae58b4db 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/decompiled.py @@ -20,18 +20,10 @@ }, ) lit = ibis.literal(0) -alias = functional_alltypes.string_col.name("key") -union = ( - functional_alltypes.select( - [alias, functional_alltypes.float_col.cast("float64").name("value")] - ) - .filter(functional_alltypes.int_col > lit) - .union( - functional_alltypes.select( - [alias, functional_alltypes.double_col.name("value")] - ).filter(functional_alltypes.int_col <= lit) - ) -) -proj = union.select([union.key, union.value]) +f = functional_alltypes.filter(functional_alltypes.int_col > lit) +f1 = functional_alltypes.filter(functional_alltypes.int_col <= lit) +union = f.select( + f.string_col.name("key"), f.float_col.cast("float64").name("value") +).union(f1.select(f1.string_col.name("key"), f1.double_col.name("value"))) -result = proj.select(proj.key) +result = union.select(union.key) diff --git a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql index d8d9874335b7..b2fb8620109b 100644 --- a/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_compiler/test_union_project_column/out.sql @@ -1,21 +1,17 @@ -SELECT t0.`key` +SELECT + t3.key FROM ( - SELECT t1.`key`, t1.`value` - FROM ( - WITH t2 AS ( - SELECT t4.`string_col` AS `key`, t4.`double_col` AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` <= 0 - ), - t3 AS ( - SELECT t4.`string_col` AS `key`, CAST(t4.`float_col` AS double) AS `value` - FROM functional_alltypes t4 - WHERE t4.`int_col` > 0 - ) - SELECT * - FROM t3 - UNION ALL - SELECT * - FROM t2 - ) t1 -) t0 \ No newline at end of file + SELECT + t0.string_col AS key, + CAST(t0.float_col AS DOUBLE) AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col > CAST(0 AS TINYINT) + UNION ALL + SELECT + t0.string_col AS key, + t0.double_col AS value + FROM functional_alltypes AS t0 + WHERE + t0.int_col <= CAST(0 AS TINYINT) +) AS t3 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py index c964df0eda21..2cf13f7c2cc1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/decompiled.py @@ -1,6 +1,10 @@ import ibis +tpch_region = ibis.table( + name="tpch_region", + schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, +) tpch_nation = ibis.table( name="tpch_nation", schema={ @@ -10,15 +14,17 @@ "n_comment": "string", }, ) -tpch_region = ibis.table( - name="tpch_region", - schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, -) result = ( tpch_region.inner_join( tpch_nation, tpch_region.r_regionkey == tpch_nation.n_regionkey ) - .select([tpch_nation, tpch_region.r_name.name("region")]) + .select( + tpch_nation.n_nationkey, + tpch_nation.n_name, + tpch_nation.n_regionkey, + tpch_nation.n_comment, + 
tpch_region.r_name.name("region"), + ) .count() ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql index d96890d7589e..3011cdb409b4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_count_joined/out.sql @@ -1,7 +1,13 @@ -SELECT count(1) AS `CountStar()` +SELECT + COUNT(*) AS "CountStar()" FROM ( - SELECT t2.*, t1.`r_name` AS `region` - FROM tpch_region t1 - INNER JOIN tpch_nation t2 - ON t1.`r_regionkey` = t2.`n_regionkey` -) t0 \ No newline at end of file + SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t2.r_name AS region + FROM tpch_region AS t2 + INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql index aae031ab5be6..47945167c00a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/explicit.sql @@ -1,4 +1,13 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 -HAVING sum(t0.`f`) > 10 \ No newline at end of file +SELECT + t1.foo_id, + t1.total +FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1.total > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql index 72cf2f0a1f54..35e4fe0adc24 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_having/inline.sql @@ -1,4 +1,14 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 -HAVING count(1) > 100 \ No newline at end of file +SELECT + t1.foo_id, + t1.total +FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql index 3d6905b14e28..a15f1a8cce7e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_alias_bug/out.sql @@ -1,8 +1,16 @@ -SELECT t0.`foo_id`, sum(t0.`value1`) AS `total` +SELECT + t5.foo_id, + SUM(t5.value1) AS total FROM ( - SELECT t1.*, t2.`value1` - FROM star1 t1 - INNER JOIN star2 t2 - ON t1.`foo_id` = t2.`foo_id` -) t0 -GROUP BY 1 \ No newline at end of file + SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t3.value1 + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql index 4f6df3806a2d..82d666c54c3f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered.sql @@ -1,8 +1,23 @@ -SELECT t0.`g`, sum(t0.`foo`) AS `foo total` +SELECT + t1.g, + SUM(t1.foo) AS "foo total" FROM ( - SELECT t1.*, t1.`a` + t1.`b` AS `foo` - FROM alltypes t1 - WHERE (t1.`f` > 0) AND - (t1.`g` = 'bar') -) t0 -GROUP BY 1 \ No newline at end of file + SELECT + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo + FROM alltypes AS t0 + WHERE + t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql index b86f82d6b137..32772c5a969d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/agg_filtered2.sql @@ -1,8 +1,25 @@ -SELECT t0.`g`, sum(t0.`foo`) AS `foo total` +SELECT + t1.g, + SUM(t1.foo) AS "foo total" FROM ( - SELECT t1.*, t1.`a` + t1.`b` AS `foo` - FROM alltypes t1 - WHERE t1.`f` > 0 -) t0 -WHERE t0.`foo` < 10 -GROUP BY 1 \ No newline at end of file + SELECT + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo + FROM alltypes AS t0 + WHERE + t0.f > CAST(0 AS TINYINT) AND ( + t0.a + t0.b + ) < CAST(10 AS TINYINT) +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql index c5d9cb1217ce..7407d5fcf78b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/filtered.sql @@ -1,4 +1,16 @@ -SELECT t0.*, t0.`a` + t0.`b` AS `foo` -FROM alltypes t0 -WHERE (t0.`f` > 0) AND - (t0.`g` = 'bar') \ No newline at end of file +SELECT + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo +FROM alltypes AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) AND t0.g = 'bar' \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql index dfb892809a35..0917ea29aae1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_aggregate_projection_subquery/proj.sql @@ -1,3 +1,16 @@ -SELECT t0.*, t0.`a` + t0.`b` AS `foo` -FROM alltypes t0 -WHERE t0.`f` > 0 \ No newline at end of file +SELECT + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k, + t0.a + t0.b AS foo +FROM alltypes AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py index fde5a5fce9b8..dc83d63276ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/decompiled.py @@ -9,4 +9,6 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.anti_join(star2, star1.foo_id == star2.foo_id).select(star1) +result = star1.anti_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql index 971fd1985c21..f2ef0d0d3f67 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_anti_join/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT ANTI JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +ANTI JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql index 35e0559cb01e..b110ecf4f3b8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bool_bool/out.sql @@ -1,3 +1,9 @@ -SELECT t0.* -FROM airlines t0 -WHERE (CAST(t0.`dest` AS bigint) = 0) = TRUE \ No newline at end of file +SELECT + t0.dest, + t0.origin, + t0.arrdelay +FROM airlines AS t0 +WHERE + ( + CAST(t0.dest AS BIGINT) = CAST(0 AS TINYINT) + ) = TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql index 59632deb743c..173b6323b243 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql @@ -1,17 +1,23 @@ -WITH t0 AS ( - SELECT t3.`arrdelay`, t3.`dest` - FROM airlines t3 -), -t1 AS ( - SELECT t0.*, avg(t0.`arrdelay`) OVER (PARTITION BY t0.`dest`) AS `dest_avg`, - t0.`arrdelay` - avg(t0.`arrdelay`) OVER (PARTITION BY t0.`dest`) AS `dev` - FROM t0 -) -SELECT t2.* +SELECT + t2.arrdelay, + t2.dest, + t2.dest_avg, + t2.dev FROM ( - SELECT t1.* - FROM t1 - WHERE t1.`dev` IS NOT NULL -) t2 -ORDER BY t2.`dev` DESC + SELECT + t1.arrdelay, + t1.dest, + AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dest_avg, + t1.arrdelay - AVG(t1.arrdelay) OVER (PARTITION BY t1.dest ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dev + FROM ( + SELECT + t0.arrdelay, + t0.dest + FROM airlines AS t0 + ) AS t1 +) AS t2 +WHERE + NOT t2.dev IS NULL +ORDER BY + t2.dev DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql index 7d1f43968acb..88011a4c2cf9 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_project_multiple_times/out.sql @@ -1,24 +1,43 @@ -WITH t0 AS ( - SELECT t3.*, t4.`n_name`, t5.`r_name` - FROM tpch_customer t3 - INNER JOIN tpch_nation t4 - ON t3.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_region t5 - ON t4.`n_regionkey` = t5.`r_regionkey` -), -t1 AS ( - SELECT t0.`n_name`, - sum(CAST(t0.`c_acctbal` AS double)) AS `Sum(Cast(c_acctbal, float64))` - FROM t0 - GROUP BY 1 -), -t2 AS ( - SELECT t1.* - FROM t1 - ORDER BY t1.`Sum(Cast(c_acctbal, float64))` DESC +SELECT + t3.c_name, + t5.r_name, + t4.n_name +FROM tpch_customer AS t3 +INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey +INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey +SEMI JOIN ( + SELECT + t9.n_name, + t9."Sum(Cast(c_acctbal, float64))" + FROM ( + SELECT + t8.n_name, + SUM(CAST(t8.c_acctbal AS DOUBLE)) AS "Sum(Cast(c_acctbal, float64))" + FROM ( + SELECT + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name + FROM tpch_customer AS t3 + INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey + INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey + ) AS t8 + GROUP BY + 1 + ) AS t9 + ORDER BY + t9."Sum(Cast(c_acctbal, float64))" DESC LIMIT 10 -) -SELECT t0.`c_name`, t0.`r_name`, t0.`n_name` -FROM t0 - LEFT SEMI JOIN t2 - ON t0.`n_name` = t2.`n_name` \ No newline at end of file +) AS t12 + ON t4.n_name = t12.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py index b438d3207191..6058efaa962e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/decompiled.py @@ -22,19 +22,27 @@ lit2 = ibis.literal("bar") result = alltypes.select( - [ - alltypes.g.case() - .when(lit, lit2) - .when(lit1, ibis.literal("qux")) - .else_(ibis.literal("default")) - .end() - .name("col1"), - ibis.case() - .when(alltypes.g == lit, lit2) - .when(alltypes.g == lit1, alltypes.g) - .else_(ibis.literal(None).cast("string")) - .end() - .name("col2"), - alltypes, - ] + alltypes.g.case() + .when(lit, lit2) + .when(lit1, ibis.literal("qux")) + .else_(ibis.literal("default")) + .end() + .name("col1"), + ibis.case() + .when(alltypes.g == lit, lit2) + .when(alltypes.g == lit1, alltypes.g) + .else_(ibis.literal(None).cast("string")) + .end() + .name("col2"), + alltypes.a, + alltypes.b, + alltypes.c, + alltypes.d, + alltypes.e, + alltypes.f, + alltypes.g, + alltypes.h, + alltypes.i, + alltypes.j, + alltypes.k, ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql index da032855d0de..4b25c0cd6d1e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_case_in_projection/out.sql @@ -1,12 +1,21 @@ SELECT - CASE t0.`g` - WHEN 'foo' THEN 'bar' - WHEN 'baz' THEN 'qux' - ELSE 'default' - END AS `col1`, + CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS col1, CASE - WHEN t0.`g` = 'foo' THEN 'bar' - WHEN 
t0.`g` = 'baz' THEN t0.`g` - ELSE CAST(NULL AS string) - END AS `col2`, t0.* -FROM alltypes t0 \ No newline at end of file + WHEN t0.g = 'foo' + THEN 'bar' + WHEN t0.g = 'baz' + THEN t0.g + ELSE CAST(NULL AS TEXT) + END AS col2, + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql index e25947808580..df7349fd3de0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_chain_limit_doesnt_collapse/result.sql @@ -1,18 +1,39 @@ -WITH t0 AS ( - SELECT t2.`city`, count(t2.`city`) AS `Count(city)` - FROM tbl t2 - GROUP BY 1 -) -SELECT t1.* +SELECT + * FROM ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC + SELECT + t1.city, + t1."Count(city)" + FROM ( + SELECT + t0.city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Count(city)" DESC LIMIT 10 -) t1 -LIMIT 5 OFFSET (SELECT count(1) + -5 FROM ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC - LIMIT 10 -) t1) \ No newline at end of file +) AS t3 +LIMIT 5 +OFFSET ( + SELECT + COUNT(*) + CAST(-5 AS TINYINT) + FROM ( + SELECT + t1.city, + t1."Count(city)" + FROM ( + SELECT + t0.city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t1 + ORDER BY + t1."Count(city)" DESC + LIMIT 10 + ) AS t3 +) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql index 09660f6902cc..57f027a897ec 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_complex_union/result.sql @@ -1,48 +1,14 @@ -WITH t0 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_two t5 -), -t1 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_one t5 -), -t2 AS ( - SELECT t0.`diag` + 1 AS `diag`, t0.`status` - FROM t0 -), -t3 AS ( - SELECT t1.`diag` + 1 AS `diag`, t1.`status` - FROM t1 -) -SELECT t4.`diag`, t4.`status` +SELECT + t4.diag, + t4.status FROM ( - WITH t0 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_two t5 - ), - t1 AS ( - SELECT t5.`diag`, t5.`status` - FROM aids2_one t5 - ), - t2 AS ( - SELECT t0.`diag` + 1 AS `diag`, t0.`status` - FROM t0 - ), - t3 AS ( - SELECT t1.`diag` + 1 AS `diag`, t1.`status` - FROM t1 - ), - t5 AS ( - SELECT CAST(t2.`diag` AS int) AS `diag`, t2.`status` - FROM t2 - ), - t6 AS ( - SELECT CAST(t3.`diag` AS int) AS `diag`, t3.`status` - FROM t3 - ) - SELECT * - FROM t6 + SELECT + CAST(t0.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t0.status + FROM aids2_one AS t0 UNION ALL - SELECT * - FROM t5 -) t4 \ No newline at end of file + SELECT + CAST(t1.diag + CAST(1 AS TINYINT) AS INT) AS diag, + t1.status + FROM aids2_two AS t1 +) AS t4 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql index cdbb5aa93918..6578a858c971 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_double_nested_subquery_no_aliases/out.sql @@ -1,12 +1,26 @@ -WITH t0 AS ( - SELECT t2.`key1`, t2.`key2`, t2.`key3`, sum(t2.`value`) AS `total` - FROM foo_table t2 - GROUP BY 1, 2, 3 -) -SELECT t1.`key1`, sum(t1.`total`) AS `total` +SELECT + t2.key1, + SUM(t2.total) AS total FROM ( - SELECT t0.`key1`, t0.`key2`, sum(t0.`total`) AS `total` - FROM t0 - GROUP BY 1, 2 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t1.key1, + t1.key2, + SUM(t1.total) AS total + FROM ( + SELECT + t0.key1, + t0.key2, + t0.key3, + SUM(t0.value) AS total + FROM foo_table AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t1 + GROUP BY + 1, + 2 +) AS t2 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py index a4ca0e75b920..23f7c5d41601 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/decompiled.py @@ -1,11 +1,9 @@ import ibis -result = ( - ibis.table( - name="star1", - schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, - ) - .foo_id.endswith(ibis.literal("foo")) - .name("tmp") +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) + +result = star1.foo_id.endswith(ibis.literal("foo")).name("tmp") diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql index 72a7fe461d8c..8114e54d3695 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_endswith/out.sql @@ -1,2 +1,3 @@ -SELECT t0.`foo_id` like concat('%', 'foo') AS `tmp` -FROM star1 t0 \ No newline at end of file +SELECT + SUFFIX(t0.foo_id, 'foo') AS tmp +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql new file mode 100644 index 000000000000..89f8c66d24dc --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_exists_subquery/out.sql @@ -0,0 +1,13 @@ +SELECT + t0.key1, + t0.key2, + t0.value1 +FROM t1 AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM t2 AS t1 + WHERE + t0.key1 = t1.key1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql index 601dc361ff08..bffa5a6720b0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_inside_exists/out.sql @@ -1,12 +1,14 @@ -WITH t0 AS ( - SELECT t2.* - FROM purchases t2 - WHERE t2.`ts` > '2015-08-15' -) -SELECT t1.* -FROM events t1 -WHERE EXISTS ( - SELECT 1 - FROM t0 - WHERE t1.`user_id` = t0.`user_id` -) \ No newline at end of file +SELECT + t0.session_id, + t0.user_id, + t0.event_type, + t0.ts +FROM events AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM purchases AS t1 + WHERE + t1.ts > '2015-08-15' AND t0.user_id = t1.user_id + ) \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql index 3ca2292d594e..fb4bf6a1c3ff 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_predicates/out.sql @@ -1,5 +1,7 @@ -SELECT t0.* -FROM t t0 -WHERE (lower(t0.`color`) LIKE '%de%') AND - (locate('de', lower(t0.`color`)) - 1 >= 0) AND - (regexp_like(lower(t0.`color`), '.*ge.*')) \ No newline at end of file +SELECT + t0.color +FROM t AS t0 +WHERE + LOWER(t0.color) LIKE '%de%' + AND CONTAINS(LOWER(t0.color), 'de') + AND REGEXP_MATCHES(LOWER(t0.color), '.*ge.*', 's') \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql index 84266c91887a..2bdce97b5fa2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_self_join_analysis_bug/result.sql @@ -1,16 +1,40 @@ -WITH t0 AS ( - SELECT t2.`region`, t2.`kind`, sum(t2.`amount`) AS `total` - FROM purchases t2 - WHERE t2.`kind` = 'bar' - GROUP BY 1, 2 -), -t1 AS ( - SELECT t2.`region`, t2.`kind`, sum(t2.`amount`) AS `total` - FROM purchases t2 - WHERE t2.`kind` = 'foo' - GROUP BY 1, 2 -) -SELECT t1.`region`, t1.`total` - t0.`total` AS `diff` -FROM t1 - INNER JOIN t0 - ON t1.`region` = t0.`region` \ No newline at end of file +SELECT + t4.region, + t4.total - t5.total AS diff +FROM ( + SELECT + t1.region, + t1.kind, + t1.total + FROM ( + SELECT + t0.region, + t0.kind, + SUM(t0.amount) AS total + FROM purchases AS t0 + GROUP BY + 1, + 2 + ) AS t1 + WHERE + t1.kind = 'foo' +) AS t4 +INNER JOIN ( + SELECT + t1.region, + t1.kind, + t1.total + FROM ( + SELECT + t0.region, + t0.kind, + SUM(t0.amount) AS total + FROM purchases AS t0 + GROUP BY + 1, + 2 + ) AS t1 + WHERE + t1.kind = 'bar' +) AS t5 + ON t4.region = t5.region \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql index 5b8dd20d8a72..4f1205ba038e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr3.sql @@ -1,7 +1,23 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ln(( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -)) \ No newline at end of file +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql index 74192a7c1507..640d7f8d09a4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_filter_subquery_derived_reduction/expr4.sql @@ -1,7 +1,25 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > (ln(( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -)) + 1) \ No newline at end of file +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > ( + LN( + ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 + ) + ) + CAST(1 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py index 0d2f6cdef48c..37d25bd80aba 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/decompiled.py @@ -4,11 +4,8 @@ tbl = ibis.table( name="tbl", schema={"foo": "int32", "bar": "int64", "value": "float64"} ) -alias = (tbl.foo + tbl.bar).name("baz") -proj = tbl.select([tbl, alias]) +f = tbl.filter(tbl.value > 0) -result = ( - tbl.select([tbl, alias]) - .filter(tbl.value > 0) - .select([proj, (proj.foo * 2).name("qux")]) +result = f.select( + f.foo, f.bar, f.value, (f.foo + f.bar).name("baz"), (f.foo * 2).name("qux") ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql index 74da42fa06bd..bc1d0cc45118 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project.sql @@ -1,5 +1,7 @@ -SELECT t0.*, t0.`foo` * 2 AS `qux` -FROM ( - SELECT t1.*, t1.`foo` + t1.`bar` AS `baz` - FROM tbl t1 -) t0 \ No newline at end of file +SELECT + t0.foo, + t0.bar, + t0.value, + t0.foo + t0.bar AS baz, + t0.foo * CAST(2 AS TINYINT) AS qux +FROM tbl AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql index 7324cd727f5f..4f8cda85cc5c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_fuse_projections/project_filter.sql @@ -1,6 +1,9 @@ -SELECT *, `foo` * 2 AS `qux` -FROM ( - SELECT t1.*, t1.`foo` + t1.`bar` AS `baz` - FROM tbl t1 - WHERE t1.`value` > 0 -) t0 \ No newline at end of file +SELECT + t0.foo, + t0.bar, + t0.value, + t0.foo + t0.bar AS baz, + t0.foo * CAST(2 AS TINYINT) AS qux +FROM tbl AS t0 +WHERE + t0.value > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql index 31d90d8c221e..f768122da94c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_identifier_quoting/out.sql @@ -1,2 +1,4 @@ -SELECT t0.`date` AS `else`, t0.`explain` AS `join` -FROM table t0 \ No newline at end of file +SELECT + t0.date AS else, + t0.explain AS join +FROM table AS t0 \ No newline at end of file diff --git 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql index 674aa2375012..dc33ad4a62ff 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown/result.sql @@ -1,6 +1,7 @@ -SELECT t0.* -FROM ( - SELECT t1.`x` + 1 AS `x` - FROM t t1 -) t0 -WHERE t0.`x` > 1 \ No newline at end of file +SELECT + t0.x + CAST(1 AS TINYINT) AS x +FROM t AS t0 +WHERE + ( + t0.x + CAST(1 AS TINYINT) + ) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql index daf0620a4166..2fd5fe2ddc07 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_incorrect_predicate_pushdown_with_literal/result.sql @@ -1,6 +1,5 @@ -SELECT t0.* -FROM ( - SELECT 1 AS `a` - FROM t t1 -) t0 -WHERE t0.`a` > 1 \ No newline at end of file +SELECT + CAST(1 AS TINYINT) AS a +FROM t AS t0 +WHERE + CAST(1 AS TINYINT) > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py index d1d51461058d..ce3143f2e407 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/decompiled.py @@ -4,18 +4,33 @@ first = ibis.table( name="first", schema={"key1": "string", "key2": "string", "value1": "float64"} ) +second = ibis.table(name="second", schema={"key1": "string", "value2": "float64"}) third = ibis.table( name="third", schema={"key2": "string", "key3": "string", "value3": "float64"} ) -second = ibis.table(name="second", schema={"key1": "string", "value2": "float64"}) fourth = ibis.table(name="fourth", schema={"key3": "string", "value4": "float64"}) -proj = first.inner_join(second, first.key1 == second.key1).select( - [first, second.value2] -) -proj1 = third.inner_join(fourth, third.key3 == fourth.key3).select( - [third, fourth.value4] -) -result = proj.inner_join(proj1, proj.key2 == proj1.key2).select( - [proj, proj1.value3, proj1.value4] +result = ( + first.inner_join(second, first.key1 == second.key1) + .inner_join( + third.inner_join(fourth, third.key3 == fourth.key3).select( + third.key2, third.key3, third.value3, fourth.value4 + ), + first.key2 + == third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .key2, + ) + .select( + first.key1, + first.key2, + first.value1, + second.value2, + third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .value3, + third.inner_join(fourth, third.key3 == fourth.key3) + .select(third.key2, third.key3, third.value3, fourth.value4) + .value4, + ) ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql index d7b970431a49..a66ce49c2bc5 100644 --- 
a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_between_joins/out.sql @@ -1,16 +1,21 @@ -WITH t0 AS ( - SELECT t2.*, t3.`value4` - FROM third t2 - INNER JOIN fourth t3 - ON t2.`key3` = t3.`key3` -), -t1 AS ( - SELECT t2.*, t3.`value2` - FROM first t2 - INNER JOIN second t3 - ON t2.`key1` = t3.`key1` -) -SELECT t1.*, t0.`value3`, t0.`value4` -FROM t1 - INNER JOIN t0 - ON t1.`key2` = t0.`key2` \ No newline at end of file +SELECT + t4.key1, + t4.key2, + t4.value1, + t5.value2, + t11.value3, + t11.value4 +FROM first AS t4 +INNER JOIN second AS t5 + ON t4.key1 = t5.key1 +INNER JOIN ( + SELECT + t6.key2, + t6.key3, + t6.value3, + t7.value4 + FROM third AS t6 + INNER JOIN fourth AS t7 + ON t6.key3 = t7.key3 +) AS t11 + ON t4.key2 = t11.key2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql index 6b3adafa9878..879cd074a3ac 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_filtered_tables_no_pushdown/out.sql @@ -1,20 +1,28 @@ -WITH t0 AS ( - SELECT t2.* - FROM b t2 - WHERE (t2.`year` = 2016) AND - (t2.`month` = 2) AND - (t2.`day` = 29) -), -t1 AS ( - SELECT t2.* - FROM a t2 - WHERE (t2.`year` = 2016) AND - (t2.`month` = 2) AND - (t2.`day` = 29) -) -SELECT t1.`value_a`, t0.`value_b` -FROM t1 - LEFT OUTER JOIN t0 - ON (t1.`year` = t0.`year`) AND - (t1.`month` = t0.`month`) AND - (t1.`day` = t0.`day`) \ No newline at end of file +SELECT + t4.value_a, + t5.value_b +FROM ( + SELECT + t0.year, + t0.month, + t0.day, + t0.value_a + FROM a AS t0 + WHERE + t0.year = CAST(2016 AS SMALLINT) + AND t0.month = CAST(2 AS TINYINT) + AND t0.day = CAST(29 AS TINYINT) +) AS t4 +LEFT OUTER JOIN ( + SELECT + t1.year, + t1.month, + t1.day, + t1.value_b + FROM b AS t1 + WHERE + t1.year = CAST(2016 AS SMALLINT) + AND t1.month = CAST(2 AS TINYINT) + AND t1.day = CAST(29 AS TINYINT) +) AS t5 + ON t4.year = t5.year AND t4.month = t5.month AND t4.day = t5.day \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py index 071de55b252a..e2fefc7e6493 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/decompiled.py @@ -1,19 +1,6 @@ import ibis -tpch_customer = ibis.table( - name="tpch_customer", - schema={ - "c_custkey": "int64", - "c_name": "string", - "c_address": "string", - "c_nationkey": "int16", - "c_phone": "string", - "c_acctbal": "decimal", - "c_mktsegment": "string", - "c_comment": "string", - }, -) tpch_nation = ibis.table( name="tpch_nation", schema={ @@ -27,7 +14,40 @@ name="tpch_region", schema={"r_regionkey": "int16", "r_name": "string", "r_comment": "string"}, ) +tpch_customer = ibis.table( + name="tpch_customer", + schema={ + "c_custkey": "int64", + "c_name": "string", + "c_address": "string", + "c_nationkey": "int16", + "c_phone": "string", + "c_acctbal": "decimal", + "c_mktsegment": "string", + "c_comment": "string", + }, +) -result = tpch_nation.inner_join( - tpch_region, tpch_nation.n_regionkey == 
tpch_region.r_regionkey -).inner_join(tpch_customer, tpch_nation.n_nationkey == tpch_customer.c_nationkey) +result = ( + tpch_nation.inner_join( + tpch_region, tpch_nation.n_regionkey == tpch_region.r_regionkey + ) + .inner_join(tpch_customer, tpch_nation.n_nationkey == tpch_customer.c_nationkey) + .select( + tpch_nation.n_nationkey, + tpch_nation.n_name, + tpch_nation.n_regionkey, + tpch_nation.n_comment, + tpch_region.r_regionkey, + tpch_region.r_name, + tpch_region.r_comment, + tpch_customer.c_custkey, + tpch_customer.c_name, + tpch_customer.c_address, + tpch_customer.c_nationkey, + tpch_customer.c_phone, + tpch_customer.c_acctbal, + tpch_customer.c_mktsegment, + tpch_customer.c_comment, + ) +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql index f21373ec71c4..28ef4cadbb22 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_just_materialized/out.sql @@ -1,6 +1,21 @@ -SELECT * -FROM tpch_nation t0 - INNER JOIN tpch_region t1 - ON t0.`n_regionkey` = t1.`r_regionkey` - INNER JOIN tpch_customer t2 - ON t0.`n_nationkey` = t2.`c_nationkey` \ No newline at end of file +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t4.r_regionkey, + t4.r_name, + t4.r_comment, + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql index b6eaf7d52b2b..ccaf14d42229 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_projection_subquery_bug/out.sql @@ -1,10 +1,17 @@ -WITH t0 AS ( - SELECT t2.`n_nationkey`, t2.`n_name` AS `nation`, t3.`r_name` AS `region` - FROM tpch_nation t2 - INNER JOIN tpch_region t3 - ON t2.`n_regionkey` = t3.`r_regionkey` -) -SELECT t1.*, t0.* -FROM t0 - INNER JOIN tpch_customer t1 - ON t0.`n_nationkey` = t1.`c_nationkey` \ No newline at end of file +SELECT + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment, + t3.n_nationkey, + t3.n_name AS nation, + t4.r_name AS region +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql new file mode 100644 index 000000000000..7b3d0ffe12ef --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_conditional_aggregate/result.sql @@ -0,0 +1,31 @@ +SELECT + t6.on, + t6.by, + t6.on_right, + t6.by_right, + t6.val +FROM ( + SELECT + t2.on, + t2.by, + t3.on AS on_right, + t3.by AS by_right, + t3.val + FROM left AS t2 + LEFT OUTER JOIN right AS t3 + ON t2.by = t3.by +) AS t6 +WHERE 
+ t6.on_right = ( + SELECT + MAX(t4.on) AS "Max(on)" + FROM ( + SELECT + t1.on, + t1.by, + t1.val + FROM right AS t1 + WHERE + t1.by = t0.by AND t1.on <= t0.on + ) AS t4 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql index b7b9d6c68d14..1ea81d25a312 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_join_with_limited_table/out.sql @@ -1,9 +1,13 @@ -WITH t0 AS ( - SELECT t2.* - FROM star1 t2 +SELECT + t4.c, + t4.f, + t4.foo_id, + t4.bar_id +FROM ( + SELECT + * + FROM star1 AS t0 LIMIT 100 -) -SELECT t0.* -FROM t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +) AS t4 +INNER JOIN star2 AS t3 + ON t4.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql index 6a0ea118caef..e32be6672764 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_cte_extract/out.sql @@ -1,8 +1,27 @@ -WITH t0 AS ( - SELECT t2.* - FROM functional_alltypes t2 +SELECT + t2.id, + t2.bool_col, + t2.tinyint_col, + t2.smallint_col, + t2.int_col, + t2.bigint_col, + t2.float_col, + t2.double_col, + t2.date_string_col, + t2.string_col, + t2.timestamp_col, + t2.year, + t2.month +FROM ( + SELECT + * + FROM functional_alltypes AS t0 LIMIT 100 -) -SELECT t0.* -FROM t0 - INNER JOIN t0 t1 \ No newline at end of file +) AS t2 +INNER JOIN ( + SELECT + * + FROM functional_alltypes AS t0 + LIMIT 100 +) AS t4 + ON TRUE \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py index 80cbfb72f010..334f4b168218 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/decompiled.py @@ -27,34 +27,32 @@ functional_alltypes.tinyint_col < selfreference.timestamp_col.minute(), ) .select( - [ - functional_alltypes.id, - functional_alltypes.bool_col, - functional_alltypes.tinyint_col, - functional_alltypes.smallint_col, - functional_alltypes.int_col, - functional_alltypes.bigint_col, - functional_alltypes.float_col, - functional_alltypes.double_col, - functional_alltypes.date_string_col, - functional_alltypes.string_col, - functional_alltypes.timestamp_col, - functional_alltypes.year, - functional_alltypes.month, - selfreference.id.name("id_right"), - selfreference.bool_col.name("bool_col_right"), - selfreference.tinyint_col.name("tinyint_col_right"), - selfreference.smallint_col.name("smallint_col_right"), - selfreference.int_col.name("int_col_right"), - selfreference.bigint_col.name("bigint_col_right"), - selfreference.float_col.name("float_col_right"), - selfreference.double_col.name("double_col_right"), - selfreference.date_string_col.name("date_string_col_right"), - selfreference.string_col.name("string_col_right"), - selfreference.timestamp_col.name("timestamp_col_right"), - selfreference.year.name("year_right"), - selfreference.month.name("month_right"), - ] + 
functional_alltypes.id, + functional_alltypes.bool_col, + functional_alltypes.tinyint_col, + functional_alltypes.smallint_col, + functional_alltypes.int_col, + functional_alltypes.bigint_col, + functional_alltypes.float_col, + functional_alltypes.double_col, + functional_alltypes.date_string_col, + functional_alltypes.string_col, + functional_alltypes.timestamp_col, + functional_alltypes.year, + functional_alltypes.month, + selfreference.id.name("id_right"), + selfreference.bool_col.name("bool_col_right"), + selfreference.tinyint_col.name("tinyint_col_right"), + selfreference.smallint_col.name("smallint_col_right"), + selfreference.int_col.name("int_col_right"), + selfreference.bigint_col.name("bigint_col_right"), + selfreference.float_col.name("float_col_right"), + selfreference.double_col.name("double_col_right"), + selfreference.date_string_col.name("date_string_col_right"), + selfreference.string_col.name("string_col_right"), + selfreference.timestamp_col.name("timestamp_col_right"), + selfreference.year.name("year_right"), + selfreference.month.name("month_right"), ) .count() ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql index ed179f1e792d..26824f377a3e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_limit_with_self_join/out.sql @@ -1,21 +1,34 @@ -SELECT count(1) AS `CountStar()` +SELECT + COUNT(*) AS "CountStar()" FROM ( - SELECT t1.`id`, t1.`bool_col`, t1.`tinyint_col`, t1.`smallint_col`, - t1.`int_col`, t1.`bigint_col`, t1.`float_col`, t1.`double_col`, - t1.`date_string_col`, t1.`string_col`, t1.`timestamp_col`, - t1.`year`, t1.`month`, t2.`id` AS `id_right`, - t2.`bool_col` AS `bool_col_right`, - t2.`tinyint_col` AS `tinyint_col_right`, - t2.`smallint_col` AS `smallint_col_right`, - t2.`int_col` AS `int_col_right`, - t2.`bigint_col` AS `bigint_col_right`, - t2.`float_col` AS `float_col_right`, - t2.`double_col` AS `double_col_right`, - t2.`date_string_col` AS `date_string_col_right`, - t2.`string_col` AS `string_col_right`, - t2.`timestamp_col` AS `timestamp_col_right`, - t2.`year` AS `year_right`, t2.`month` AS `month_right` - FROM functional_alltypes t1 - INNER JOIN functional_alltypes t2 - ON t1.`tinyint_col` < extract(t2.`timestamp_col`, 'minute') -) t0 \ No newline at end of file + SELECT + t1.id, + t1.bool_col, + t1.tinyint_col, + t1.smallint_col, + t1.int_col, + t1.bigint_col, + t1.float_col, + t1.double_col, + t1.date_string_col, + t1.string_col, + t1.timestamp_col, + t1.year, + t1.month, + t3.id AS id_right, + t3.bool_col AS bool_col_right, + t3.tinyint_col AS tinyint_col_right, + t3.smallint_col AS smallint_col_right, + t3.int_col AS int_col_right, + t3.bigint_col AS bigint_col_right, + t3.float_col AS float_col_right, + t3.double_col AS double_col_right, + t3.date_string_col AS date_string_col_right, + t3.string_col AS string_col_right, + t3.timestamp_col AS timestamp_col_right, + t3.year AS year_right, + t3.month AS month_right + FROM functional_alltypes AS t1 + INNER JOIN functional_alltypes AS t3 + ON t1.tinyint_col < EXTRACT('minute' FROM t3.timestamp_col) +) AS t5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql index 66a29f8afa3f..fd5640b69685 
100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_loj_subquery_filter_handling/out.sql @@ -1,16 +1,22 @@ -WITH t0 AS ( - SELECT t2.* - FROM bar t2 - WHERE t2.`id` < 3 -), -t1 AS ( - SELECT t2.* - FROM foo t2 - WHERE t2.`id` < 2 -) -SELECT t1.`id` AS `left_id`, t1.`desc` AS `left_desc`, t0.`id` AS `right_id`, - t0.`desc` AS `right_desc` -FROM t1 - LEFT OUTER JOIN t0 - ON (t1.`id` = t0.`id`) AND - (t1.`desc` = t0.`desc`) \ No newline at end of file +SELECT + t4.id AS left_id, + t4.desc AS left_desc, + t5.id AS right_id, + t5.desc AS right_desc +FROM ( + SELECT + t0.id, + t0.desc + FROM foo AS t0 + WHERE + t0.id < CAST(2 AS TINYINT) +) AS t4 +LEFT OUTER JOIN ( + SELECT + t1.id, + t1.desc + FROM bar AS t1 + WHERE + t1.id < CAST(3 AS TINYINT) +) AS t5 + ON t4.id = t5.id AND t4.desc = t5.desc \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py index da8275915ad4..00f53b84cbf4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/decompiled.py @@ -5,24 +5,13 @@ name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -star3 = ibis.table(name="star3", schema={"bar_id": "string", "value2": "float64"}) star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) +star3 = ibis.table(name="star3", schema={"bar_id": "string", "value2": "float64"}) result = ( star1.left_join(star2, star1.foo_id == star2.foo_id) - .select( - [ - star1.c, - star1.f, - star1.foo_id, - star1.bar_id, - star2.foo_id.name("foo_id_right"), - star2.value1, - star2.value3, - ] - ) .inner_join(star3, star1.bar_id == star3.bar_id) - .select([star1, star2.value1, star3.value2]) + .select(star1.c, star1.f, star1.foo_id, star1.bar_id, star2.value1, star3.value2) ) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql index 2e4276f4463e..4d414f398697 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_joins/out.sql @@ -1,10 +1,12 @@ -SELECT *, `value1`, t1.`value2` -FROM ( - SELECT t2.`c`, t2.`f`, t2.`foo_id`, t2.`bar_id`, - t3.`foo_id` AS `foo_id_right`, t3.`value1`, t3.`value3` - FROM star1 t2 - LEFT OUTER JOIN star2 t3 - ON t2.`foo_id` = t3.`foo_id` -) t0 - INNER JOIN star3 t1 - ON `bar_id` = t1.`bar_id` \ No newline at end of file +SELECT + t3.c, + t3.f, + t3.foo_id, + t3.bar_id, + t4.value1, + t5.value2 +FROM star1 AS t3 +LEFT OUTER JOIN star2 AS t4 + ON t3.foo_id = t4.foo_id +INNER JOIN star3 AS t5 + ON t3.bar_id = t5.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py index 3d256c68c24d..a2704f369e8a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/decompiled.py @@ -1,25 +1,23 @@ import ibis -result = ( - ibis.table( - name="functional_alltypes", - 
schema={ - "id": "int32", - "bool_col": "boolean", - "tinyint_col": "int8", - "smallint_col": "int16", - "int_col": "int32", - "bigint_col": "int64", - "float_col": "float32", - "double_col": "float64", - "date_string_col": "string", - "string_col": "string", - "timestamp_col": "timestamp", - "year": "int32", - "month": "int32", - }, - ) - .limit(20) - .limit(10) +functional_alltypes = ibis.table( + name="functional_alltypes", + schema={ + "id": "int32", + "bool_col": "boolean", + "tinyint_col": "int8", + "smallint_col": "int16", + "int_col": "int32", + "bigint_col": "int64", + "float_col": "float32", + "double_col": "float64", + "date_string_col": "string", + "string_col": "string", + "timestamp_col": "timestamp", + "year": "int32", + "month": "int32", + }, ) + +result = functional_alltypes.limit(20).limit(10) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql index b4f67ae8d56d..7f7d28627c2f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_multiple_limits/out.sql @@ -1,7 +1,9 @@ -SELECT t0.* +SELECT + * FROM ( - SELECT t1.* - FROM functional_alltypes t1 + SELECT + * + FROM functional_alltypes AS t0 LIMIT 20 -) t0 +) AS t1 LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py index 1a8369bd146b..1675b491d5db 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/decompiled.py @@ -19,8 +19,8 @@ "month": "int32", }, ) -agg = functional_alltypes.group_by(functional_alltypes.string_col).aggregate( - functional_alltypes.count().name("nrows") +agg = functional_alltypes.aggregate( + [functional_alltypes.count().name("nrows")], by=[functional_alltypes.string_col] ) limit = agg.limit(5) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql index 687196f1f9b2..315083748ef8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_order_by_on_limit_yield_subquery/out.sql @@ -1,8 +1,14 @@ -SELECT t0.* +SELECT + t2.string_col, + t2.nrows FROM ( - SELECT t1.`string_col`, count(1) AS `nrows` - FROM functional_alltypes t1 - GROUP BY 1 + SELECT + t0.string_col, + COUNT(*) AS nrows + FROM functional_alltypes AS t0 + GROUP BY + 1 LIMIT 5 -) t0 -ORDER BY t0.`string_col` ASC \ No newline at end of file +) AS t2 +ORDER BY + t2.string_col ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql index ad8215827d46..c93dbd9ab8dc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_projection_filter_fuse/out.sql @@ -1,6 +1,7 @@ -SELECT t0.`a`, t0.`b`, t0.`c` -FROM ( - SELECT t1.* - FROM foo t1 - WHERE t1.`a` > 0 -) t0 \ 
No newline at end of file +SELECT + t0.a, + t0.b, + t0.c +FROM foo AS t0 +WHERE + t0.a > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql index c689bbdae2d2..fdc7cf00bcd2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_scalar_subquery_different_table/out.sql @@ -1,6 +1,12 @@ -SELECT t0.* -FROM foo t0 -WHERE t0.`y` > ( - SELECT max(t1.`x`) AS `Max(x)` - FROM bar t1 -) \ No newline at end of file +SELECT + t0.job, + t0.dept_id, + t0.year, + t0.y +FROM foo AS t0 +WHERE + t0.y > ( + SELECT + MAX(t1.x) AS "Max(x)" + FROM bar AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py index 8993f672fc63..824342590c0c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/decompiled.py @@ -6,4 +6,4 @@ schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -result = star1.group_by(star1.foo_id).aggregate(star1.f.sum().name("total")) +result = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id]) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql index 5b190f3b2157..dbf4aadac203 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_explicit_column/out.sql @@ -1,3 +1,6 @@ -SELECT t0.`foo_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1 \ No newline at end of file +SELECT + t0.foo_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py index 96eb30f61071..da7840af5f74 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/decompiled.py @@ -6,6 +6,4 @@ schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) -result = star1.group_by([star1.foo_id, star1.bar_id]).aggregate( - star1.f.sum().name("total") -) +result = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id, star1.bar_id]) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql index eea2be13c7a2..be9b430bd3d3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/agg_string_columns/out.sql @@ -1,3 +1,8 @@ -SELECT t0.`foo_id`, t0.`bar_id`, sum(t0.`f`) AS `total` -FROM star1 t0 -GROUP BY 1, 
2 \ No newline at end of file +SELECT + t0.foo_id, + t0.bar_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1, + 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py index f811fdd901f7..948decc08bfc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).count() +) + +result = star1.count() diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql index 978aa3734aed..a924af63f39d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/aggregate_table_count_metric/out.sql @@ -1,2 +1,3 @@ -SELECT count(1) AS `CountStar(star1)` -FROM star1 t0 \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql index c281273cc2b1..31e87b57f3ea 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/filter_then_limit/out.sql @@ -1,4 +1,9 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > 0 +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py index 736a1368dbe5..3b82e874ef59 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10) +) + +result = star1.limit(10) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql index 9423c5fcba1c..2b6d0fe52716 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_simple/out.sql @@ -1,3 +1,4 @@ -SELECT t0.* -FROM star1 t0 +SELECT + * +FROM star1 AS t0 LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py index 
2767182490a8..f002a286656d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/decompiled.py @@ -1,9 +1,10 @@ import ibis -limit = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10) +) +limit = star1.limit(10) result = limit.filter(limit.f > 0) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql index 371e8e9ed0be..ab4dd6df7158 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_then_filter/out.sql @@ -1,7 +1,13 @@ -SELECT t0.* +SELECT + t1.c, + t1.f, + t1.foo_id, + t1.bar_id FROM ( - SELECT t1.* - FROM star1 t1 + SELECT + * + FROM star1 AS t0 LIMIT 10 -) t0 -WHERE t0.`f` > 0 \ No newline at end of file +) AS t1 +WHERE + t1.f > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py index 97d5358b784c..46ad8f50727d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/decompiled.py @@ -1,7 +1,9 @@ import ibis -result = ibis.table( +star1 = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).limit(10, 5) +) + +result = star1.limit(10, 5) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql index 42a7a0310f16..3c71bda9b962 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/limit_with_offset/out.sql @@ -1,3 +1,5 @@ -SELECT t0.* -FROM star1 t0 -LIMIT 10 OFFSET 5 \ No newline at end of file +SELECT + * +FROM star1 AS t0 +LIMIT 10 +OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py index 48689a1f4b93..676523775712 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/decompiled.py @@ -4,4 +4,4 @@ result = ibis.table( name="star1", schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, -).view() +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql index c3a9a54ab86a..2e1820e62e9f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/self_reference_simple/out.sql @@ -1,2 
+1,3 @@ -SELECT t0.* -FROM star1 t0 \ No newline at end of file +SELECT + * +FROM star1 AS star1_ref \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql index 69691039e13d..bb666f269b2a 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/single_column/out.sql @@ -1,3 +1,8 @@ -SELECT t0.* -FROM star1 t0 -ORDER BY t0.`f` ASC \ No newline at end of file +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +ORDER BY + t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql index 69ab711dca56..f223e15ca36b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_select_sql/test_physical_table_reference_translate/out.sql @@ -1,2 +1,3 @@ -SELECT t0.* -FROM alltypes t0 \ No newline at end of file +SELECT + * +FROM alltypes \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql index a42ae0a70e7f..d262b49d64c1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_self_join_subquery_distinct_equal/out.sql @@ -1,10 +1,20 @@ -WITH t0 AS ( - SELECT t2.*, t3.* - FROM tpch_region t2 - INNER JOIN tpch_nation t3 - ON t2.`r_regionkey` = t3.`n_regionkey` -) -SELECT t0.`r_name`, t1.`n_name` -FROM t0 - INNER JOIN t0 t1 - ON t0.`r_regionkey` = t1.`r_regionkey` \ No newline at end of file +SELECT + t7.r_name, + t7.n_name +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey +INNER JOIN ( + SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment + FROM tpch_region AS t2 + INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey +) AS t7 + ON t2.r_regionkey = t7.r_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py index bd4953665f16..6e73ee23967b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/decompiled.py @@ -9,4 +9,6 @@ name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -result = star1.semi_join(star2, star1.foo_id == star2.foo_id).select(star1) +result = star1.semi_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id +) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql index dc3915e054a0..802ea0aad1c6 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql +++ 
b/ibis/backends/tests/sql/snapshots/test_select_sql/test_semi_join/out.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT SEMI JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +SEMI JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py index 08cedadf59d9..527f8071b328 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/decompiled.py @@ -11,4 +11,4 @@ result = star1.inner_join( star2, [star1.foo_id == star2.foo_id, star1.bar_id == star2.foo_id] -).select(star1) +).select(star1.c, star1.f, star1.foo_id, star1.bar_id) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql index a4895f7b057b..56f5488cdde3 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql index d1fa6db8b809..59916704c75d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/inner_two_preds.sql @@ -1,5 +1,8 @@ -SELECT t0.* -FROM star1 t0 - INNER JOIN star2 t1 - ON (t0.`foo_id` = t1.`foo_id`) AND - (t0.`bar_id` = t1.`foo_id`) \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id AND t2.bar_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql index 390d8f0faf19..0b9cd8c00921 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/left.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - LEFT OUTER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM star1 AS t2 +LEFT OUTER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql index 8ca7b3cb80d0..91950bc952c5 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_simple_joins/outer.sql @@ -1,4 +1,8 @@ -SELECT t0.* -FROM star1 t0 - FULL OUTER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` \ No newline at end of file +SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id +FROM 
star1 AS t2 +FULL OUTER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql index 35e0d7a3c289..5877da115f7b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result1.sql @@ -1,11 +1,12 @@ -WITH t0 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`a` ASC -) -SELECT t1.`b`, count(1) AS `b_count` +SELECT + t1.b, + COUNT(*) AS b_count FROM ( - SELECT t0.`b` - FROM t0 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t0.b + FROM t AS t0 + ORDER BY + t0.a ASC +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql index 0fa7d5376fd9..b40324745e05 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_sort_then_group_by_propagates_keys/result2.sql @@ -1,11 +1,12 @@ -WITH t0 AS ( - SELECT t2.* - FROM t t2 - ORDER BY t2.`b` ASC -) -SELECT t1.`b`, count(1) AS `b_count` +SELECT + t1.b, + COUNT(*) AS b_count FROM ( - SELECT t0.`b` - FROM t0 -) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t0.b + FROM t AS t0 + ORDER BY + t0.b ASC +) AS t1 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py index 2a3bfca845bd..18a6c4da1098 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/decompiled.py @@ -1,11 +1,9 @@ import ibis -result = ( - ibis.table( - name="star1", - schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, - ) - .foo_id.startswith(ibis.literal("foo")) - .name("tmp") +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, ) + +result = star1.foo_id.startswith(ibis.literal("foo")).name("tmp") diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql index 697e3cda4882..05528dd5f869 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_startswith/out.sql @@ -1,2 +1,3 @@ -SELECT t0.`foo_id` like concat('foo', '%') AS `tmp` -FROM star1 t0 \ No newline at end of file +SELECT + STARTS_WITH(t0.foo_id, 'foo') AS tmp +FROM star1 AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql index 3000f853cfbc..ab07ecf75d0e 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_factor_correlated_subquery/out.sql @@ 
-1,19 +1,76 @@ -WITH t0 AS ( - SELECT t3.*, t1.`r_name` AS `region`, t4.`o_totalprice` AS `amount`, - CAST(t4.`o_orderdate` AS timestamp) AS `odate` - FROM tpch_region t1 - INNER JOIN tpch_nation t2 - ON t1.`r_regionkey` = t2.`n_regionkey` - INNER JOIN tpch_customer t3 - ON t3.`c_nationkey` = t2.`n_nationkey` - INNER JOIN tpch_orders t4 - ON t4.`o_custkey` = t3.`c_custkey` -) -SELECT t0.* -FROM t0 -WHERE t0.`amount` > ( - SELECT avg(t1.`amount`) AS `Mean(amount)` - FROM t0 t1 - WHERE t1.`region` = t0.`region` -) +SELECT + t11.c_custkey, + t11.c_name, + t11.c_address, + t11.c_nationkey, + t11.c_phone, + t11.c_acctbal, + t11.c_mktsegment, + t11.c_comment, + t11.region, + t11.amount, + t11.odate +FROM ( + SELECT + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t4.r_name AS region, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey +) AS t11 +WHERE + t11.amount > ( + SELECT + AVG(t13.amount) AS "Mean(amount)" + FROM ( + SELECT + t12.c_custkey, + t12.c_name, + t12.c_address, + t12.c_nationkey, + t12.c_phone, + t12.c_acctbal, + t12.c_mktsegment, + t12.c_comment, + t12.region, + t12.amount, + t12.odate + FROM ( + SELECT + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t4.r_name AS region, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t12 + WHERE + t12.region = t12.region + ) AS t13 + ) LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql index cac23b33d30f..404caac50463 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr.sql @@ -1,6 +1,12 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 -) \ No newline at end of file +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > ( + SELECT + AVG(t0.f) AS "Mean(f)" + FROM star1 AS t0 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql index 4a2a9856ac67..fdbdef535ce4 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_filter_predicate/expr2.sql @@ -1,7 +1,21 @@ -SELECT t0.* -FROM star1 t0 -WHERE t0.`f` > ( - SELECT avg(t0.`f`) AS `Mean(f)` - FROM star1 t0 - WHERE t0.`foo_id` = 'foo' -) \ No newline at end of file +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > ( + SELECT + AVG(t1.f) AS "Mean(f)" + FROM ( + SELECT + t0.c, + t0.f, + t0.foo_id, + 
t0.bar_id + FROM star1 AS t0 + WHERE + t0.foo_id = 'foo' + ) AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py index 9a11e7e90c51..a2a8f64d4fd2 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/decompiled.py @@ -17,11 +17,11 @@ "k": "time", }, ) -agg = alltypes.group_by([alltypes.a, alltypes.g]).aggregate( - alltypes.f.sum().name("metric") -) +agg = alltypes.aggregate([alltypes.f.sum().name("metric")], by=[alltypes.a, alltypes.g]) selfreference = agg.view() -proj = agg.inner_join(selfreference, agg.g == selfreference.g).select(agg) -union = proj.union(proj.view()) +joinchain = agg.inner_join(selfreference, agg.g == selfreference.g).select( + agg.a, agg.g, agg.metric +) +selfreference1 = joinchain.view() -result = union.select([union.a, union.g, union.metric]) +result = joinchain.union(selfreference1) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql index fba1d583e681..57e0912af4a6 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_in_union/out.sql @@ -1,32 +1,57 @@ -WITH t0 AS ( - SELECT t3.`a`, t3.`g`, sum(t3.`f`) AS `metric` - FROM alltypes t3 - GROUP BY 1, 2 -), -t1 AS ( - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` -) -SELECT t2.`a`, t2.`g`, t2.`metric` +SELECT + t8.a, + t8.g, + t8.metric FROM ( - WITH t0 AS ( - SELECT t3.`a`, t3.`g`, sum(t3.`f`) AS `metric` - FROM alltypes t3 - GROUP BY 1, 2 - ), - t1 AS ( - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` - ) - SELECT * - FROM t1 + SELECT + t2.a, + t2.g, + t2.metric + FROM ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t2 + INNER JOIN ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t4 + ON t2.g = t4.g UNION ALL - SELECT t0.* - FROM t0 - INNER JOIN t0 t3 - ON t0.`g` = t3.`g` -) t2 \ No newline at end of file + SELECT + t2.a, + t2.g, + t2.metric + FROM ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t2 + INNER JOIN ( + SELECT + t0.a, + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1, + 2 + ) AS t4 + ON t2.g = t4.g +) AS t8 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql index a59687794a8a..57edded67515 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_subquery_used_for_self_join/out.sql @@ -1,15 +1,41 @@ -WITH t0 AS ( - SELECT t2.`g`, t2.`a`, t2.`b`, sum(t2.`f`) AS `total` - FROM alltypes t2 - GROUP BY 1, 2, 3 -) -SELECT t0.`g`, max(t0.`total` - `total`) AS `metric` +SELECT + t6.g, + MAX(t6.total - t6.total_right) AS metric FROM ( - SELECT t0.`g`, t0.`a`, t0.`b`, t0.`total`, t2.`g` AS `g_right`, - t2.`a` AS `a_right`, t2.`b` AS `b_right`, - t2.`total` AS `total_right` - FROM t0 - INNER JOIN t0 t2 - ON t0.`a` = t2.`b` 
-) t1 -GROUP BY 1 \ No newline at end of file + SELECT + t2.g, + t2.a, + t2.b, + t2.total, + t4.g AS g_right, + t4.a AS a_right, + t4.b AS b_right, + t4.total AS total_right + FROM ( + SELECT + t0.g, + t0.a, + t0.b, + SUM(t0.f) AS total + FROM alltypes AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t2 + INNER JOIN ( + SELECT + t0.g, + t0.a, + t0.b, + SUM(t0.f) AS total + FROM alltypes AS t0 + GROUP BY + 1, + 2, + 3 + ) AS t4 + ON t2.a = t4.b +) AS t6 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql index c3d8fc4e9d9f..5145b5c7361f 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_analysis_bug/out.sql @@ -1,22 +1,45 @@ -WITH t0 AS ( - SELECT t3.`dest`, avg(t3.`arrdelay`) AS `Mean(arrdelay)` - FROM airlines t3 - WHERE t3.`dest` IN ('ORD', 'JFK', 'SFO') - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Mean(arrdelay)` DESC - LIMIT 10 -), -t2 AS ( - SELECT t3.* - FROM airlines t3 - WHERE t3.`dest` IN ('ORD', 'JFK', 'SFO') -) -SELECT `origin`, count(1) AS `CountStar()` -FROM t2 - LEFT SEMI JOIN t1 - ON t2.`dest` = t1.`dest` -GROUP BY 1 \ No newline at end of file +SELECT + t8.origin, + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t2.dest, + t2.origin, + t2.arrdelay + FROM ( + SELECT + t0.dest, + t0.origin, + t0.arrdelay + FROM airlines AS t0 + WHERE + t0.dest IN ('ORD', 'JFK', 'SFO') + ) AS t2 + SEMI JOIN ( + SELECT + t3.dest, + t3."Mean(arrdelay)" + FROM ( + SELECT + t1.dest, + AVG(t1.arrdelay) AS "Mean(arrdelay)" + FROM ( + SELECT + t0.dest, + t0.origin, + t0.arrdelay + FROM airlines AS t0 + WHERE + t0.dest IN ('ORD', 'JFK', 'SFO') + ) AS t1 + GROUP BY + 1 + ) AS t3 + ORDER BY + t3."Mean(arrdelay)" DESC + LIMIT 10 + ) AS t6 + ON t2.dest = t6.dest +) AS t8 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql index 391b17edcdf4..6d86baa0190c 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e1.sql @@ -1,15 +1,24 @@ -WITH t0 AS ( - SELECT t2.`city`, avg(t2.`v2`) AS `Mean(v2)` - FROM tbl t2 - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Mean(v2)` DESC +SELECT + t1.foo, + t1.bar, + t1.city, + t1.v1, + t1.v2 +FROM tbl AS t1 +SEMI JOIN ( + SELECT + t2.city, + t2."Mean(v2)" + FROM ( + SELECT + t0.city, + AVG(t0.v2) AS "Mean(v2)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t2 + ORDER BY + t2."Mean(v2)" DESC LIMIT 10 -) -SELECT * -FROM tbl t2 - LEFT SEMI JOIN t1 - ON t2.`city` = t1.`city` \ No newline at end of file +) AS t5 + ON t1.city = t5.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql index e5e3d95f33bb..53e239ad98c8 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_operation/e2.sql @@ -1,15 +1,24 @@ -WITH t0 AS ( - SELECT t2.`city`, count(t2.`city`) AS `Count(city)` - FROM tbl t2 - GROUP BY 1 -), -t1 AS ( - SELECT t0.* - FROM t0 - ORDER BY t0.`Count(city)` DESC +SELECT + t1.foo, 
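The `test_topk_operation` snapshots in this hunk come from `topk` filters, which now lower to a plain SEMI JOIN against the top-10 aggregate rather than a LEFT SEMI JOIN over CTEs; a sketch with the `tbl` schema implied by the column list:

import ibis

tbl = ibis.table(
    {"foo": "string", "bar": "string", "city": "string", "v1": "float64", "v2": "float64"},
    name="tbl",
)
e1 = tbl[tbl.city.topk(10, by=tbl.v2.mean())]  # ranked by Mean(v2)
e2 = tbl[tbl.city.topk(10)]  # default ranking metric is Count(city)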
+ t1.bar, + t1.city, + t1.v1, + t1.v2 +FROM tbl AS t1 +SEMI JOIN ( + SELECT + t2.city, + t2."Count(city)" + FROM ( + SELECT + t0.city, + COUNT(t0.city) AS "Count(city)" + FROM tbl AS t0 + GROUP BY + 1 + ) AS t2 + ORDER BY + t2."Count(city)" DESC LIMIT 10 -) -SELECT * -FROM tbl t2 - LEFT SEMI JOIN t1 - ON t2.`city` = t1.`city` \ No newline at end of file +) AS t5 + ON t1.city = t5.city \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql index 02391a855345..9dffcedd667b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_predicate_pushdown_bug/out.sql @@ -1,23 +1,50 @@ -WITH t0 AS ( - SELECT t3.*, t4.`n_name`, t5.`r_name` - FROM tpch_customer t3 - INNER JOIN tpch_nation t4 - ON t3.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_region t5 - ON t4.`n_regionkey` = t5.`r_regionkey` -), -t1 AS ( - SELECT t0.`n_name`, sum(t0.`c_acctbal`) AS `Sum(c_acctbal)` - FROM t0 - GROUP BY 1 -), -t2 AS ( - SELECT t1.* - FROM t1 - ORDER BY t1.`Sum(c_acctbal)` DESC +SELECT + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name +FROM tpch_customer AS t3 +INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey +INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey +SEMI JOIN ( + SELECT + t9.n_name, + t9."Sum(c_acctbal)" + FROM ( + SELECT + t8.n_name, + SUM(t8.c_acctbal) AS "Sum(c_acctbal)" + FROM ( + SELECT + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.n_name, + t5.r_name + FROM tpch_customer AS t3 + INNER JOIN tpch_nation AS t4 + ON t3.c_nationkey = t4.n_nationkey + INNER JOIN tpch_region AS t5 + ON t4.n_regionkey = t5.r_regionkey + ) AS t8 + GROUP BY + 1 + ) AS t9 + ORDER BY + t9."Sum(c_acctbal)" DESC LIMIT 10 -) -SELECT * -FROM t0 - LEFT SEMI JOIN t2 - ON t0.`n_name` = t2.`n_name` \ No newline at end of file +) AS t12 + ON t4.n_name = t12.n_name \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql index 23a14c5a7697..fe46767993fe 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_topk_to_aggregate/out.sql @@ -1,8 +1,14 @@ -SELECT t0.* +SELECT + t1.dest, + t1."Mean(arrdelay)" FROM ( - SELECT t1.`dest`, avg(t1.`arrdelay`) AS `Mean(arrdelay)` - FROM airlines t1 - GROUP BY 1 -) t0 -ORDER BY t0.`Mean(arrdelay)` DESC + SELECT + t0.dest, + AVG(t0.arrdelay) AS "Mean(arrdelay)" + FROM airlines AS t0 + GROUP BY + 1 +) AS t1 +ORDER BY + t1."Mean(arrdelay)" DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql index ad04485d51f8..aa75e2be0ae1 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_tpch_self_join_failure/out.sql @@ -1,22 +1,53 @@ -WITH t0 AS ( - SELECT t3.`r_name` AS `region`, t4.`n_name` 
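When a `topk` expression is consumed as a table in its own right instead of as a filter, it compiles directly to the aggregate-order-limit form seen in `test_topk_to_aggregate` above; a sketch (airlines schema assumed):

import ibis

airlines = ibis.table(
    {"dest": "string", "origin": "string", "arrdelay": "float64"}, name="airlines"
)
# standalone topk: no semi-join, just GROUP BY + ORDER BY ... DESC + LIMIT 10
expr = airlines.dest.topk(10, by=airlines.arrdelay.mean())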
AS `nation`, - t6.`o_totalprice` AS `amount`, - CAST(t6.`o_orderdate` AS timestamp) AS `odate` - FROM tpch_region t3 - INNER JOIN tpch_nation t4 - ON t3.`r_regionkey` = t4.`n_regionkey` - INNER JOIN tpch_customer t5 - ON t5.`c_nationkey` = t4.`n_nationkey` - INNER JOIN tpch_orders t6 - ON t6.`o_custkey` = t5.`c_custkey` -), -t1 AS ( - SELECT t0.`region`, extract(t0.`odate`, 'year') AS `year`, - CAST(sum(t0.`amount`) AS double) AS `total` - FROM t0 - GROUP BY 1, 2 -) -SELECT t1.`region`, t1.`year`, t1.`total` - t2.`total` AS `yoy_change` -FROM t1 - INNER JOIN t1 t2 - ON t1.`year` = (t2.`year` - 1) \ No newline at end of file +SELECT + t13.region, + t13.year, + t13.total - t15.total AS yoy_change +FROM ( + SELECT + t11.region, + EXTRACT('year' FROM t11.odate) AS year, + CAST(SUM(t11.amount) AS DOUBLE) AS total + FROM ( + SELECT + t4.r_name AS region, + t5.n_name AS nation, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t11 + GROUP BY + 1, + 2 +) AS t13 +INNER JOIN ( + SELECT + t11.region, + EXTRACT('year' FROM t11.odate) AS year, + CAST(SUM(t11.amount) AS DOUBLE) AS total + FROM ( + SELECT + t4.r_name AS region, + t5.n_name AS nation, + t7.o_totalprice AS amount, + CAST(t7.o_orderdate AS TIMESTAMP) AS odate + FROM tpch_region AS t4 + INNER JOIN tpch_nation AS t5 + ON t4.r_regionkey = t5.n_regionkey + INNER JOIN tpch_customer AS t6 + ON t6.c_nationkey = t5.n_nationkey + INNER JOIN tpch_orders AS t7 + ON t7.o_custkey = t6.c_custkey + ) AS t11 + GROUP BY + 1, + 2 +) AS t15 + ON t13.year = ( + t15.year - CAST(1 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py index d151e7855fb8..13beec098051 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/decompiled.py @@ -21,9 +21,7 @@ ) result = functional_alltypes.filter( - [ - functional_alltypes.timestamp_col - < (ibis.timestamp("2010-01-01 00:00:00") + ibis.interval(3)), - functional_alltypes.timestamp_col < (ibis.now() + ibis.interval(10)), - ] + functional_alltypes.timestamp_col + < (ibis.timestamp("2010-01-01 00:00:00") + ibis.interval(3)), + functional_alltypes.timestamp_col < (ibis.now() + ibis.interval(10)), ).count() diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql index e950adc79c04..102f8d7fc152 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_analyze_scalar_op/out.sql @@ -1,4 +1,26 @@ -SELECT count(1) AS `CountStar()` -FROM functional_alltypes t0 -WHERE (t0.`timestamp_col` < date_add(cast('2010-01-01T00:00:00' as timestamp), INTERVAL 3 MONTH)) AND - (t0.`timestamp_col` < date_add(cast(now() as timestamp), INTERVAL 10 DAY)) \ No newline at end of file +SELECT + COUNT(*) AS "CountStar()" +FROM ( + SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + 
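The decompiled filter above uses unitless `ibis.interval(3)` calls, which presumably carry the original intervals' units through the roundtrip; written by hand, the same predicates would name the units explicitly to match the MONTH/DAY intervals in the SQL (a sketch):

import ibis

t = ibis.table({"timestamp_col": "timestamp"}, name="functional_alltypes")
expr = t.filter(
    t.timestamp_col < ibis.timestamp("2010-01-01 00:00:00") + ibis.interval(months=3),
    t.timestamp_col < ibis.now() + ibis.interval(days=10),
).count()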
t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month + FROM functional_alltypes AS t0 + WHERE + t0.timestamp_col < ( + MAKE_TIMESTAMP(2010, 1, 1, 0, 0, 0.0) + INTERVAL '3' MONTH + ) + AND t0.timestamp_col < ( + CAST(CURRENT_TIMESTAMP AS TIMESTAMP) + INTERVAL '10' DAY + ) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py index c4b5fefc74bd..434705fa8742 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/decompiled.py @@ -8,8 +8,8 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) -proj = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( - [star1, (star1.f - star2.value1).name("diff")] +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id, (star1.f - star2.value1).name("diff") ) -result = proj.filter(proj.diff > 1) +result = joinchain.filter(joinchain.diff > 1) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql index 8c9f86b9b8fe..98fedc20876d 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_no_pushdown_possible/out.sql @@ -1,8 +1,19 @@ -SELECT t0.* +SELECT + t5.c, + t5.f, + t5.foo_id, + t5.bar_id, + t5.diff FROM ( - SELECT t1.*, t1.`f` - t2.`value1` AS `diff` - FROM star1 t1 - INNER JOIN star2 t2 - ON t1.`foo_id` = t2.`foo_id` -) t0 -WHERE t0.`diff` > 1 \ No newline at end of file + SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t2.f - t3.value1 AS diff + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 +WHERE + t5.diff > CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py index 8da00788bce6..adb225e89119 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/decompiled.py @@ -19,4 +19,4 @@ ) lit = ibis.literal(0) -result = alltypes.filter([alltypes.a > lit, alltypes.f.between(lit, ibis.literal(1))]) +result = alltypes.filter(alltypes.a > lit, alltypes.f.between(lit, ibis.literal(1))) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql index db518b690e66..34036b117531 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_between/out.sql @@ -1,4 +1,15 @@ -SELECT t0.* -FROM alltypes t0 -WHERE (t0.`a` > 0) AND - (t0.`f` BETWEEN 0 AND 1) \ No newline at end of file +SELECT + t0.a, + t0.b, + t0.c, + t0.d, + t0.e, + t0.f, + t0.g, + t0.h, + t0.i, + t0.j, + t0.k +FROM alltypes AS t0 +WHERE + t0.a > CAST(0 AS TINYINT) AND t0.f BETWEEN 
CAST(0 AS TINYINT) AND CAST(1 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py index 53f9b4068aad..45ea3c0684a0 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/decompiled.py @@ -8,9 +8,8 @@ star2 = ibis.table( name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} ) - -result = ( - star1.inner_join(star2, star1.foo_id == star2.foo_id) - .select([star1, star2.value1, star2.value3]) - .filter([star1.f > 0, star2.value3 < 1000]) +joinchain = star1.inner_join(star2, star1.foo_id == star2.foo_id).select( + star1.c, star1.f, star1.foo_id, star1.bar_id, star2.value1, star2.value3 ) + +result = joinchain.filter(joinchain.f > 0, joinchain.value3 < 1000) diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql index 8ecd49adabc9..3f187d69a7fc 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_where_with_join/out.sql @@ -1,6 +1,21 @@ -SELECT t0.*, t1.`value1`, t1.`value3` -FROM star1 t0 - INNER JOIN star2 t1 - ON t0.`foo_id` = t1.`foo_id` -WHERE (t0.`f` > 0) AND - (t1.`value3` < 1000) \ No newline at end of file +SELECT + t5.c, + t5.f, + t5.foo_id, + t5.bar_id, + t5.value1, + t5.value3 +FROM ( + SELECT + t2.c, + t2.f, + t2.foo_id, + t2.bar_id, + t3.value1, + t3.value3 + FROM star1 AS t2 + INNER JOIN star2 AS t3 + ON t2.foo_id = t3.foo_id +) AS t5 +WHERE + t5.f > CAST(0 AS TINYINT) AND t5.value3 < CAST(1000 AS SMALLINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql new file mode 100644 index 000000000000..35e4fe0adc24 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_count/out.sql @@ -0,0 +1,14 @@ +SELECT + t1.foo_id, + t1.total +FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total, + COUNT(*) AS "CountStar()" + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1."CountStar()" > CAST(100 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql new file mode 100644 index 000000000000..47945167c00a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/having_sum/out.sql @@ -0,0 +1,13 @@ +SELECT + t1.foo_id, + t1.total +FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1.total > CAST(10 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql new file mode 100644 index 000000000000..dbf4aadac203 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/single/out.sql @@ -0,0 +1,6 @@ +SELECT + t0.foo_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql 
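A recurring change across these snapshots: integer literals are now rendered with an explicit cast to their inferred type, i.e. the smallest integer type that holds the value; a sketch of the inference:

import ibis

assert str(ibis.literal(1).type()) == "int8"      # -> CAST(1 AS TINYINT)
assert str(ibis.literal(1000).type()) == "int16"  # -> CAST(1000 AS SMALLINT)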
b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql new file mode 100644 index 000000000000..be9b430bd3d3 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_aggregate/two/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.foo_id, + t0.bar_id, + SUM(t0.f) AS total +FROM star1 AS t0 +GROUP BY + 1, + 2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql new file mode 100644 index 000000000000..c4f686443cee --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_between/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col BETWEEN CAST(5 AS TINYINT) AND CAST(10 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql new file mode 100644 index 000000000000..d2e722d4fa18 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/and/out.sql @@ -0,0 +1,7 @@ +SELECT + ( + t0.double_col > CAST(0 AS TINYINT) + ) AND ( + t0.double_col < CAST(5 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql new file mode 100644 index 000000000000..4b0542464299 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_boolean_conjunction/or/out.sql @@ -0,0 +1,7 @@ +SELECT + ( + t0.double_col < CAST(0 AS TINYINT) + ) OR ( + t0.double_col > CAST(5 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql new file mode 100644 index 000000000000..df5a9329fcb4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_coalesce/out.sql @@ -0,0 +1,7 @@ +SELECT + COALESCE( + CASE WHEN t0.double_col > CAST(30 AS TINYINT) THEN t0.double_col ELSE NULL END, + NULL, + t0.float_col + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql new file mode 100644 index 000000000000..2ad44306e1cd --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/eq/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col = CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql new file mode 100644 index 000000000000..8b722a819754 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ge/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col >= CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql new file mode 100644 index 000000000000..ca8c8d134d60 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/gt/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col > CAST(5 AS TINYINT) AS tmp +FROM 
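The coalesce snapshot above pairs `COALESCE` with a searched `CASE`; an expression along these lines would produce it (a sketch, with `ibis.NA` as the null literal and the column types assumed):

import ibis

t = ibis.table(
    {"double_col": "float64", "float_col": "float32"}, name="functional_alltypes"
)
d, f = t.double_col, t.float_col
# CASE WHEN d > 30 THEN d ELSE NULL END, then coalesce with NULL and f
expr = ibis.coalesce((d > 30).ifelse(d, ibis.NA), ibis.NA, f).name("tmp")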
functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql new file mode 100644 index 000000000000..53bf7d0d2dbb --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/le/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col <= CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql new file mode 100644 index 000000000000..627be1840789 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/lt/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col < CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql new file mode 100644 index 000000000000..685a418a8eb4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_comparisons/ne/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col <> CAST(5 AS TINYINT) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql new file mode 100644 index 000000000000..1aa27939686a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_cte_factor_distinct_but_equal/out.sql @@ -0,0 +1,20 @@ +SELECT + t3.g, + t3.metric +FROM ( + SELECT + t0.g, + SUM(t0.f) AS metric + FROM alltypes AS t0 + GROUP BY + 1 +) AS t3 +INNER JOIN ( + SELECT + t1.g, + SUM(t1.f) AS metric + FROM alltypes AS t1 + GROUP BY + 1 +) AS t6 + ON t3.g = t6.g \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql new file mode 100644 index 000000000000..37382bcf5149 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/count_distinct/out.sql @@ -0,0 +1,3 @@ +SELECT + COUNT(DISTINCT t0.int_col) AS nunique +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql new file mode 100644 index 000000000000..14b6c6d83477 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/group_by_count_distinct/out.sql @@ -0,0 +1,6 @@ +SELECT + t0.string_col, + COUNT(DISTINCT t0.int_col) AS nunique +FROM functional_alltypes AS t0 +GROUP BY + 1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql new file mode 100644 index 000000000000..483b4fef6f49 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/projection_distinct/out.sql @@ -0,0 +1,8 @@ +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col, + t0.int_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql new file mode 100644 index 000000000000..d38aa10366c4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/single_column_projection_distinct/out.sql @@ -0,0 +1,7 @@ +SELECT DISTINCT + * +FROM ( + SELECT + t0.string_col + FROM functional_alltypes AS t0 +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql new file mode 100644 index 000000000000..dd4c570ec517 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_distinct/table_distinct/out.sql @@ -0,0 +1,3 @@ +SELECT DISTINCT + * +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql new file mode 100644 index 000000000000..b0be257b254f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e1.sql @@ -0,0 +1,13 @@ +SELECT + t0.key1, + t0.key2, + t0.value1 +FROM foo_t AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + t0.key1 = t1.key1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql new file mode 100644 index 000000000000..f397c2b7251e --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_exists/e2.sql @@ -0,0 +1,17 @@ +SELECT + t0.key1, + t0.key2, + t0.value1 +FROM foo_t AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + ( + t0.key1 = t1.key1 + ) AND ( + t1.key2 = 'foo' + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql new file mode 100644 index 000000000000..c0ba260a78bb --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_filter_group_by_agg_with_same_name/out.sql @@ -0,0 +1,13 @@ +SELECT + t1.int_col, + t1.bigint_col +FROM ( + SELECT + t0.int_col, + SUM(t0.bigint_col) AS bigint_col + FROM t AS t0 + GROUP BY + 1 +) AS t1 +WHERE + t1.bigint_col = CAST(60 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql new file mode 100644 index 000000000000..b9a81bb4916f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_gh_1045/out.sql @@ -0,0 +1,37 @@ +SELECT + t5.t1_id1, + t5.t1_val1, + t10.id3, + t10.val2, + t10.dt, + t10.t3_val2, + t10.id2a, + t10.id2b, + t10.val2_right +FROM ( + SELECT + t0.id1 AS t1_id1, + t0.val1 AS t1_val1 + FROM test1 AS t0 +) AS t5 +LEFT OUTER JOIN ( + SELECT + t7.id3, + t7.val2, + t7.dt, + t7.t3_val2, + t3.id2a, + t3.id2b, + t3.val2 AS val2_right + FROM ( + SELECT + CAST(t1.id3 AS BIGINT) AS id3, + t1.val2, + t1.dt, + CAST(t1.id3 AS BIGINT) AS t3_val2 + FROM test3 AS t1 + ) AS t7 + INNER JOIN test2 AS t3 + ON t3.id2b = t7.id3 +) AS t10 + ON t5.t1_id1 = t10.id2a \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql new file mode 100644 index 000000000000..d06c0383bb09 --- /dev/null 
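The `EXISTS` predicates in `e1.sql`/`e2.sql` above are produced by calling `.any()` on a correlated boolean expression; a sketch with the schemas implied by the snapshots:

import ibis

foo_t = ibis.table(
    {"key1": "string", "key2": "string", "value1": "float64"}, name="foo_t"
)
bar_t = ibis.table({"key1": "string", "key2": "string"}, name="bar_t")
e1 = foo_t.filter((foo_t.key1 == bar_t.key1).any())
e2 = foo_t.filter(((foo_t.key1 == bar_t.key1) & (bar_t.key2 == "foo")).any())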
+++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/isnull/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col IS NULL AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql new file mode 100644 index 000000000000..f33c3466083a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql @@ -0,0 +1,3 @@ +SELECT + NOT t0.double_col IS NULL AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql new file mode 100644 index 000000000000..28ef4cadbb22 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_join_just_materialized/out.sql @@ -0,0 +1,21 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment, + t4.r_regionkey, + t4.r_name, + t4.r_comment, + t5.c_custkey, + t5.c_name, + t5.c_address, + t5.c_nationkey, + t5.c_phone, + t5.c_acctbal, + t5.c_mktsegment, + t5.c_comment +FROM tpch_nation AS t3 +INNER JOIN tpch_region AS t4 + ON t3.n_regionkey = t4.r_regionkey +INNER JOIN tpch_customer AS t5 + ON t3.n_nationkey = t5.c_nationkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql new file mode 100644 index 000000000000..9289a835e37a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql new file mode 100644 index 000000000000..38534295064f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/inner_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +INNER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql new file mode 100644 index 000000000000..7048d19d0ba4 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +LEFT OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql new file mode 100644 index 000000000000..26c408b5be1a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/left_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +LEFT OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at 
end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql new file mode 100644 index 000000000000..f14ac5c0d92e --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer/out.sql @@ -0,0 +1,11 @@ +SELECT + t2.r_regionkey, + t2.r_name, + t2.r_comment, + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +FULL OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql new file mode 100644 index 000000000000..1b339a3f247b --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_joins/outer_select/out.sql @@ -0,0 +1,8 @@ +SELECT + t3.n_nationkey, + t3.n_name, + t3.n_regionkey, + t3.n_comment +FROM tpch_region AS t2 +FULL OUTER JOIN tpch_nation AS t3 + ON t2.r_regionkey = t3.n_regionkey \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql new file mode 100644 index 000000000000..2b6d0fe52716 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn0/out.sql @@ -0,0 +1,4 @@ +SELECT + * +FROM star1 AS t0 +LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql new file mode 100644 index 000000000000..3c71bda9b962 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit/expr_fn1/out.sql @@ -0,0 +1,5 @@ +SELECT + * +FROM star1 AS t0 +LIMIT 10 +OFFSET 5 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql new file mode 100644 index 000000000000..31e87b57f3ea --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_filter/out.sql @@ -0,0 +1,9 @@ +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) +LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql new file mode 100644 index 000000000000..ab4dd6df7158 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_limit_subquery/out.sql @@ -0,0 +1,13 @@ +SELECT + t1.c, + t1.f, + t1.foo_id, + t1.bar_id +FROM ( + SELECT + * + FROM star1 AS t0 + LIMIT 10 +) AS t1 +WHERE + t1.f > CAST(0 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py new file mode 100644 index 000000000000..593c3faf221f --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/decompiled.py @@ -0,0 +1,17 @@ +import ibis + + +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, +) +star2 = ibis.table( + name="star2", schema={"foo_id": "string", "value1": "float64", "value3": "float64"} +) +agg = star1.aggregate([star1.f.sum().name("total")], by=[star1.foo_id]) +joinchain = agg.inner_join(star2, agg.foo_id 
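The limit snapshots above map to simple chained calls; note in `test_limit_subquery` that filtering after a limit forces the limited relation into a subquery so the predicate cannot change the row set being limited (a sketch, star1 schema as elsewhere in this diff):

import ibis

star1 = ibis.table(
    {"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, name="star1"
)
expr_fn0 = star1.limit(10)
expr_fn1 = star1.limit(10, offset=5)
limit_filter = star1[star1.f > 0].limit(10)            # WHERE then LIMIT
limit_subquery = star1.limit(10).filter(lambda t: t.f > 0)  # LIMIT in a subquery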
== star2.foo_id).select( + agg.foo_id, agg.total, star2.value1 +) +f = joinchain.filter(joinchain.total > 100) + +result = f.order_by(f.total.desc()) diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql new file mode 100644 index 000000000000..b6c7dab91969 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_lower_projection_sort_key/out.sql @@ -0,0 +1,24 @@ +SELECT + t6.foo_id, + t6.total, + t6.value1 +FROM ( + SELECT + t4.foo_id, + t4.total, + t2.value1 + FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 + ) AS t4 + INNER JOIN star2 AS t2 + ON t4.foo_id = t2.foo_id +) AS t6 +WHERE + t6.total > CAST(100 AS TINYINT) +ORDER BY + t6.total DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql new file mode 100644 index 000000000000..d9c97bc180ca --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_multi_join/out.sql @@ -0,0 +1,20 @@ +SELECT + t4.x1, + t4.y1, + t5.x2, + t11.x3, + t11.y2, + t11.x4 +FROM t1 AS t4 +INNER JOIN t2 AS t5 + ON t4.x1 = t5.x2 +INNER JOIN ( + SELECT + t6.x3, + t6.y2, + t7.x4 + FROM t3 AS t6 + INNER JOIN t4 AS t7 + ON t6.x3 = t7.x4 +) AS t11 + ON t4.y1 = t11.y2 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql new file mode 100644 index 000000000000..82946b9a13bc --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_mutate_filter_join_no_cross_join/out.sql @@ -0,0 +1,5 @@ +SELECT + t0.person_id +FROM person AS t0 +WHERE + CAST(400 AS SMALLINT) <= CAST(40 AS TINYINT) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql new file mode 100644 index 000000000000..66e751eda132 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_named_expr/out.sql @@ -0,0 +1,3 @@ +SELECT + t0.double_col * CAST(2 AS TINYINT) AS foo +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql new file mode 100644 index 000000000000..3ea20ad88c56 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_negate/out.sql @@ -0,0 +1,5 @@ +SELECT + NOT ( + t0.double_col > CAST(0 AS TINYINT) + ) AS tmp +FROM functional_alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql new file mode 100644 index 000000000000..580263e1f156 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cart_join/out.sql @@ -0,0 +1,37 @@ +SELECT + t7.ancestor_node_sort_order, + t7.n +FROM ( + SELECT + t6.ancestor_node_sort_order, + CAST(1 AS TINYINT) AS n + FROM ( + SELECT + t2.product_id, + t4.ancestor_level_name, + t4.ancestor_level_number, + t4.ancestor_node_sort_order, + t4.descendant_node_natural_key, + t4.product_level_name + FROM facts AS t2 + INNER JOIN ( + SELECT + t1.ancestor_level_name, + t1.ancestor_level_number, + t1.ancestor_node_sort_order, + t1.descendant_node_natural_key, + 
CONCAT( + LPAD('-', ( + t1.ancestor_level_number - CAST(1 AS TINYINT) + ) * CAST(7 AS TINYINT), '-'), + t1.ancestor_level_name + ) AS product_level_name + FROM products AS t1 + ) AS t4 + ON t2.product_id = t4.descendant_node_natural_key + ) AS t6 + GROUP BY + 1 +) AS t7 +ORDER BY + t7.ancestor_node_sort_order ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql new file mode 100644 index 000000000000..a3df1de479ac --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cartesian_join/out.sql @@ -0,0 +1,61 @@ +SELECT + t15.customer_id, + t15.first_name, + t15.last_name, + t15.first_order, + t15.most_recent_order, + t15.number_of_orders, + t13.total_amount AS customer_lifetime_value +FROM ( + SELECT + t12.customer_id, + t12.first_name, + t12.last_name, + t12.first_order, + t12.most_recent_order, + t12.number_of_orders + FROM ( + SELECT + t3.customer_id, + t3.first_name, + t3.last_name, + t8.customer_id AS customer_id_right, + t8.first_order, + t8.most_recent_order, + t8.number_of_orders + FROM customers AS t3 + LEFT OUTER JOIN ( + SELECT + t2.customer_id, + MIN(t2.order_date) AS first_order, + MAX(t2.order_date) AS most_recent_order, + COUNT(t2.order_id) AS number_of_orders + FROM orders AS t2 + GROUP BY + 1 + ) AS t8 + ON t3.customer_id = t8.customer_id + ) AS t12 +) AS t15 +LEFT OUTER JOIN ( + SELECT + t9.customer_id, + SUM(t9.amount) AS total_amount + FROM ( + SELECT + t4.payment_id, + t4.order_id, + t4.payment_method, + t4.amount, + t5.order_id AS order_id_right, + t5.customer_id, + t5.order_date, + t5.status + FROM payments AS t4 + LEFT OUTER JOIN orders AS t5 + ON t4.order_id = t5.order_id + ) AS t9 + GROUP BY + 1 +) AS t13 + ON t15.customer_id = t13.customer_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql new file mode 100644 index 000000000000..fc3f08afa72b --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_no_cross_join/out.sql @@ -0,0 +1,16 @@ +SELECT + t3.id, + t3.personal, + t3.family, + t4.taken, + t4.person, + t4.quant, + t4.reading, + t5.id AS id_right, + t5.site, + t5.dated +FROM person AS t3 +INNER JOIN survey AS t4 + ON t3.id = t4.person +INNER JOIN visited AS t5 + ON t5.id = t4.taken \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql new file mode 100644 index 000000000000..548a1efef2ec --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_not_exists/out.sql @@ -0,0 +1,15 @@ +SELECT + t0.key1, + t0.key2, + t0.value1 +FROM foo_t AS t0 +WHERE + NOT ( + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM bar_t AS t1 + WHERE + t0.key1 = t1.key1 + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql new file mode 100644 index 000000000000..bb666f269b2a --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/column/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +ORDER BY + t0.f ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql 
b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql new file mode 100644 index 000000000000..356b091282c5 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by/random/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +ORDER BY + RANDOM() ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql new file mode 100644 index 000000000000..99a46813f652 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_order_by_expr/out.sql @@ -0,0 +1,8 @@ +SELECT + t0.a, + t0.b +FROM t AS t0 +WHERE + t0.a = CAST(1 AS TINYINT) +ORDER BY + CONCAT(t0.b, 'a') ASC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql new file mode 100644 index 000000000000..31c40c343501 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_searched_case/out.sql @@ -0,0 +1,9 @@ +SELECT + CASE + WHEN t0.f > CAST(0 AS TINYINT) + THEN t0.d * CAST(2 AS TINYINT) + WHEN t0.c < CAST(0 AS TINYINT) + THEN t0.a * CAST(2 AS TINYINT) + ELSE CAST(NULL AS BIGINT) + END AS tmp +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql new file mode 100644 index 000000000000..c598c1264a74 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/anti.sql @@ -0,0 +1,25 @@ +SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month +FROM functional_alltypes AS t0 +WHERE + NOT ( + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM functional_alltypes AS t1 + WHERE + t0.string_col = t1.string_col + ) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql new file mode 100644 index 000000000000..16089afced58 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_in_not_exists/semi.sql @@ -0,0 +1,23 @@ +SELECT + t0.id, + t0.bool_col, + t0.tinyint_col, + t0.smallint_col, + t0.int_col, + t0.bigint_col, + t0.float_col, + t0.double_col, + t0.date_string_col, + t0.string_col, + t0.timestamp_col, + t0.year, + t0.month +FROM functional_alltypes AS t0 +WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM functional_alltypes AS t1 + WHERE + t0.string_col = t1.string_col + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql new file mode 100644 index 000000000000..e9c93029c637 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_self_reference_join/out.sql @@ -0,0 +1,8 @@ +SELECT + t1.c, + t1.f, + t1.foo_id, + t1.bar_id +FROM star1 AS t1 +INNER JOIN star1 AS t3 + ON t1.foo_id = t3.bar_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql new file mode 
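Sketches of the expressions behind the searched-case snapshot above and the simple-case snapshot just below (column types assumed; the implicit ELSE is NULL unless `else_` is given):

import ibis

t = ibis.table(
    {"a": "int8", "c": "int32", "d": "float64", "f": "float64", "g": "string"},
    name="alltypes",
)
searched = ibis.case().when(t.f > 0, t.d * 2).when(t.c < 0, t.a * 2).end().name("tmp")
simple = t.g.case().when("foo", "bar").when("baz", "qux").else_("default").end().name("tmp")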
100644 index 000000000000..3575c8d6e653 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_simple_case/out.sql @@ -0,0 +1,3 @@ +SELECT + CASE t0.g WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS tmp +FROM alltypes AS t0 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql new file mode 100644 index 000000000000..e8f9420b4263 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_sort_aggregation_translation_failure/out.sql @@ -0,0 +1,13 @@ +SELECT + t1.string_col, + t1.foo +FROM ( + SELECT + t0.string_col, + MAX(t0.double_col) AS foo + FROM functional_alltypes AS t0 + GROUP BY + 1 +) AS t1 +ORDER BY + t1.foo DESC \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql new file mode 100644 index 000000000000..e0f8941f3527 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_subquery_aliased/out.sql @@ -0,0 +1,14 @@ +SELECT + t4.foo_id, + t4.total, + t2.value1 +FROM ( + SELECT + t0.foo_id, + SUM(t0.f) AS total + FROM star1 AS t0 + GROUP BY + 1 +) AS t4 +INNER JOIN star2 AS t2 + ON t4.foo_id = t2.foo_id \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql new file mode 100644 index 000000000000..0b1767f7a740 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery/out.sql @@ -0,0 +1,21 @@ +SELECT + t0.job, + t0.dept_id, + t0.year, + t0.y +FROM foo AS t0 +WHERE + t0.y > ( + SELECT + AVG(t2.y) AS "Mean(y)" + FROM ( + SELECT + t1.job, + t1.dept_id, + t1.year, + t1.y + FROM foo AS t1 + WHERE + t1.dept_id = t1.dept_id + ) AS t2 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql new file mode 100644 index 000000000000..9f3f24cd76eb --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_correlated_subquery_with_join/out.sql @@ -0,0 +1,31 @@ +SELECT + t9.p_partkey, + t9.ps_supplycost +FROM ( + SELECT + t3.p_partkey, + t4.ps_supplycost + FROM part AS t3 + INNER JOIN partsupp AS t4 + ON t3.p_partkey = t4.ps_partkey +) AS t9 +WHERE + t9.ps_supplycost = ( + SELECT + MIN(t11.ps_supplycost) AS "Min(ps_supplycost)" + FROM ( + SELECT + t10.ps_partkey, + t10.ps_supplycost + FROM ( + SELECT + t5.ps_partkey, + t5.ps_supplycost + FROM partsupp AS t5 + INNER JOIN supplier AS t6 + ON t6.s_suppkey = t5.ps_suppkey + ) AS t10 + WHERE + t10.ps_partkey = t9.p_partkey + ) AS t11 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py new file mode 100644 index 000000000000..14517d8e9493 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/decompiled.py @@ -0,0 +1,9 @@ +import ibis + + +star1 = ibis.table( + name="star1", + schema={"c": "int32", "f": "float64", "foo_id": "string", "bar_id": "string"}, +) + +result = star1.filter(star1.f > 0, star1.c < (star1.f * 2)) diff 
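The correlated-subquery snapshot above (`test_where_correlated_subquery`) can be built by filtering against an aggregate over a `view()` of the same table; a sketch, assuming the `foo` schema shown in the snapshot:

import ibis

foo = ibis.table(
    {"job": "string", "dept_id": "string", "year": "int32", "y": "float64"}, name="foo"
)
view = foo.view()
# correlated scalar: mean of y over rows of the view matching the outer dept_id
stat = view.filter(foo.dept_id == view.dept_id).y.mean()
expr = foo.filter(foo.y > stat)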
--git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql new file mode 100644 index 000000000000..e1914ac959bd --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_simple_comparisons/out.sql @@ -0,0 +1,10 @@ +SELECT + t0.c, + t0.f, + t0.foo_id, + t0.bar_id +FROM star1 AS t0 +WHERE + t0.f > CAST(0 AS TINYINT) AND t0.c < ( + t0.f * CAST(2 AS TINYINT) + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql new file mode 100644 index 000000000000..d64e69ba3894 --- /dev/null +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_where_uncorrelated_subquery/out.sql @@ -0,0 +1,12 @@ +SELECT + t0.job, + t0.dept_id, + t0.year, + t0.y +FROM foo AS t0 +WHERE + t0.job IN ( + SELECT + t1.job + FROM bar AS t1 + ) \ No newline at end of file diff --git a/ibis/backends/tests/sql/test_compiler.py b/ibis/backends/tests/sql/test_compiler.py index 646b9ad9b652..bc789dba86ba 100644 --- a/ibis/backends/tests/sql/test_compiler.py +++ b/ibis/backends/tests/sql/test_compiler.py @@ -5,8 +5,6 @@ import pytest import ibis - -# from ibis.backends.base.sql.compiler import Compiler from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip, schemas_eq @@ -207,7 +205,7 @@ def test_subquery_where_location(snapshot): .aggregate(foo=lambda t: t.float_col.sum()) .foo.count() ) - out = Compiler.to_sql(expr, params={param: "20140101"}) + out = to_sql(expr, params={param: "20140101"}) snapshot.assert_match(out, "out.sql") # params get different auto incremented counter identifiers assert_decompile_roundtrip(expr, snapshot, eq=schemas_eq) diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index 78eee22af88c..2ffaad9120f2 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -5,9 +5,7 @@ import ibis from ibis import _ - -# from ibis.backends.base.sql.compiler import Compiler -from ibis.backends.tests.sql.conftest import get_query, to_sql +from ibis.backends.tests.sql.conftest import to_sql from ibis.tests.util import assert_decompile_roundtrip, schemas_eq pytestmark = pytest.mark.duckdb @@ -37,7 +35,9 @@ id="limit_then_filter", ), param(lambda star1, **_: star1.count(), id="aggregate_table_count_metric"), - param(lambda star1, **_: star1.view(), id="self_reference_simple"), + # TODO: this is automatically simplified to `t`, so it's probably not a + # useful test to roundtrip *just* a call to view + # param(lambda star1, **_: star1.view(), id="self_reference_simple"), param(lambda t, **_: t, id="test_physical_table_reference_translate"), ], ) @@ -50,11 +50,7 @@ def test_select_sql(alltypes, star1, expr_fn, snapshot): def test_nameless_table(snapshot): # Generate a unique table name when we haven't passed on nameless = ibis.table([("key", "string")]) - assert to_sql(nameless) == f"SELECT t0.*\nFROM {nameless.op().name} t0" - - expr = ibis.table([("key", "string")], name="baz") - snapshot.assert_match(to_sql(expr), "out.sql") - assert_decompile_roundtrip(expr, snapshot) + assert nameless.op().name is not None def test_simple_joins(star1, star2, snapshot): @@ -250,19 +246,11 @@ def test_fuse_projections(snapshot): def test_projection_filter_fuse(projection_fuse_filter, snapshot): expr1, expr2, 
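`test_where_uncorrelated_subquery` above comes from an `isin` against a column of another table, which compiles to `IN (SELECT ...)`; a sketch:

import ibis

foo = ibis.table(
    {"job": "string", "dept_id": "string", "year": "int32", "y": "float64"}, name="foo"
)
bar = ibis.table({"job": "string"}, name="bar")
expr = foo.filter(foo.job.isin(bar.job))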
expr3 = projection_fuse_filter - sql1 = Compiler.to_sql(expr1) - sql2 = Compiler.to_sql(expr2) + sql1 = ibis.to_sql(expr1) + sql2 = ibis.to_sql(expr2) assert sql1 == sql2 - # ideally sql1 == sql3 but the projection logic has been a mess for a long - # time and causes bugs like - # - # https://github.com/ibis-project/ibis/issues/4003 - # - # so we're conservative in fusing projections and filters - # - # even though it may seem obvious what to do, it's not snapshot.assert_match(to_sql(expr3), "out.sql") @@ -429,14 +417,14 @@ def test_scalar_subquery_different_table(foo, bar, snapshot): snapshot.assert_match(to_sql(expr), "out.sql") -def test_exists_subquery_repr(t1, t2): +def test_exists_subquery(t1, t2, snapshot): # GH #660 cond = t1.key1 == t2.key1 expr = t1[cond.any()] - stmt = get_query(expr) - repr(stmt.where[0]) + snapshot.assert_match(to_sql(expr), "out.sql") + assert repr(expr) def test_filter_inside_exists(snapshot): @@ -491,9 +479,6 @@ def test_multiple_limits(functional_alltypes, snapshot): t = functional_alltypes expr = t.limit(20).limit(10) - stmt = get_query(expr) - - assert stmt.limit.n == 10 snapshot.assert_match(to_sql(expr), "out.sql") assert_decompile_roundtrip(expr, snapshot) @@ -527,10 +512,6 @@ def test_join_filtered_tables_no_pushdown(snapshot): joined = tbl_a_filter.left_join(tbl_b_filter, ["year", "month", "day"]) result = joined[tbl_a_filter.value_a, tbl_b_filter.value_b] - join_op = result.op().table - assert join_op.left == tbl_a_filter.op() - assert join_op.right == tbl_b_filter.op() - snapshot.assert_match(to_sql(result), "out.sql") @@ -860,3 +841,13 @@ def test_chain_limit_doesnt_collapse(snapshot): ) expr = t.city.topk(10)[-5:] snapshot.assert_match(to_sql(expr), "result.sql") + + +def test_join_with_conditional_aggregate(snapshot): + left = ibis.table({"on": "int", "by": "string"}, name="left") + right = ibis.table({"on": "int", "by": "string", "val": "float"}, name="right") + stat = right[(right.by == left.by) & (right.on <= left.on)]["on"].max() + merged = left.join(right, how="left", predicates=left.by == right.by)[ + right.on == stat + ] + snapshot.assert_match(to_sql(merged), "result.sql") diff --git a/ibis/backends/tests/sql/test_sql.py b/ibis/backends/tests/sql/test_sql.py index d91d1aff6683..d4bb8fe8033e 100644 --- a/ibis/backends/tests/sql/test_sql.py +++ b/ibis/backends/tests/sql/test_sql.py @@ -141,10 +141,7 @@ def test_named_expr(functional_alltypes, snapshot): ], ids=["inner", "left", "outer", "inner_select", "left_select", "outer_select"], ) -def test_joins(tpch_region, tpch_nation, expr_fn, snapshot): - region = tpch_region - nation = tpch_nation - +def test_joins(region, nation, expr_fn, snapshot): expr = expr_fn(region, nation) snapshot.assert_match(to_sql(expr), "out.sql") @@ -160,15 +157,12 @@ def test_join_just_materialized(nation, region, customer, snapshot): snapshot.assert_match(to_sql(joined), "out.sql") -def test_full_outer_join(tpch_region, tpch_nation): +def test_full_outer_join(region, nation): """Testing full outer join separately due to previous issue with outer join resulting in left outer join (issue #1773)""" - region = tpch_region - nation = tpch_nation - predicate = region.r_regionkey == nation.n_regionkey joined = region.outer_join(nation, predicate) - joined_sql_str = str(joined.compile()) + joined_sql_str = to_sql(joined) assert "full" in joined_sql_str.lower() assert "left" not in joined_sql_str.lower() @@ -522,3 +516,50 @@ def test_order_by_expr(snapshot): t = ibis.table(dict(a="int", b="string"), name="t") expr = 
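These tests now go through the public `ibis.to_sql` entry point rather than the removed `Compiler.to_sql`; it returns the SQL string directly and accepts an optional `dialect` (a usage sketch):

import ibis

t = ibis.table({"a": "int64"}, name="t")
sql = ibis.to_sql(t.a.sum().name("total"), dialect="duckdb")
print(sql)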
t[lambda t: t.a == 1].order_by(lambda t: t.b + "a") snapshot.assert_match(to_sql(expr), "out.sql") + + +def test_no_cartesian_join(snapshot): + customers = ibis.table( + dict(customer_id="int64", first_name="string", last_name="string"), + name="customers", + ) + orders = ibis.table( + dict(order_id="int64", customer_id="int64", order_date="date", status="string"), + name="orders", + ) + payments = ibis.table( + dict( + payment_id="int64", + order_id="int64", + payment_method="string", + amount="float64", + ), + name="payments", + ) + + customer_orders = orders.group_by("customer_id").aggregate( + first_order=orders.order_date.min(), + most_recent_order=orders.order_date.max(), + number_of_orders=orders.order_id.count(), + ) + + customer_payments = ( + payments.left_join(orders, "order_id") + .group_by(orders.customer_id) + .aggregate(total_amount=payments.amount.sum()) + ) + + final = ( + customers.left_join(customer_orders, "customer_id") + .drop("customer_id_right") + .left_join(customer_payments, "customer_id")[ + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount.name("customer_lifetime_value"), + ] + ) + snapshot.assert_match(ibis.to_sql(final, dialect="duckdb"), "out.sql") diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index dbbd31533419..13ba75b6d2d4 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1071,7 +1071,12 @@ def test_quantile( raises=com.OperationNotDefinedError, ), pytest.mark.notyet( - ["impala", "mysql", "sqlite", "flink"], + ["duckdb", "snowflake"], + raises=com.UnsupportedOperationError, + reason="backend only implements population correlation coefficient", + ), + pytest.mark.notyet( + ["impala", "mysql", "sqlite"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -1343,10 +1348,7 @@ def test_date_quantile(alltypes, func): "::", id="expr", marks=[ - pytest.mark.notyet( - ["duckdb", "trino"], - raises=com.UnsupportedOperationError, - ), + pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), pytest.mark.notyet( ["bigquery"], raises=GoogleBadRequest, @@ -1589,8 +1591,8 @@ def test_binds_are_cast(alltypes): def test_agg_sort(alltypes): query = alltypes.aggregate(count=alltypes.count()) - query = query.order_by(alltypes.year) - query.execute() + with pytest.raises(com.IntegrityError): + query.order_by(alltypes.year) @pytest.mark.xfail_version( diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 0b4d5ad07007..903bbff3ff39 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -119,7 +119,7 @@ def test_limit_chain(alltypes, expr_fn): "expr_fn", [ param(lambda t: t, id="alltypes table"), - param(lambda t: t.join(t.view(), t.id == t.view().int_col), id="self join"), + param(lambda t: t.join(t.view(), [("id", "int_col")]), id="self join"), ], ) def test_unbind(alltypes, expr_fn): diff --git a/ibis/backends/tests/test_benchmarks.py b/ibis/backends/tests/test_benchmarks.py index 9c23b1fc09b6..0305b4fcd6b1 100644 --- a/ibis/backends/tests/test_benchmarks.py +++ b/ibis/backends/tests/test_benchmarks.py @@ -717,7 +717,6 @@ def test_repr_join(benchmark, customers, orders, orders_items, products): @pytest.mark.parametrize("overwrite", [True, False], ids=["overwrite", "no_overwrite"]) def test_insert_duckdb(benchmark, overwrite, tmp_path): 
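The `test_agg_sort` change above reflects a new integrity check in the epic-split expression layer: ordering an aggregate by a parent-table column that is not part of the aggregate's schema now raises `IntegrityError` instead of compiling silently; a sketch:

import ibis
import ibis.common.exceptions as com

t = ibis.table({"year": "int32", "x": "int64"}, name="t")
agg = t.aggregate(count=t.count())
try:
    agg.order_by(t.year)  # `year` is not a column of `agg`
except com.IntegrityError:
    pass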
pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") n_rows = int(1e4) table_name = "t" @@ -806,7 +805,6 @@ def test_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: def test_ibis_duckdb_to_pyarrow(benchmark, sql, ddb) -> None: pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") con = ibis.duckdb.connect(ddb, read_only=True) @@ -876,7 +874,6 @@ def test_big_join_expr(benchmark, src, diff): def test_big_join_execute(benchmark, nrels): pytest.importorskip("duckdb") - pytest.importorskip("duckdb_engine") con = ibis.duckdb.connect() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index ee1f06aff87e..f1f9ec4a1854 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -934,9 +934,16 @@ def test_create_from_in_memory_table(con, temp_table, arg, func, monkeypatch): @pytest.mark.usefixtures("backend") def test_default_backend_option(monkeypatch): - monkeypatch.setattr(ibis.options, "default_backend", ibis.pandas) + pytest.importorskip("duckdb") + + # verify that there's nothing already set + assert ibis.options.default_backend is None + + # patch in duckdb + monkeypatch.setattr(ibis.options, "default_backend", ibis.duckdb) + backend = ibis.config._default_backend() - assert backend.name == "pandas" + assert backend.name == "duckdb" # backend is used to ensure that this test runs in CI in the setting @@ -997,10 +1004,10 @@ def test_default_backend(): for _ in range(2): assert expr.execute() == df.a.sum() - sql = str(ibis.to_sql(expr)) + sql = ibis.to_sql(expr) rx = """\ SELECT - SUM\\((\\w+)\\.a\\) AS ".+" + SUM\\((t\\d+)\\.a\\) AS ".+" FROM \\w+ AS \\1""" assert re.match(rx, sql) is not None @@ -1150,9 +1157,9 @@ def test_has_operation_no_geo(con, op): for name, obj in sorted(inspect.getmembers(builtins), key=itemgetter(0)) for backend in sorted(ALL_BACKENDS) # filter out builtins that are types, except for tuples on ClickHouse - # because tuples are used to represent lists of expressions + # and duckdb because tuples are used to represent lists of expressions if isinstance(obj, type) - if (obj != tuple or backend != "clickhouse") + if (obj != tuple or backend not in ("clickhouse", "duckdb")) if (backend != "pyspark" or vparse(pd.__version__) < vparse("2")) ], ) diff --git a/ibis/backends/tests/test_dot_sql.py b/ibis/backends/tests/test_dot_sql.py index 76424fa7e830..cf50720108c4 100644 --- a/ibis/backends/tests/test_dot_sql.py +++ b/ibis/backends/tests/test_dot_sql.py @@ -15,7 +15,7 @@ PolarsComputeError = None table_dot_sql_notimpl = pytest.mark.notimpl(["bigquery", "impala", "druid"]) -dot_sql_notimpl = pytest.mark.notimpl(["datafusion", "exasol", "flink"]) +dot_sql_notimpl = pytest.mark.notimpl(["exasol", "flink"]) dot_sql_notyet = pytest.mark.notyet( ["snowflake", "oracle"], reason="snowflake and oracle column names are case insensitive", @@ -36,7 +36,7 @@ DatabaseError = None -@dot_sql_notimpl +@pytest.mark.notimpl(["flink"]) @dot_sql_notyet @dot_sql_never @pytest.mark.parametrize( @@ -256,13 +256,13 @@ def test_dot_sql_reuse_alias_with_different_types(backend, alltypes, df): @dot_sql_never def test_table_dot_sql_transpile(backend, alltypes, dialect, df): name = "foo2" - foo = alltypes.select(x=_.int_col + 1).alias(name) + foo = alltypes.select(x=_.bigint_col + 1).alias(name) expr = sg.select("x").from_(sg.table(name, quoted=True)) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = expr.sql(dialect=dialect, pretty=True) dot_sql_expr = foo.sql(sqlstr, dialect=dialect) result 
= dot_sql_expr.execute() - expected = df.int_col.add(1).rename("x") + expected = df.bigint_col.add(1).rename("x") backend.assert_series_equal(result.x, expected) @@ -283,12 +283,12 @@ def test_table_dot_sql_transpile(backend, alltypes, dialect, df): @dot_sql_never def test_con_dot_sql_transpile(backend, con, dialect, df): t = sg.table("functional_alltypes") - foo = sg.select(sg.alias(sg.column("int_col") + 1, "x")).from_(t) + foo = sg.select(sg.alias(sg.column("bigint_col") + 1, "x")).from_(t) dialect = _IBIS_TO_SQLGLOT_DIALECT.get(dialect, dialect) sqlstr = foo.sql(dialect=dialect, pretty=True) expr = con.sql(sqlstr, dialect=dialect) result = expr.execute() - expected = df.int_col.add(1).rename("x") + expected = df.bigint_col.add(1).rename("x") backend.assert_series_equal(result.x, expected) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index 6f120cb502e4..09711ecc0417 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -23,6 +23,13 @@ PyDeltaTableError = None +try: + from duckdb import NotImplementedException as DuckDBNotImplementedException + from duckdb import ParserException as DuckDBParserException +except ImportError: + DuckDBNotImplementedException = DuckDBParserException = None + + limit = [ param( 42, @@ -225,7 +232,7 @@ def test_table_to_parquet(tmp_path, backend, awards_players): @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", - raises=sa.exc.NotSupportedError, + raises=DuckDBNotImplementedException, ) @pytest.mark.parametrize("version", ["1.0", "2.6"]) def test_table_to_parquet_writer_kwargs(version, tmp_path, backend, awards_players): @@ -325,7 +332,7 @@ def test_table_to_csv(tmp_path, backend, awards_players): @pytest.mark.notimpl( ["duckdb"], reason="cannot inline WriteOptions objects", - raises=sa.exc.ProgrammingError, + raises=DuckDBParserException, ) @pytest.mark.parametrize("delimiter", [";", "\t"], ids=["semicolon", "tab"]) def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 81ce010225d4..0d4f7bb69c03 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -754,16 +754,21 @@ def test_ifelse_column(backend, alltypes, df): def test_select_filter(backend, alltypes, df): t = alltypes - expr = t.select("int_col").filter(t.string_col == "4") + # XXX: should we consider a builder pattern for select and filter too? 
+ # this would allow us to capture the context + # TODO(cpcloud): this now requires the additional string_col projection + expr = t.select("int_col", "string_col").filter(t.string_col == "4") result = expr.execute() - expected = df.loc[df.string_col == "4", ["int_col"]].reset_index(drop=True) + expected = df.loc[df.string_col == "4", ["int_col", "string_col"]].reset_index( + drop=True + ) backend.assert_frame_equal(result, expected) def test_select_filter_select(backend, alltypes, df): t = alltypes - expr = t.select("int_col").filter(t.string_col == "4").int_col + expr = t.select("int_col", "string_col").filter(t.string_col == "4").int_col result = expr.execute().rename("int_col") expected = df.loc[df.string_col == "4", "int_col"].reset_index(drop=True) @@ -1005,6 +1010,9 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) ibis.memtable(df, columns=columns) +@pytest.mark.xfail( + raises=com.IntegrityError, reason="inner join convenience not implemented" +) @pytest.mark.notimpl( ["dask", "datafusion", "pandas", "polars"], raises=NotImplementedError, @@ -1639,11 +1647,6 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): raises=HiveServer2Error, ) @pytest.mark.notyet(["pyspark"], reason="pyspark doesn't support dynamic limit/offset") -@pytest.mark.xfail_version( - duckdb=["duckdb<=0.8.1"], - raises=AssertionError, - reason="https://github.com/duckdb/duckdb/issues/8412", -) @pytest.mark.notyet(["flink"], reason="flink doesn't support dynamic limit/offset") def test_dynamic_table_slice_with_computed_offset(backend): t = backend.functional_alltypes diff --git a/ibis/backends/tests/test_interactive.py b/ibis/backends/tests/test_interactive.py index 111be16de5ee..8014ab7f1fa2 100644 --- a/ibis/backends/tests/test_interactive.py +++ b/ibis/backends/tests/test_interactive.py @@ -15,93 +15,80 @@ import pytest +import ibis from ibis import config -def test_interactive_execute_on_repr(con): - table = con.table("functional_alltypes") - expr = table.bigint_col.sum() - with config.option_context("interactive", True): - repr(expr) +@pytest.fixture +def queries(monkeypatch): + queries = [] + monkeypatch.setattr(ibis.options, "verbose", True) + monkeypatch.setattr(ibis.options, "verbose_log", queries.append) + monkeypatch.setattr(ibis.options, "interactive", True) + return queries + - assert len(con.executed_queries) > 0 +@pytest.fixture(scope="module") +def table(con): + return con.table("functional_alltypes") -def test_repr_png_is_none_in_interactive(con): - table = con.table("functional_alltypes") +def test_interactive_execute_on_repr(table, queries, snapshot): + repr(table.bigint_col.sum()) + snapshot.assert_match(queries[0], "out.sql") + +def test_repr_png_is_none_in_interactive(table): with config.option_context("interactive", True): assert table._repr_png_() is None -def test_repr_png_is_not_none_in_not_interactive(con): +def test_repr_png_is_not_none_in_not_interactive(table): pytest.importorskip("ibis.expr.visualize") - table = con.table("functional_alltypes") - with config.option_context("interactive", False), config.option_context( "graphviz_repr", True ): assert table._repr_png_() is not None -def test_default_limit(con, snapshot): - table = con.table("functional_alltypes").select("id", "bool_col") - - with config.option_context("interactive", True): - repr(table) - - snapshot.assert_match(con.executed_queries[0], "out.sql") - +def test_default_limit(table, snapshot, queries): + repr(table.select("id", "bool_col")) -def test_respect_set_limit(con, 
snapshot): - table = con.table("functional_alltypes").select("id", "bool_col").limit(10) + snapshot.assert_match(queries[0], "out.sql") - with config.option_context("interactive", True): - repr(table) - snapshot.assert_match(con.executed_queries[0], "out.sql") +def test_respect_set_limit(table, snapshot, queries): + repr(table.select("id", "bool_col").limit(10)) + snapshot.assert_match(queries[0], "out.sql") -def test_disable_query_limit(con, snapshot): - table = con.table("functional_alltypes").select("id", "bool_col") - with config.option_context("interactive", True): - with config.option_context("sql.default_limit", None): - repr(table) +def test_disable_query_limit(table, snapshot, queries): + assert ibis.options.sql.default_limit is None - snapshot.assert_match(con.executed_queries[0], "out.sql") + with config.option_context("sql.default_limit", 10): + assert ibis.options.sql.default_limit == 10 + repr(table.select("id", "bool_col")) + snapshot.assert_match(queries[0], "out.sql") -def test_interactive_non_compilable_repr_not_fail(con): - # #170 - table = con.table("functional_alltypes") - expr = table.string_col.topk(3) +def test_interactive_non_compilable_repr_does_not_fail(table): + """https://github.com/ibis-project/ibis/issues/170""" + repr(table.string_col.topk(3)) - # it works! - with config.option_context("interactive", True): - repr(expr) +def test_histogram_repr_no_query_execute(table, queries): + tier = table.double_col.histogram(10).name("bucket") + expr = table.group_by(tier).size() + expr._repr() -def test_histogram_repr_no_query_execute(con): - t = con.table("functional_alltypes") - tier = t.double_col.histogram(10).name("bucket") - expr = t.group_by(tier).size() - with config.option_context("interactive", True): - expr._repr() - assert con.executed_queries == [] - + assert not queries -def test_compile_no_execute(con): - t = con.table("functional_alltypes") - t.double_col.sum().compile() - assert con.executed_queries == [] +def test_isin_rule_suppressed_exception_repr_not_fail(table): + bool_clause = table["string_col"].notin(["1", "4", "7"]) + expr = table[bool_clause]["string_col"].value_counts() -def test_isin_rule_suppressed_exception_repr_not_fail(con): - with config.option_context("interactive", True): - t = con.table("functional_alltypes") - bool_clause = t["string_col"].notin(["1", "4", "7"]) - expr = t[bool_clause]["string_col"].value_counts() - repr(expr) + repr(expr) diff --git a/ibis/backends/tests/test_join.py b/ibis/backends/tests/test_join.py index 60e1f21d2007..09d4dade0991 100644 --- a/ibis/backends/tests/test_join.py +++ b/ibis/backends/tests/test_join.py @@ -408,3 +408,13 @@ def test_outer_join_nullability(backend, how, nrows, gen_right, keys): result = expr.to_pyarrow() assert len(result) == nrows + + +def test_complex_join_agg(snapshot): + t1 = ibis.table(dict(value1="float", key1="string", key2="string"), name="table1") + t2 = ibis.table(dict(value2="float", key1="string", key4="string"), name="table2") + + avg_diff = (t1.value1 - t2.value2).mean() + expr = t1.left_join(t2, "key1").group_by(t1.key1).aggregate(avg_diff=avg_diff) + + snapshot.assert_match(str(ibis.to_sql(expr, dialect="duckdb")), "out.sql") diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 5158a3fb8336..cbc04a1d9534 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -24,9 +24,10 @@ import duckdb DuckDBConversionException = duckdb.ConversionException + DuckDBParserException = duckdb.ParserException 
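+    # DuckDBParserException is referenced below in marks for decimal-literal precision failures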
except ImportError: duckdb = None - DuckDBConversionException = None + DuckDBConversionException = DuckDBParserException = None try: import clickhouse_connect as cc @@ -417,9 +418,7 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=ImpalaHiveServer2Error, ), pytest.mark.broken( - ["duckdb"], - "(duckdb.ParserException) Parser Error: Width must be between 1 and 38!", - raises=sa.exc.ProgrammingError, + ["duckdb"], "Unsupported precision.", raises=DuckDBParserException ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.notyet( @@ -442,15 +441,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("Infinity"), "impala": float("inf"), "exasol": float("inf"), + "duckdb": float("inf"), }, { "bigquery": "FLOAT64", "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -458,11 +458,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "duckdb.ConversionException: Conversion Error: Could not cast value inf to DECIMAL(18,3)", - raises=DuckDBConversionException, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -523,15 +518,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("-Infinity"), "impala": float("-inf"), "exasol": float("-inf"), + "duckdb": float("-inf"), }, { "bigquery": "FLOAT64", "snowflake": "VARCHAR", "sqlite": "real", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -539,11 +535,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "duckdb.ConversionException: Conversion Error: Could not cast value -inf to DECIMAL(18,3)", - raises=DuckDBConversionException, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -604,15 +595,16 @@ def test_numeric_literal(con, backend, expr, expected_types): "dask": decimal.Decimal("NaN"), "impala": float("nan"), "exasol": float("nan"), + "duckdb": float("nan"), }, { "bigquery": "FLOAT64", "snowflake": "VARCHAR", "sqlite": "null", "trino": "decimal(2,1)", - "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "impala": "DOUBLE", + "duckdb": "FLOAT", }, marks=[ pytest.mark.broken( @@ -620,14 +612,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "Unsupported precision. Supported values: [1 : 76]. 
Current value: None", raises=NotImplementedError, ), - pytest.mark.broken( - ["duckdb"], - "(duckdb.InvalidInputException) Invalid Input Error: Attempting " - "to execute an unsuccessful or closed pending query result" - "Error: Invalid Input Error: Type DOUBLE with value nan can't be " - "cast because the value is out of range for the destination type INT64", - raises=sa.exc.ProgrammingError, - ), pytest.mark.broken( ["trino"], "(trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_LITERAL, " @@ -1468,7 +1452,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "mysql": 10, "snowflake": 38, "trino": 18, - "duckdb": None, "sqlite": None, "mssql": None, "oracle": 38, @@ -1478,7 +1461,6 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "mysql": 0, "snowflake": 0, "trino": 3, - "duckdb": None, "sqlite": None, "mssql": None, "oracle": 0, @@ -1492,6 +1474,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "clickhouse", "dask", "datafusion", + "duckdb", "impala", "pandas", "pyspark", diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index 75a121f578d8..712a06c421c6 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -379,12 +379,14 @@ def test_register_garbage(con, monkeypatch): monkeypatch.setattr(con, "_load_extensions", lambda x: True) sa = pytest.importorskip("sqlalchemy") + duckdb = pytest.importorskip("duckdb") with pytest.raises( - sa.exc.OperationalError, match="No files found that match the pattern" + (sa.exc.OperationalError, duckdb.IOException), + match="No files found that match the pattern", ): con.read_csv("garbage_notafile") - with pytest.raises(FileNotFoundError): + with pytest.raises((FileNotFoundError, duckdb.IOException)): con.read_parquet("garbage_notafile") diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 21b91984f478..9aaa5a66ec34 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -95,9 +95,10 @@ def test_group_by_has_index(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.never( - ["pandas", "dask", "datafusion", "polars", "pyspark"], reason="not SQL" +@pytest.mark.xfail( + raises=exc.IntegrityError, reason="inner join convenience not implemented" ) +@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") def test_cte_refs_in_topo_order(backend, snapshot): mr0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index bfc8f1f9579e..0d90d9a6e4ba 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -417,15 +417,7 @@ def uses_java_re(t): id="translate", marks=[ pytest.mark.notimpl( - [ - "clickhouse", - "duckdb", - "mssql", - "mysql", - "polars", - "druid", - "oracle", - ], + ["mssql", "mysql", "polars", "druid", "oracle"], raises=com.OperationNotDefinedError, ), pytest.mark.notyet( @@ -844,6 +836,11 @@ def test_string(backend, alltypes, df, result_func, expected_func): ["mysql", "mssql", "druid", "oracle", "exasol"], raises=com.OperationNotDefinedError, ) +@pytest.mark.broken( + ["duckdb"], + reason="no idea, generated SQL looks very correct but this fails", + raises=AssertionError, +) def test_re_replace_global(con): expr = ibis.literal("aba").re_replace("a", "c") result = con.execute(expr) diff --git a/ibis/backends/tests/test_struct.py 
b/ibis/backends/tests/test_struct.py index d59459f858c2..3eaf9cae1549 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -6,6 +6,7 @@ import pandas as pd import pandas.testing as tm import pytest +from pytest import param import ibis import ibis.expr.datatypes as dt @@ -17,20 +18,32 @@ ] -@pytest.mark.notimpl(["dask", "snowflake"]) -@pytest.mark.parametrize("field", ["a", "b", "c"]) -def test_single_field(backend, struct, struct_df, field): +@pytest.mark.notimpl(["dask"]) +@pytest.mark.parametrize( + ("field", "expected"), + [ + param( + "a", + [1.0, 2.0, 3.0, np.nan, 2.0, np.nan, 3.0], + id="a", + marks=pytest.mark.notimpl(["snowflake"]), + ), + param( + "b", ["banana", "apple", "orange", "banana", None, None, "orange"], id="b" + ), + param( + "c", + [2, 3, 4, 2, 3, np.nan, np.nan], + id="c", + marks=pytest.mark.notimpl(["snowflake"]), + ), + ], +) +def test_single_field(struct, field, expected): expr = struct.abc[field] - result = expr.execute().sort_values().reset_index(drop=True) - expected = ( - struct_df.abc.map( - lambda value: value[field] if isinstance(value, dict) else value - ) - .rename(field) - .sort_values() - .reset_index(drop=True) - ) - backend.assert_series_equal(result, expected) + result = expr.execute() + equal_nan = expr.type().is_numeric() + assert np.array_equal(result, expected, equal_nan=equal_nan) @pytest.mark.notimpl(["dask"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 736280c35e07..51f45e1a2278 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -77,34 +77,6 @@ IllegalArgumentException = None -def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: - """Backwards compatible name-of-day getting function. - - Returns - ------- - str - The name of the day corresponding to `obj` - """ - try: - return obj.day_name() - except AttributeError: - return obj.weekday_name - - -def day_name(obj: pd.core.indexes.accessors.DatetimeProperties | pd.Timestamp) -> str: - """Backwards compatible name-of-day getting function. 
- - Returns - ------- - str - The name of the day corresponding to `obj` - """ - try: - return obj.day_name() - except AttributeError: - return obj.weekday_name - - @pytest.mark.parametrize("attr", ["year", "month", "day"]) @pytest.mark.parametrize( "expr_fn", [ @@ -1086,7 +1058,6 @@ def convert_to_offset(x): marks=[ pytest.mark.notimpl( [ - "clickhouse", "dask", "impala", "mysql", @@ -1114,7 +1085,6 @@ def convert_to_offset(x): marks=[ pytest.mark.notimpl( [ - "clickhouse", "sqlite", "postgres", "polars", @@ -1203,10 +1173,10 @@ def convert_to_offset(x): raises=ValidationError, reason="unsupported operand type(s) for -: 'StringColumn' and 'TimestampScalar'", ), - pytest.mark.xfail_version( - duckdb=["duckdb>=0.8.0"], + pytest.mark.broken( + ["duckdb"], raises=AssertionError, - reason="duckdb 0.8.0 returns DateOffset columns", + reason="duckdb returns DateOffset columns", ), pytest.mark.broken( ["trino"], @@ -2017,7 +1987,7 @@ def test_day_of_week_column(backend, alltypes, df): backend.assert_series_equal(result_index, expected_index, check_names=False) result_day = expr.full_name().name("tmp").execute() - expected_day = day_name(df.timestamp_col.dt) + expected_day = df.timestamp_col.dt.day_name() backend.assert_series_equal(result_day, expected_day, check_names=False) @@ -2032,7 +2002,7 @@ def test_day_of_week_column(backend, alltypes, df): ), param( lambda t: t.timestamp_col.day_of_week.full_name().length().sum(), - lambda s: day_name(s.dt).str.len().sum(), + lambda s: s.dt.day_name().str.len().sum(), id="day_of_week_full_name", marks=[ pytest.mark.notimpl( @@ -2405,7 +2375,9 @@ def test_extract_time_from_timestamp(con, microsecond): raises=(NotImplementedError, AttributeError), ) @pytest.mark.broken( - ["bigquery"], reason="BigQuery returns DateOffset arrays", raises=AssertionError + ["bigquery", "duckdb"], + reason="BigQuery and DuckDB return DateOffset arrays", + raises=AssertionError, ) @pytest.mark.xfail_version( datafusion=["datafusion"], @@ -2417,11 +2389,6 @@ def test_extract_time_from_timestamp(con, microsecond): reason="Driver doesn't know how to handle intervals", raises=ClickhouseOperationalError, ) -@pytest.mark.xfail_version( - duckdb=["duckdb>=0.8.0"], - raises=AssertionError, - reason="duckdb 0.8.0 returns DateOffset columns", -) @pytest.mark.notimpl( ["flink"], raises=Py4JJavaError, @@ -2843,7 +2810,7 @@ def test_timestamp_precision_output(con, ts, scale, unit): marks=[ pytest.mark.notimpl( ["clickhouse"], - raises=NotImplementedError, + raises=com.OperationNotDefinedError, reason="time types not yet implemented in ibis for the clickhouse backend", ) ], @@ -2964,8 +2931,8 @@ def test_delta(con, start, end, unit, expected): ) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_bucket(backend, kws, pd_freq): - ts = backend.functional_alltypes.timestamp_col.name("ts").execute() - res = backend.functional_alltypes.timestamp_col.bucket(**kws).name("ts").execute() + ts = backend.functional_alltypes.timestamp_col.execute().rename("ts") + res = backend.functional_alltypes.timestamp_col.bucket(**kws).execute().rename("ts") sol = ts.dt.floor(pd_freq) backend.assert_series_equal(res, sol) @@ -2999,11 +2966,13 @@ def test_timestamp_bucket(backend, kws, pd_freq): @pytest.mark.parametrize("offset_mins", [2, -2], ids=["pos", "neg"]) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) def test_timestamp_bucket_offset(backend, offset_mins): - ts = backend.functional_alltypes.timestamp_col.name("ts") - expr = ts.bucket(minutes=5, 
offset=ibis.interval(minutes=offset_mins)).name("ts") - res = expr.execute().astype("datetime64[ns]") + ts = backend.functional_alltypes.timestamp_col + expr = ts.bucket(minutes=5, offset=ibis.interval(minutes=offset_mins)) + res = expr.execute().astype("datetime64[ns]").rename("ts") td = pd.Timedelta(minutes=offset_mins) - sol = ((ts.execute() - td).dt.floor("300s") + td).astype("datetime64[ns]") + sol = ((ts.execute().rename("ts") - td).dt.floor("300s") + td).astype( + "datetime64[ns]" + ) backend.assert_series_equal(res, sol) diff --git a/ibis/backends/tests/tpch/conftest.py b/ibis/backends/tests/tpch/conftest.py index 7cb0569e448b..730b4fd612b3 100644 --- a/ibis/backends/tests/tpch/conftest.py +++ b/ibis/backends/tests/tpch/conftest.py @@ -11,6 +11,7 @@ from dateutil.relativedelta import relativedelta import ibis +from ibis.formats.pandas import PandasData if TYPE_CHECKING: import ibis.expr.types as ir @@ -49,29 +50,28 @@ def wrapper(*args, backend, snapshot, **kwargs): raw_sql = sql.sql(dialect="duckdb", pretty=True) - expected_expr = backend.connection.sql( - # in theory this should allow us to use one dialect for every backend - raw_sql, - dialect="duckdb", - ) + expected_expr = backend.connection.sql(raw_sql, dialect="duckdb") result_expr = test(*args, **kwargs) - result = result_expr.execute() + ibis_sql = ibis.to_sql(result_expr, dialect=backend_name) + + assert result_expr._find_backend(use_default=False) is backend.connection + result = backend.connection.execute(result_expr) assert not result.empty expected = expected_expr.execute() - assert not expected.empty - assert list(map(str.lower, expected.columns)) == result.columns.tolist() expected.columns = result.columns + expected = PandasData.convert_table(expected, result_expr.schema()) + assert not expected.empty + assert len(expected) == len(result) backend.assert_frame_equal(result, expected, check_dtype=False) - # only produce sql if the execution passes - result_expr_sql = ibis.to_sql(result_expr, dialect=backend_name) - snapshot.assert_match(result_expr_sql, sql_path_name) + # only write sql if the execution passes + snapshot.assert_match(ibis_sql, sql_path_name) return wrapper diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql index b44c9d654e33..953b4dfeefc4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/duckdb/h01.sql @@ -1,26 +1,28 @@ SELECT - t0.l_returnflag, - t0.l_linestatus, - t0.sum_qty, - t0.sum_base_price, - t0.sum_disc_price, - t0.sum_charge, - t0.avg_qty, - t0.avg_price, - t0.avg_disc, - t0.count_order + t2.l_returnflag, + t2.l_linestatus, + t2.sum_qty, + t2.sum_base_price, + t2.sum_disc_price, + t2.sum_charge, + t2.avg_qty, + t2.avg_price, + t2.avg_disc, + t2.count_order FROM ( SELECT - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, + t1.l_returnflag, + t1.l_linestatus, SUM(t1.l_quantity) AS sum_qty, SUM(t1.l_extendedprice) AS sum_base_price, SUM(t1.l_extendedprice * ( CAST(1 AS TINYINT) - t1.l_discount )) AS sum_disc_price, SUM( - t1.l_extendedprice * ( - CAST(1 AS TINYINT) - t1.l_discount + ( + t1.l_extendedprice * ( + CAST(1 AS TINYINT) - t1.l_discount + ) ) * ( t1.l_tax + CAST(1 AS TINYINT) ) @@ -29,13 +31,32 @@ FROM ( AVG(t1.l_extendedprice) AS avg_price, AVG(t1.l_discount) AS avg_disc, COUNT(*) AS count_order - FROM main.lineitem AS t1 - WHERE - t1.l_shipdate <= 
MAKE_DATE(1998, 9, 2) + FROM ( + SELECT + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment + FROM lineitem AS t0 + WHERE + t0.l_shipdate <= MAKE_DATE(1998, 9, 2) + ) AS t1 GROUP BY 1, 2 -) AS t0 +) AS t2 ORDER BY - t0.l_returnflag ASC, - t0.l_linestatus ASC \ No newline at end of file + t2.l_returnflag ASC, + t2.l_linestatus ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql index 0bea3f3b2cdf..9f1cf92c38e2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/snowflake/h01.sql @@ -1,59 +1,62 @@ -WITH t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -) SELECT - t1."l_returnflag", - t1."l_linestatus", - t1."sum_qty", - t1."sum_base_price", - t1."sum_disc_price", - t1."sum_charge", - t1."avg_qty", - t1."avg_price", - t1."avg_disc", - t1."count_order" + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."sum_qty" AS "sum_qty", + "t2"."sum_base_price" AS "sum_base_price", + "t2"."sum_disc_price" AS "sum_disc_price", + "t2"."sum_charge" AS "sum_charge", + "t2"."avg_qty" AS "avg_qty", + "t2"."avg_price" AS "avg_price", + "t2"."avg_disc" AS "avg_disc", + "t2"."count_order" AS "count_order" FROM ( SELECT - t0."l_returnflag" AS "l_returnflag", - t0."l_linestatus" AS "l_linestatus", - SUM(t0."l_quantity") AS "sum_qty", - SUM(t0."l_extendedprice") AS "sum_base_price", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" + "t1"."l_returnflag" AS "l_returnflag", + "t1"."l_linestatus" AS "l_linestatus", + SUM("t1"."l_quantity") AS "sum_qty", + SUM("t1"."l_extendedprice") AS "sum_base_price", + SUM("t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" )) AS "sum_disc_price", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) * ( - t0."l_tax" + 1 - )) AS "sum_charge", - AVG(t0."l_quantity") AS "avg_qty", - AVG(t0."l_extendedprice") AS "avg_price", - AVG(t0."l_discount") AS "avg_disc", + SUM( + ( + "t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + ) + ) * ( + "t1"."l_tax" + 1 + ) + ) AS "sum_charge", + AVG("t1"."l_quantity") AS "avg_qty", + AVG("t1"."l_extendedprice") AS "avg_price", + AVG("t1"."l_discount") AS "avg_disc", COUNT(*) AS "count_order" - FROM t0 - WHERE - t0."l_shipdate" <= DATE_FROM_PARTS(1998, 9, 2) + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + 
"t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + WHERE + "t0"."L_SHIPDATE" <= DATEFROMPARTS(1998, 9, 2) + ) AS "t1" GROUP BY 1, 2 -) AS t1 +) AS "t2" ORDER BY - t1."l_returnflag" ASC, - t1."l_linestatus" ASC \ No newline at end of file + "t2"."l_returnflag" ASC, + "t2"."l_linestatus" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql index 0742d4a2c8d3..9fa0195c56bb 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/duckdb/h02.sql @@ -1,84 +1,116 @@ -WITH t0 AS ( - SELECT - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment, - t3.ps_partkey AS ps_partkey, - t3.ps_suppkey AS ps_suppkey, - t3.ps_availqty AS ps_availqty, - t3.ps_supplycost AS ps_supplycost, - t3.ps_comment AS ps_comment, - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment, - t6.r_regionkey AS r_regionkey, - t6.r_name AS r_name, - t6.r_comment AS r_comment - FROM main.part AS t2 - JOIN main.partsupp AS t3 - ON t2.p_partkey = t3.ps_partkey - JOIN main.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN main.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t2.p_size = CAST(15 AS TINYINT) - AND t2.p_type LIKE '%BRASS' - AND t6.r_name = 'EUROPE' - AND t3.ps_supplycost = ( - SELECT - MIN(t3.ps_supplycost) AS "Min(ps_supplycost)" - FROM main.partsupp AS t3 - JOIN main.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN main.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'EUROPE' AND t2.p_partkey = t3.ps_partkey - ) -) SELECT - t1.s_acctbal, - t1.s_name, - t1.n_name, - t1.p_partkey, - t1.p_mfgr, - t1.s_address, - t1.s_phone, - t1.s_comment + t21.s_acctbal, + t21.s_name, + t21.n_name, + t21.p_partkey, + t21.p_mfgr, + t21.s_address, + t21.s_phone, + t21.s_comment FROM ( SELECT - t0.s_acctbal AS s_acctbal, - t0.s_name AS s_name, - t0.n_name AS n_name, - t0.p_partkey AS p_partkey, - t0.p_mfgr AS p_mfgr, - t0.s_address AS s_address, - t0.s_phone AS s_phone, - t0.s_comment AS s_comment - FROM t0 -) AS t1 + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment, + t6.ps_partkey, + t6.ps_suppkey, + t6.ps_availqty, + t6.ps_supplycost, + t6.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t10.n_nationkey, + t10.n_name, + t10.n_regionkey, + t10.n_comment, + t12.r_regionkey, + t12.r_name, + t12.r_comment + FROM part AS t5 + INNER JOIN 
partsupp AS t6 + ON t5.p_partkey = t6.ps_partkey + INNER JOIN supplier AS t8 + ON t8.s_suppkey = t6.ps_suppkey + INNER JOIN nation AS t10 + ON t8.s_nationkey = t10.n_nationkey + INNER JOIN region AS t12 + ON t10.n_regionkey = t12.r_regionkey +) AS t21 +WHERE + t21.p_size = CAST(15 AS TINYINT) + AND t21.p_type LIKE '%BRASS' + AND t21.r_name = 'EUROPE' + AND t21.ps_supplycost = ( + SELECT + MIN(t23.ps_supplycost) AS "Min(ps_supplycost)" + FROM ( + SELECT + t22.ps_partkey, + t22.ps_suppkey, + t22.ps_availqty, + t22.ps_supplycost, + t22.ps_comment, + t22.s_suppkey, + t22.s_name, + t22.s_address, + t22.s_nationkey, + t22.s_phone, + t22.s_acctbal, + t22.s_comment, + t22.n_nationkey, + t22.n_name, + t22.n_regionkey, + t22.n_comment, + t22.r_regionkey, + t22.r_name, + t22.r_comment + FROM ( + SELECT + t7.ps_partkey, + t7.ps_suppkey, + t7.ps_availqty, + t7.ps_supplycost, + t7.ps_comment, + t9.s_suppkey, + t9.s_name, + t9.s_address, + t9.s_nationkey, + t9.s_phone, + t9.s_acctbal, + t9.s_comment, + t11.n_nationkey, + t11.n_name, + t11.n_regionkey, + t11.n_comment, + t13.r_regionkey, + t13.r_name, + t13.r_comment + FROM partsupp AS t7 + INNER JOIN supplier AS t9 + ON t9.s_suppkey = t7.ps_suppkey + INNER JOIN nation AS t11 + ON t9.s_nationkey = t11.n_nationkey + INNER JOIN region AS t13 + ON t11.n_regionkey = t13.r_regionkey + ) AS t22 + WHERE + t22.r_name = 'EUROPE' AND t21.p_partkey = t22.ps_partkey + ) AS t23 + ) ORDER BY - t1.s_acctbal DESC, - t1.n_name ASC, - t1.s_name ASC, - t1.p_partkey ASC + t21.s_acctbal DESC, + t21.n_name ASC, + t21.s_name ASC, + t21.p_partkey ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql index 1fe27156cd12..01cfa3a33d2e 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/snowflake/h02.sql @@ -1,127 +1,190 @@ -WITH t1 AS ( - SELECT - t7."P_PARTKEY" AS "p_partkey", - t7."P_NAME" AS "p_name", - t7."P_MFGR" AS "p_mfgr", - t7."P_BRAND" AS "p_brand", - t7."P_TYPE" AS "p_type", - t7."P_SIZE" AS "p_size", - t7."P_CONTAINER" AS "p_container", - t7."P_RETAILPRICE" AS "p_retailprice", - t7."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t7 -), t0 AS ( - SELECT - t7."PS_PARTKEY" AS "ps_partkey", - t7."PS_SUPPKEY" AS "ps_suppkey", - t7."PS_AVAILQTY" AS "ps_availqty", - t7."PS_SUPPLYCOST" AS "ps_supplycost", - t7."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t7 -), t2 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t3 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t4 AS ( - SELECT - t7."R_REGIONKEY" AS "r_regionkey", - t7."R_NAME" AS "r_name", - t7."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t7 -), t5 AS ( - SELECT - t1."p_partkey" AS "p_partkey", - t1."p_name" AS "p_name", - t1."p_mfgr" AS "p_mfgr", - t1."p_brand" AS "p_brand", - t1."p_type" AS "p_type", - t1."p_size" AS "p_size", - t1."p_container" AS "p_container", - 
t1."p_retailprice" AS "p_retailprice", - t1."p_comment" AS "p_comment", - t0."ps_partkey" AS "ps_partkey", - t0."ps_suppkey" AS "ps_suppkey", - t0."ps_availqty" AS "ps_availqty", - t0."ps_supplycost" AS "ps_supplycost", - t0."ps_comment" AS "ps_comment", - t2."s_suppkey" AS "s_suppkey", - t2."s_name" AS "s_name", - t2."s_address" AS "s_address", - t2."s_nationkey" AS "s_nationkey", - t2."s_phone" AS "s_phone", - t2."s_acctbal" AS "s_acctbal", - t2."s_comment" AS "s_comment", - t3."n_nationkey" AS "n_nationkey", - t3."n_name" AS "n_name", - t3."n_regionkey" AS "n_regionkey", - t3."n_comment" AS "n_comment", - t4."r_regionkey" AS "r_regionkey", - t4."r_name" AS "r_name", - t4."r_comment" AS "r_comment" - FROM t1 - JOIN t0 - ON t1."p_partkey" = t0."ps_partkey" - JOIN t2 - ON t2."s_suppkey" = t0."ps_suppkey" - JOIN t3 - ON t2."s_nationkey" = t3."n_nationkey" - JOIN t4 - ON t3."n_regionkey" = t4."r_regionkey" - WHERE - t1."p_size" = 15 - AND t1."p_type" LIKE '%BRASS' - AND t4."r_name" = 'EUROPE' - AND t0."ps_supplycost" = ( - SELECT - MIN(t0."ps_supplycost") AS "Min(ps_supplycost)" - FROM t0 - JOIN t2 - ON t2."s_suppkey" = t0."ps_suppkey" - JOIN t3 - ON t2."s_nationkey" = t3."n_nationkey" - JOIN t4 - ON t3."n_regionkey" = t4."r_regionkey" - WHERE - t4."r_name" = 'EUROPE' AND t1."p_partkey" = t0."ps_partkey" - ) -) SELECT - t6."s_acctbal", - t6."s_name", - t6."n_name", - t6."p_partkey", - t6."p_mfgr", - t6."s_address", - t6."s_phone", - t6."s_comment" + "t24"."s_acctbal" AS "s_acctbal", + "t24"."s_name" AS "s_name", + "t24"."n_name" AS "n_name", + "t24"."p_partkey" AS "p_partkey", + "t24"."p_mfgr" AS "p_mfgr", + "t24"."s_address" AS "s_address", + "t24"."s_phone" AS "s_phone", + "t24"."s_comment" AS "s_comment" FROM ( SELECT - t5."s_acctbal" AS "s_acctbal", - t5."s_name" AS "s_name", - t5."n_name" AS "n_name", - t5."p_partkey" AS "p_partkey", - t5."p_mfgr" AS "p_mfgr", - t5."s_address" AS "s_address", - t5."s_phone" AS "s_phone", - t5."s_comment" AS "s_comment" - FROM t5 -) AS t6 + "t5"."p_partkey" AS "p_partkey", + "t5"."p_name" AS "p_name", + "t5"."p_mfgr" AS "p_mfgr", + "t5"."p_brand" AS "p_brand", + "t5"."p_type" AS "p_type", + "t5"."p_size" AS "p_size", + "t5"."p_container" AS "p_container", + "t5"."p_retailprice" AS "p_retailprice", + "t5"."p_comment" AS "p_comment", + "t10"."ps_partkey" AS "ps_partkey", + "t10"."ps_suppkey" AS "ps_suppkey", + "t10"."ps_availqty" AS "ps_availqty", + "t10"."ps_supplycost" AS "ps_supplycost", + "t10"."ps_comment" AS "ps_comment", + "t11"."s_suppkey" AS "s_suppkey", + "t11"."s_name" AS "s_name", + "t11"."s_address" AS "s_address", + "t11"."s_nationkey" AS "s_nationkey", + "t11"."s_phone" AS "s_phone", + "t11"."s_acctbal" AS "s_acctbal", + "t11"."s_comment" AS "s_comment", + "t13"."n_nationkey" AS "n_nationkey", + "t13"."n_name" AS "n_name", + "t13"."n_regionkey" AS "n_regionkey", + "t13"."n_comment" AS "n_comment", + "t15"."r_regionkey" AS "r_regionkey", + "t15"."r_name" AS "r_name", + "t15"."r_comment" AS "r_comment" + FROM ( + SELECT + "t0"."P_PARTKEY" AS "p_partkey", + "t0"."P_NAME" AS "p_name", + "t0"."P_MFGR" AS "p_mfgr", + "t0"."P_BRAND" AS "p_brand", + "t0"."P_TYPE" AS "p_type", + "t0"."P_SIZE" AS "p_size", + "t0"."P_CONTAINER" AS "p_container", + "t0"."P_RETAILPRICE" AS "p_retailprice", + "t0"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t1"."PS_PARTKEY" AS "ps_partkey", + "t1"."PS_SUPPKEY" AS "ps_suppkey", + "t1"."PS_AVAILQTY" AS "ps_availqty", + "t1"."PS_SUPPLYCOST" AS "ps_supplycost", + "t1"."PS_COMMENT" 
AS "ps_comment" + FROM "PARTSUPP" AS "t1" + ) AS "t10" + ON "t5"."p_partkey" = "t10"."ps_partkey" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t11" + ON "t11"."s_suppkey" = "t10"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t13" + ON "t11"."s_nationkey" = "t13"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."R_REGIONKEY" AS "r_regionkey", + "t4"."R_NAME" AS "r_name", + "t4"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t4" + ) AS "t15" + ON "t13"."n_regionkey" = "t15"."r_regionkey" +) AS "t24" +WHERE + "t24"."p_size" = 15 + AND "t24"."p_type" LIKE '%BRASS' + AND "t24"."r_name" = 'EUROPE' + AND "t24"."ps_supplycost" = ( + SELECT + MIN("t26"."ps_supplycost") AS "Min(ps_supplycost)" + FROM ( + SELECT + "t25"."ps_partkey" AS "ps_partkey", + "t25"."ps_suppkey" AS "ps_suppkey", + "t25"."ps_availqty" AS "ps_availqty", + "t25"."ps_supplycost" AS "ps_supplycost", + "t25"."ps_comment" AS "ps_comment", + "t25"."s_suppkey" AS "s_suppkey", + "t25"."s_name" AS "s_name", + "t25"."s_address" AS "s_address", + "t25"."s_nationkey" AS "s_nationkey", + "t25"."s_phone" AS "s_phone", + "t25"."s_acctbal" AS "s_acctbal", + "t25"."s_comment" AS "s_comment", + "t25"."n_nationkey" AS "n_nationkey", + "t25"."n_name" AS "n_name", + "t25"."n_regionkey" AS "n_regionkey", + "t25"."n_comment" AS "n_comment", + "t25"."r_regionkey" AS "r_regionkey", + "t25"."r_name" AS "r_name", + "t25"."r_comment" AS "r_comment" + FROM ( + SELECT + "t6"."ps_partkey" AS "ps_partkey", + "t6"."ps_suppkey" AS "ps_suppkey", + "t6"."ps_availqty" AS "ps_availqty", + "t6"."ps_supplycost" AS "ps_supplycost", + "t6"."ps_comment" AS "ps_comment", + "t12"."s_suppkey" AS "s_suppkey", + "t12"."s_name" AS "s_name", + "t12"."s_address" AS "s_address", + "t12"."s_nationkey" AS "s_nationkey", + "t12"."s_phone" AS "s_phone", + "t12"."s_acctbal" AS "s_acctbal", + "t12"."s_comment" AS "s_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment", + "t16"."r_regionkey" AS "r_regionkey", + "t16"."r_name" AS "r_name", + "t16"."r_comment" AS "r_comment" + FROM ( + SELECT + "t1"."PS_PARTKEY" AS "ps_partkey", + "t1"."PS_SUPPKEY" AS "ps_suppkey", + "t1"."PS_AVAILQTY" AS "ps_availqty", + "t1"."PS_SUPPLYCOST" AS "ps_supplycost", + "t1"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t1" + ) AS "t6" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t12" + ON "t12"."s_suppkey" = "t6"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t14" + ON "t12"."s_nationkey" = "t14"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."R_REGIONKEY" AS "r_regionkey", + "t4"."R_NAME" AS "r_name", + "t4"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t4" + ) AS "t16" + ON "t14"."n_regionkey" 
= "t16"."r_regionkey" + ) AS "t25" + WHERE + "t25"."r_name" = 'EUROPE' AND "t24"."p_partkey" = "t25"."ps_partkey" + ) AS "t26" + ) ORDER BY - t6."s_acctbal" DESC, - t6."n_name" ASC, - t6."s_name" ASC, - t6."p_partkey" ASC + "t24"."s_acctbal" DESC NULLS LAST, + "t24"."n_name" ASC, + "t24"."s_name" ASC, + "t24"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql index 105609c556ca..adb97afaf7f1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/duckdb/h03.sql @@ -1,39 +1,103 @@ -WITH t0 AS ( +SELECT + t10.l_orderkey, + t10.revenue, + t10.o_orderdate, + t10.o_shippriority +FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_shippriority AS o_shippriority, - SUM(t4.l_extendedprice * ( - CAST(1 AS TINYINT) - t4.l_discount + t9.l_orderkey, + t9.o_orderdate, + t9.o_shippriority, + SUM(t9.l_extendedprice * ( + CAST(1 AS TINYINT) - t9.l_discount )) AS revenue - FROM main.customer AS t2 - JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - WHERE - t2.c_mktsegment = 'BUILDING' - AND t3.o_orderdate < MAKE_DATE(1995, 3, 15) - AND t4.l_shipdate > MAKE_DATE(1995, 3, 15) + FROM ( + SELECT + t8.c_custkey, + t8.c_name, + t8.c_address, + t8.c_nationkey, + t8.c_phone, + t8.c_acctbal, + t8.c_mktsegment, + t8.c_comment, + t8.o_orderkey, + t8.o_custkey, + t8.o_orderstatus, + t8.o_totalprice, + t8.o_orderdate, + t8.o_orderpriority, + t8.o_clerk, + t8.o_shippriority, + t8.o_comment, + t8.l_orderkey, + t8.l_partkey, + t8.l_suppkey, + t8.l_linenumber, + t8.l_quantity, + t8.l_extendedprice, + t8.l_discount, + t8.l_tax, + t8.l_returnflag, + t8.l_linestatus, + t8.l_shipdate, + t8.l_commitdate, + t8.l_receiptdate, + t8.l_shipinstruct, + t8.l_shipmode, + t8.l_comment + FROM ( + SELECT + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.o_orderkey, + t4.o_custkey, + t4.o_orderstatus, + t4.o_totalprice, + t4.o_orderdate, + t4.o_orderpriority, + t4.o_clerk, + t4.o_shippriority, + t4.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment + FROM customer AS t3 + INNER JOIN orders AS t4 + ON t3.c_custkey = t4.o_custkey + INNER JOIN lineitem AS t5 + ON t5.l_orderkey = t4.o_orderkey + ) AS t8 + WHERE + t8.c_mktsegment = 'BUILDING' + AND t8.o_orderdate < MAKE_DATE(1995, 3, 15) + AND t8.l_shipdate > MAKE_DATE(1995, 3, 15) + ) AS t9 GROUP BY 1, 2, 3 -) -SELECT - t1.l_orderkey, - t1.revenue, - t1.o_orderdate, - t1.o_shippriority -FROM ( - SELECT - t0.l_orderkey AS l_orderkey, - t0.revenue AS revenue, - t0.o_orderdate AS o_orderdate, - t0.o_shippriority AS o_shippriority - FROM t0 -) AS t1 +) AS t10 ORDER BY - t1.revenue DESC, - t1.o_orderdate ASC + t10.revenue DESC, + t10.o_orderdate ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql index 3c5c4819ed7e..13a8f7da2bd2 100644 --- 
a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/snowflake/h03.sql @@ -1,81 +1,145 @@ -WITH t1 AS ( - SELECT - t5."C_CUSTKEY" AS "c_custkey", - t5."C_NAME" AS "c_name", - t5."C_ADDRESS" AS "c_address", - t5."C_NATIONKEY" AS "c_nationkey", - t5."C_PHONE" AS "c_phone", - t5."C_ACCTBAL" AS "c_acctbal", - t5."C_MKTSEGMENT" AS "c_mktsegment", - t5."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t5 -), t0 AS ( - SELECT - t5."O_ORDERKEY" AS "o_orderkey", - t5."O_CUSTKEY" AS "o_custkey", - t5."O_ORDERSTATUS" AS "o_orderstatus", - t5."O_TOTALPRICE" AS "o_totalprice", - t5."O_ORDERDATE" AS "o_orderdate", - t5."O_ORDERPRIORITY" AS "o_orderpriority", - t5."O_CLERK" AS "o_clerk", - t5."O_SHIPPRIORITY" AS "o_shippriority", - t5."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t5 -), t2 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t3 AS ( +SELECT + "t12"."l_orderkey" AS "l_orderkey", + "t12"."revenue" AS "revenue", + "t12"."o_orderdate" AS "o_orderdate", + "t12"."o_shippriority" AS "o_shippriority" +FROM ( SELECT - t2."l_orderkey" AS "l_orderkey", - t0."o_orderdate" AS "o_orderdate", - t0."o_shippriority" AS "o_shippriority", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t11"."l_orderkey" AS "l_orderkey", + "t11"."o_orderdate" AS "o_orderdate", + "t11"."o_shippriority" AS "o_shippriority", + SUM("t11"."l_extendedprice" * ( + 1 - "t11"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = t0."o_orderkey" - WHERE - t1."c_mktsegment" = 'BUILDING' - AND t0."o_orderdate" < DATE_FROM_PARTS(1995, 3, 15) - AND t2."l_shipdate" > DATE_FROM_PARTS(1995, 3, 15) + FROM ( + SELECT + "t10"."c_custkey" AS "c_custkey", + "t10"."c_name" AS "c_name", + "t10"."c_address" AS "c_address", + "t10"."c_nationkey" AS "c_nationkey", + "t10"."c_phone" AS "c_phone", + "t10"."c_acctbal" AS "c_acctbal", + "t10"."c_mktsegment" AS "c_mktsegment", + "t10"."c_comment" AS "c_comment", + "t10"."o_orderkey" AS "o_orderkey", + "t10"."o_custkey" AS "o_custkey", + "t10"."o_orderstatus" AS "o_orderstatus", + "t10"."o_totalprice" AS "o_totalprice", + "t10"."o_orderdate" AS "o_orderdate", + "t10"."o_orderpriority" AS "o_orderpriority", + "t10"."o_clerk" AS "o_clerk", + "t10"."o_shippriority" AS "o_shippriority", + "t10"."o_comment" AS "o_comment", + "t10"."l_orderkey" AS "l_orderkey", + "t10"."l_partkey" AS "l_partkey", + "t10"."l_suppkey" AS "l_suppkey", + "t10"."l_linenumber" AS "l_linenumber", + "t10"."l_quantity" AS "l_quantity", + "t10"."l_extendedprice" AS "l_extendedprice", + "t10"."l_discount" AS "l_discount", + "t10"."l_tax" AS "l_tax", + "t10"."l_returnflag" AS "l_returnflag", + "t10"."l_linestatus" AS "l_linestatus", + "t10"."l_shipdate" AS "l_shipdate", + "t10"."l_commitdate" AS "l_commitdate", + 
"t10"."l_receiptdate" AS "l_receiptdate", + "t10"."l_shipinstruct" AS "l_shipinstruct", + "t10"."l_shipmode" AS "l_shipmode", + "t10"."l_comment" AS "l_comment" + FROM ( + SELECT + "t3"."c_custkey" AS "c_custkey", + "t3"."c_name" AS "c_name", + "t3"."c_address" AS "c_address", + "t3"."c_nationkey" AS "c_nationkey", + "t3"."c_phone" AS "c_phone", + "t3"."c_acctbal" AS "c_acctbal", + "t3"."c_mktsegment" AS "c_mktsegment", + "t3"."c_comment" AS "c_comment", + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t7"."l_orderkey" AS "l_orderkey", + "t7"."l_partkey" AS "l_partkey", + "t7"."l_suppkey" AS "l_suppkey", + "t7"."l_linenumber" AS "l_linenumber", + "t7"."l_quantity" AS "l_quantity", + "t7"."l_extendedprice" AS "l_extendedprice", + "t7"."l_discount" AS "l_discount", + "t7"."l_tax" AS "l_tax", + "t7"."l_returnflag" AS "l_returnflag", + "t7"."l_linestatus" AS "l_linestatus", + "t7"."l_shipdate" AS "l_shipdate", + "t7"."l_commitdate" AS "l_commitdate", + "t7"."l_receiptdate" AS "l_receiptdate", + "t7"."l_shipinstruct" AS "l_shipinstruct", + "t7"."l_shipmode" AS "l_shipmode", + "t7"."l_comment" AS "l_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t6" + ON "t3"."c_custkey" = "t6"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t7" + ON "t7"."l_orderkey" = "t6"."o_orderkey" + ) AS "t10" + WHERE + "t10"."c_mktsegment" = 'BUILDING' + AND "t10"."o_orderdate" < DATEFROMPARTS(1995, 3, 15) + AND "t10"."l_shipdate" > DATEFROMPARTS(1995, 3, 15) + ) AS "t11" GROUP BY 1, 2, 3 -) -SELECT - t4."l_orderkey", - t4."revenue", - t4."o_orderdate", - t4."o_shippriority" -FROM ( - SELECT - t3."l_orderkey" AS "l_orderkey", - t3."revenue" AS "revenue", - t3."o_orderdate" AS "o_orderdate", - t3."o_shippriority" AS "o_shippriority" - FROM t3 -) AS t4 +) AS "t12" ORDER BY - t4."revenue" DESC, - t4."o_orderdate" ASC + "t12"."revenue" DESC NULLS LAST, + "t12"."o_orderdate" ASC LIMIT 10 \ No newline at end 
of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql index b4ef1e6dabfc..77ba19f9cc07 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/duckdb/h04.sql @@ -1,20 +1,39 @@ SELECT - t0.o_orderpriority, - COUNT(*) AS order_count -FROM main.orders AS t0 -WHERE - ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_1 - FROM main.lineitem AS t1 - WHERE - t1.l_orderkey = t0.o_orderkey AND t1.l_commitdate < t1.l_receiptdate - ) - ) - AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) - AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) -GROUP BY - 1 + t4.o_orderpriority, + t4.order_count +FROM ( + SELECT + t3.o_orderpriority, + COUNT(*) AS order_count + FROM ( + SELECT + t0.o_orderkey, + t0.o_custkey, + t0.o_orderstatus, + t0.o_totalprice, + t0.o_orderdate, + t0.o_orderpriority, + t0.o_clerk, + t0.o_shippriority, + t0.o_comment + FROM orders AS t0 + WHERE + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t1 + WHERE + ( + t1.l_orderkey = t0.o_orderkey + ) AND ( + t1.l_commitdate < t1.l_receiptdate + ) + ) + AND t0.o_orderdate >= MAKE_DATE(1993, 7, 1) + AND t0.o_orderdate < MAKE_DATE(1993, 10, 1) + ) AS t3 + GROUP BY + 1 +) AS t4 ORDER BY - t0.o_orderpriority ASC \ No newline at end of file + t4.o_orderpriority ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql index 756fa1049150..67291d6a3632 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/snowflake/h04.sql @@ -1,52 +1,42 @@ -WITH t1 AS ( - SELECT - t2."O_ORDERKEY" AS "o_orderkey", - t2."O_CUSTKEY" AS "o_custkey", - t2."O_ORDERSTATUS" AS "o_orderstatus", - t2."O_TOTALPRICE" AS "o_totalprice", - t2."O_ORDERDATE" AS "o_orderdate", - t2."O_ORDERPRIORITY" AS "o_orderpriority", - t2."O_CLERK" AS "o_clerk", - t2."O_SHIPPRIORITY" AS "o_shippriority", - t2."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t2 -), t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -) SELECT - t1."o_orderpriority", - COUNT(*) AS "order_count" -FROM t1 -WHERE - ( - EXISTS( - SELECT - 1 AS anon_1 - FROM t0 - WHERE - t0."l_orderkey" = t1."o_orderkey" AND t0."l_commitdate" < t0."l_receiptdate" - ) - ) - AND t1."o_orderdate" >= DATE_FROM_PARTS(1993, 7, 1) - AND t1."o_orderdate" < DATE_FROM_PARTS(1993, 10, 1) -GROUP BY - 1 + "t4"."o_orderpriority" AS "o_orderpriority", + "t4"."order_count" AS "order_count" +FROM ( + SELECT + "t3"."o_orderpriority" AS "o_orderpriority", + COUNT(*) AS "order_count" + FROM ( + SELECT + "t0"."O_ORDERKEY" AS "o_orderkey", + "t0"."O_CUSTKEY" AS "o_custkey", + 
"t0"."O_ORDERSTATUS" AS "o_orderstatus", + "t0"."O_TOTALPRICE" AS "o_totalprice", + "t0"."O_ORDERDATE" AS "o_orderdate", + "t0"."O_ORDERPRIORITY" AS "o_orderpriority", + "t0"."O_CLERK" AS "o_clerk", + "t0"."O_SHIPPRIORITY" AS "o_shippriority", + "t0"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t0" + WHERE + EXISTS( + ( + SELECT + 1 AS "1" + FROM "LINEITEM" AS "t1" + WHERE + ( + "t1"."L_ORDERKEY" = "t0"."O_ORDERKEY" + ) + AND ( + "t1"."L_COMMITDATE" < "t1"."L_RECEIPTDATE" + ) + ) + ) + AND "t0"."O_ORDERDATE" >= DATEFROMPARTS(1993, 7, 1) + AND "t0"."O_ORDERDATE" < DATEFROMPARTS(1993, 10, 1) + ) AS "t3" + GROUP BY + 1 +) AS "t4" ORDER BY - t1."o_orderpriority" ASC \ No newline at end of file + "t4"."o_orderpriority" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql index 90574ad58db1..ae3bbac7941f 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/duckdb/h05.sql @@ -1,29 +1,129 @@ SELECT - t0.n_name, - t0.revenue + t19.n_name, + t19.revenue FROM ( SELECT - t5.n_name AS n_name, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount + t18.n_name, + SUM(t18.l_extendedprice * ( + CAST(1 AS TINYINT) - t18.l_discount )) AS revenue - FROM main.customer AS t1 - JOIN main.orders AS t2 - ON t1.c_custkey = t2.o_custkey - JOIN main.lineitem AS t3 - ON t3.l_orderkey = t2.o_orderkey - JOIN main.supplier AS t4 - ON t3.l_suppkey = t4.s_suppkey - JOIN main.nation AS t5 - ON t1.c_nationkey = t4.s_nationkey AND t4.s_nationkey = t5.n_nationkey - JOIN main.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'ASIA' - AND t2.o_orderdate >= MAKE_DATE(1994, 1, 1) - AND t2.o_orderdate < MAKE_DATE(1995, 1, 1) + FROM ( + SELECT + t17.c_custkey, + t17.c_name, + t17.c_address, + t17.c_nationkey, + t17.c_phone, + t17.c_acctbal, + t17.c_mktsegment, + t17.c_comment, + t17.o_orderkey, + t17.o_custkey, + t17.o_orderstatus, + t17.o_totalprice, + t17.o_orderdate, + t17.o_orderpriority, + t17.o_clerk, + t17.o_shippriority, + t17.o_comment, + t17.l_orderkey, + t17.l_partkey, + t17.l_suppkey, + t17.l_linenumber, + t17.l_quantity, + t17.l_extendedprice, + t17.l_discount, + t17.l_tax, + t17.l_returnflag, + t17.l_linestatus, + t17.l_shipdate, + t17.l_commitdate, + t17.l_receiptdate, + t17.l_shipinstruct, + t17.l_shipmode, + t17.l_comment, + t17.s_suppkey, + t17.s_name, + t17.s_address, + t17.s_nationkey, + t17.s_phone, + t17.s_acctbal, + t17.s_comment, + t17.n_nationkey, + t17.n_name, + t17.n_regionkey, + t17.n_comment, + t17.r_regionkey, + t17.r_name, + t17.r_comment + FROM ( + SELECT + t6.c_custkey, + t6.c_name, + t6.c_address, + t6.c_nationkey, + t6.c_phone, + t6.c_acctbal, + t6.c_mktsegment, + t6.c_comment, + t7.o_orderkey, + t7.o_custkey, + t7.o_orderstatus, + t7.o_totalprice, + t7.o_orderdate, + t7.o_orderpriority, + t7.o_clerk, + t7.o_shippriority, + t7.o_comment, + t8.l_orderkey, + t8.l_partkey, + t8.l_suppkey, + t8.l_linenumber, + t8.l_quantity, + t8.l_extendedprice, + t8.l_discount, + t8.l_tax, + t8.l_returnflag, + t8.l_linestatus, + t8.l_shipdate, + t8.l_commitdate, + t8.l_receiptdate, + t8.l_shipinstruct, + t8.l_shipmode, + t8.l_comment, + t9.s_suppkey, + t9.s_name, + t9.s_address, + t9.s_nationkey, + t9.s_phone, + t9.s_acctbal, + t9.s_comment, + t10.n_nationkey, + t10.n_name, + t10.n_regionkey, + t10.n_comment, + t11.r_regionkey, + t11.r_name, + 
t11.r_comment + FROM customer AS t6 + INNER JOIN orders AS t7 + ON t6.c_custkey = t7.o_custkey + INNER JOIN lineitem AS t8 + ON t8.l_orderkey = t7.o_orderkey + INNER JOIN supplier AS t9 + ON t8.l_suppkey = t9.s_suppkey + INNER JOIN nation AS t10 + ON t6.c_nationkey = t9.s_nationkey AND t9.s_nationkey = t10.n_nationkey + INNER JOIN region AS t11 + ON t10.n_regionkey = t11.r_regionkey + ) AS t17 + WHERE + t17.r_name = 'ASIA' + AND t17.o_orderdate >= MAKE_DATE(1994, 1, 1) + AND t17.o_orderdate < MAKE_DATE(1995, 1, 1) + ) AS t18 GROUP BY 1 -) AS t0 +) AS t19 ORDER BY - t0.revenue DESC \ No newline at end of file + t19.revenue DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql index 0b1c85164dfa..4ec8d7241f0d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/snowflake/h05.sql @@ -1,95 +1,195 @@ -WITH t1 AS ( - SELECT - t7."C_CUSTKEY" AS "c_custkey", - t7."C_NAME" AS "c_name", - t7."C_ADDRESS" AS "c_address", - t7."C_NATIONKEY" AS "c_nationkey", - t7."C_PHONE" AS "c_phone", - t7."C_ACCTBAL" AS "c_acctbal", - t7."C_MKTSEGMENT" AS "c_mktsegment", - t7."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t7 -), t0 AS ( - SELECT - t7."O_ORDERKEY" AS "o_orderkey", - t7."O_CUSTKEY" AS "o_custkey", - t7."O_ORDERSTATUS" AS "o_orderstatus", - t7."O_TOTALPRICE" AS "o_totalprice", - t7."O_ORDERDATE" AS "o_orderdate", - t7."O_ORDERPRIORITY" AS "o_orderpriority", - t7."O_CLERK" AS "o_clerk", - t7."O_SHIPPRIORITY" AS "o_shippriority", - t7."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t7 -), t2 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS "l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t3 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t4 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t5 AS ( - SELECT - t7."R_REGIONKEY" AS "r_regionkey", - t7."R_NAME" AS "r_name", - t7."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t7 -) SELECT - t6."n_name", - t6."revenue" + "t24"."n_name" AS "n_name", + "t24"."revenue" AS "revenue" FROM ( SELECT - t4."n_name" AS "n_name", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t23"."n_name" AS "n_name", + SUM("t23"."l_extendedprice" * ( + 1 - "t23"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = 
t0."o_orderkey" - JOIN t3 - ON t2."l_suppkey" = t3."s_suppkey" - JOIN t4 - ON t1."c_nationkey" = t3."s_nationkey" AND t3."s_nationkey" = t4."n_nationkey" - JOIN t5 - ON t4."n_regionkey" = t5."r_regionkey" - WHERE - t5."r_name" = 'ASIA' - AND t0."o_orderdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."o_orderdate" < DATE_FROM_PARTS(1995, 1, 1) + FROM ( + SELECT + "t22"."c_custkey" AS "c_custkey", + "t22"."c_name" AS "c_name", + "t22"."c_address" AS "c_address", + "t22"."c_nationkey" AS "c_nationkey", + "t22"."c_phone" AS "c_phone", + "t22"."c_acctbal" AS "c_acctbal", + "t22"."c_mktsegment" AS "c_mktsegment", + "t22"."c_comment" AS "c_comment", + "t22"."o_orderkey" AS "o_orderkey", + "t22"."o_custkey" AS "o_custkey", + "t22"."o_orderstatus" AS "o_orderstatus", + "t22"."o_totalprice" AS "o_totalprice", + "t22"."o_orderdate" AS "o_orderdate", + "t22"."o_orderpriority" AS "o_orderpriority", + "t22"."o_clerk" AS "o_clerk", + "t22"."o_shippriority" AS "o_shippriority", + "t22"."o_comment" AS "o_comment", + "t22"."l_orderkey" AS "l_orderkey", + "t22"."l_partkey" AS "l_partkey", + "t22"."l_suppkey" AS "l_suppkey", + "t22"."l_linenumber" AS "l_linenumber", + "t22"."l_quantity" AS "l_quantity", + "t22"."l_extendedprice" AS "l_extendedprice", + "t22"."l_discount" AS "l_discount", + "t22"."l_tax" AS "l_tax", + "t22"."l_returnflag" AS "l_returnflag", + "t22"."l_linestatus" AS "l_linestatus", + "t22"."l_shipdate" AS "l_shipdate", + "t22"."l_commitdate" AS "l_commitdate", + "t22"."l_receiptdate" AS "l_receiptdate", + "t22"."l_shipinstruct" AS "l_shipinstruct", + "t22"."l_shipmode" AS "l_shipmode", + "t22"."l_comment" AS "l_comment", + "t22"."s_suppkey" AS "s_suppkey", + "t22"."s_name" AS "s_name", + "t22"."s_address" AS "s_address", + "t22"."s_nationkey" AS "s_nationkey", + "t22"."s_phone" AS "s_phone", + "t22"."s_acctbal" AS "s_acctbal", + "t22"."s_comment" AS "s_comment", + "t22"."n_nationkey" AS "n_nationkey", + "t22"."n_name" AS "n_name", + "t22"."n_regionkey" AS "n_regionkey", + "t22"."n_comment" AS "n_comment", + "t22"."r_regionkey" AS "r_regionkey", + "t22"."r_name" AS "r_name", + "t22"."r_comment" AS "r_comment" + FROM ( + SELECT + "t6"."c_custkey" AS "c_custkey", + "t6"."c_name" AS "c_name", + "t6"."c_address" AS "c_address", + "t6"."c_nationkey" AS "c_nationkey", + "t6"."c_phone" AS "c_phone", + "t6"."c_acctbal" AS "c_acctbal", + "t6"."c_mktsegment" AS "c_mktsegment", + "t6"."c_comment" AS "c_comment", + "t12"."o_orderkey" AS "o_orderkey", + "t12"."o_custkey" AS "o_custkey", + "t12"."o_orderstatus" AS "o_orderstatus", + "t12"."o_totalprice" AS "o_totalprice", + "t12"."o_orderdate" AS "o_orderdate", + "t12"."o_orderpriority" AS "o_orderpriority", + "t12"."o_clerk" AS "o_clerk", + "t12"."o_shippriority" AS "o_shippriority", + "t12"."o_comment" AS "o_comment", + "t13"."l_orderkey" AS "l_orderkey", + "t13"."l_partkey" AS "l_partkey", + "t13"."l_suppkey" AS "l_suppkey", + "t13"."l_linenumber" AS "l_linenumber", + "t13"."l_quantity" AS "l_quantity", + "t13"."l_extendedprice" AS "l_extendedprice", + "t13"."l_discount" AS "l_discount", + "t13"."l_tax" AS "l_tax", + "t13"."l_returnflag" AS "l_returnflag", + "t13"."l_linestatus" AS "l_linestatus", + "t13"."l_shipdate" AS "l_shipdate", + "t13"."l_commitdate" AS "l_commitdate", + "t13"."l_receiptdate" AS "l_receiptdate", + "t13"."l_shipinstruct" AS "l_shipinstruct", + "t13"."l_shipmode" AS "l_shipmode", + "t13"."l_comment" AS "l_comment", + "t14"."s_suppkey" AS "s_suppkey", + "t14"."s_name" AS "s_name", + "t14"."s_address" AS "s_address", + 
"t14"."s_nationkey" AS "s_nationkey", + "t14"."s_phone" AS "s_phone", + "t14"."s_acctbal" AS "s_acctbal", + "t14"."s_comment" AS "s_comment", + "t15"."n_nationkey" AS "n_nationkey", + "t15"."n_name" AS "n_name", + "t15"."n_regionkey" AS "n_regionkey", + "t15"."n_comment" AS "n_comment", + "t16"."r_regionkey" AS "r_regionkey", + "t16"."r_name" AS "r_name", + "t16"."r_comment" AS "r_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t12" + ON "t6"."c_custkey" = "t12"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t13" + ON "t13"."l_orderkey" = "t12"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."S_SUPPKEY" AS "s_suppkey", + "t3"."S_NAME" AS "s_name", + "t3"."S_ADDRESS" AS "s_address", + "t3"."S_NATIONKEY" AS "s_nationkey", + "t3"."S_PHONE" AS "s_phone", + "t3"."S_ACCTBAL" AS "s_acctbal", + "t3"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t3" + ) AS "t14" + ON "t13"."l_suppkey" = "t14"."s_suppkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t15" + ON "t6"."c_nationkey" = "t14"."s_nationkey" + AND "t14"."s_nationkey" = "t15"."n_nationkey" + INNER JOIN ( + SELECT + "t5"."R_REGIONKEY" AS "r_regionkey", + "t5"."R_NAME" AS "r_name", + "t5"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t5" + ) AS "t16" + ON "t15"."n_regionkey" = "t16"."r_regionkey" + ) AS "t22" + WHERE + "t22"."r_name" = 'ASIA' + AND "t22"."o_orderdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t22"."o_orderdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t23" GROUP BY 1 -) AS t6 +) AS "t24" ORDER BY - t6."revenue" DESC \ No newline at end of file + "t24"."revenue" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql index 8ec16703aeee..eea01a0277a6 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/duckdb/h06.sql @@ -1,8 +1,27 @@ SELECT - SUM(t0.l_extendedprice * t0.l_discount) AS revenue -FROM main.lineitem AS t0 -WHERE - 
t0.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND t0.l_shipdate < MAKE_DATE(1995, 1, 1) - AND t0.l_discount BETWEEN CAST(0.05 AS REAL(53)) AND CAST(0.07 AS REAL(53)) - AND t0.l_quantity < CAST(24 AS TINYINT) \ No newline at end of file + SUM(t1.l_extendedprice * t1.l_discount) AS revenue +FROM ( + SELECT + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment + FROM lineitem AS t0 + WHERE + t0.l_shipdate >= MAKE_DATE(1994, 1, 1) + AND t0.l_shipdate < MAKE_DATE(1995, 1, 1) + AND t0.l_discount BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) + AND t0.l_quantity < CAST(24 AS TINYINT) +) AS t1 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql index 3ac88adee307..5d0be126fb13 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/snowflake/h06.sql @@ -1,28 +1,27 @@ -WITH t0 AS ( - SELECT - t1."L_ORDERKEY" AS "l_orderkey", - t1."L_PARTKEY" AS "l_partkey", - t1."L_SUPPKEY" AS "l_suppkey", - t1."L_LINENUMBER" AS "l_linenumber", - t1."L_QUANTITY" AS "l_quantity", - t1."L_EXTENDEDPRICE" AS "l_extendedprice", - t1."L_DISCOUNT" AS "l_discount", - t1."L_TAX" AS "l_tax", - t1."L_RETURNFLAG" AS "l_returnflag", - t1."L_LINESTATUS" AS "l_linestatus", - t1."L_SHIPDATE" AS "l_shipdate", - t1."L_COMMITDATE" AS "l_commitdate", - t1."L_RECEIPTDATE" AS "l_receiptdate", - t1."L_SHIPINSTRUCT" AS "l_shipinstruct", - t1."L_SHIPMODE" AS "l_shipmode", - t1."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t1 -) SELECT - SUM(t0."l_extendedprice" * t0."l_discount") AS "revenue" -FROM t0 -WHERE - t0."l_shipdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1995, 1, 1) - AND t0."l_discount" BETWEEN 0.05 AND 0.07 - AND t0."l_quantity" < 24 \ No newline at end of file + SUM("t1"."l_extendedprice" * "t1"."l_discount") AS "revenue" +FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + WHERE + "t0"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) + AND "t0"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + AND "t0"."L_DISCOUNT" BETWEEN 0.05 AND 0.07 + AND "t0"."L_QUANTITY" < 24 +) AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql index 171bbd4b75d8..35411472de9a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/duckdb/h07.sql @@ -1,51 +1,71 @@ -WITH t0 AS ( - SELECT - t6.n_name AS supp_nation, 
- t7.n_name AS cust_nation, - t3.l_shipdate AS l_shipdate, - t3.l_extendedprice AS l_extendedprice, - t3.l_discount AS l_discount, - CAST(EXTRACT(year FROM t3.l_shipdate) AS SMALLINT) AS l_year, - t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount - ) AS volume - FROM main.supplier AS t2 - JOIN main.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN main.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN main.customer AS t5 - ON t5.c_custkey = t4.o_custkey - JOIN main.nation AS t6 - ON t2.s_nationkey = t6.n_nationkey - JOIN main.nation AS t7 - ON t5.c_nationkey = t7.n_nationkey -) SELECT - t1.supp_nation, - t1.cust_nation, - t1.l_year, - t1.revenue + t19.supp_nation, + t19.cust_nation, + t19.l_year, + t19.revenue FROM ( SELECT - t0.supp_nation AS supp_nation, - t0.cust_nation AS cust_nation, - t0.l_year AS l_year, - SUM(t0.volume) AS revenue - FROM t0 - WHERE - ( - t0.cust_nation = 'FRANCE' AND t0.supp_nation = 'GERMANY' - OR t0.cust_nation = 'GERMANY' - AND t0.supp_nation = 'FRANCE' - ) - AND t0.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + t18.supp_nation, + t18.cust_nation, + t18.l_year, + SUM(t18.volume) AS revenue + FROM ( + SELECT + t17.supp_nation, + t17.cust_nation, + t17.l_shipdate, + t17.l_extendedprice, + t17.l_discount, + t17.l_year, + t17.volume + FROM ( + SELECT + t9.n_name AS supp_nation, + t11.n_name AS cust_nation, + t6.l_shipdate, + t6.l_extendedprice, + t6.l_discount, + EXTRACT('year' FROM t6.l_shipdate) AS l_year, + t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount + ) AS volume + FROM supplier AS t5 + INNER JOIN lineitem AS t6 + ON t5.s_suppkey = t6.l_suppkey + INNER JOIN orders AS t7 + ON t7.o_orderkey = t6.l_orderkey + INNER JOIN customer AS t8 + ON t8.c_custkey = t7.o_custkey + INNER JOIN nation AS t9 + ON t5.s_nationkey = t9.n_nationkey + INNER JOIN nation AS t11 + ON t8.c_nationkey = t11.n_nationkey + ) AS t17 + WHERE + ( + ( + ( + t17.cust_nation = 'FRANCE' + ) AND ( + t17.supp_nation = 'GERMANY' + ) + ) + OR ( + ( + t17.cust_nation = 'GERMANY' + ) AND ( + t17.supp_nation = 'FRANCE' + ) + ) + ) + AND t17.l_shipdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + ) AS t18 GROUP BY 1, 2, 3 -) AS t1 +) AS t19 ORDER BY - t1.supp_nation ASC, - t1.cust_nation ASC, - t1.l_year ASC \ No newline at end of file + t19.supp_nation ASC, + t19.cust_nation ASC, + t19.l_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql index f7cdf6bd08e1..69d9b1af31d1 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/snowflake/h07.sql @@ -1,110 +1,128 @@ -WITH t1 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t0 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS 
"l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t2 AS ( - SELECT - t7."O_ORDERKEY" AS "o_orderkey", - t7."O_CUSTKEY" AS "o_custkey", - t7."O_ORDERSTATUS" AS "o_orderstatus", - t7."O_TOTALPRICE" AS "o_totalprice", - t7."O_ORDERDATE" AS "o_orderdate", - t7."O_ORDERPRIORITY" AS "o_orderpriority", - t7."O_CLERK" AS "o_clerk", - t7."O_SHIPPRIORITY" AS "o_shippriority", - t7."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t7 -), t3 AS ( - SELECT - t7."C_CUSTKEY" AS "c_custkey", - t7."C_NAME" AS "c_name", - t7."C_ADDRESS" AS "c_address", - t7."C_NATIONKEY" AS "c_nationkey", - t7."C_PHONE" AS "c_phone", - t7."C_ACCTBAL" AS "c_acctbal", - t7."C_MKTSEGMENT" AS "c_mktsegment", - t7."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t7 -), t4 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t5 AS ( - SELECT - t4."n_name" AS "supp_nation", - t7."n_name" AS "cust_nation", - t0."l_shipdate" AS "l_shipdate", - t0."l_extendedprice" AS "l_extendedprice", - t0."l_discount" AS "l_discount", - CAST(DATE_PART(year, t0."l_shipdate") AS SMALLINT) AS "l_year", - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) AS "volume" - FROM t1 - JOIN t0 - ON t1."s_suppkey" = t0."l_suppkey" - JOIN t2 - ON t2."o_orderkey" = t0."l_orderkey" - JOIN t3 - ON t3."c_custkey" = t2."o_custkey" - JOIN t4 - ON t1."s_nationkey" = t4."n_nationkey" - JOIN t4 AS t7 - ON t3."c_nationkey" = t7."n_nationkey" -) SELECT - t6."supp_nation", - t6."cust_nation", - t6."l_year", - t6."revenue" + * FROM ( SELECT - t5."supp_nation" AS "supp_nation", - t5."cust_nation" AS "cust_nation", - t5."l_year" AS "l_year", - SUM(t5."volume") AS "revenue" - FROM t5 - WHERE - ( - t5."cust_nation" = 'FRANCE' AND t5."supp_nation" = 'GERMANY' - OR t5."cust_nation" = 'GERMANY' - AND t5."supp_nation" = 'FRANCE' - ) - AND t5."l_shipdate" BETWEEN DATE_FROM_PARTS(1995, 1, 1) AND DATE_FROM_PARTS(1996, 12, 31) + "t17"."supp_nation" AS "supp_nation", + "t17"."cust_nation" AS "cust_nation", + "t17"."l_year" AS "l_year", + SUM("t17"."volume") AS "revenue" + FROM ( + SELECT + * + FROM ( + SELECT + "t9"."n_name" AS "supp_nation", + "t10"."n_name" AS "cust_nation", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + DATE_PART('year', "t6"."l_shipdate") AS "l_year", + "t6"."l_extendedprice" * ( + 1 - "t6"."l_discount" + ) AS "volume" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + 
"t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t6" + ON "t5"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."O_ORDERKEY" AS "o_orderkey", + "t2"."O_CUSTKEY" AS "o_custkey", + "t2"."O_ORDERSTATUS" AS "o_orderstatus", + "t2"."O_TOTALPRICE" AS "o_totalprice", + "t2"."O_ORDERDATE" AS "o_orderdate", + "t2"."O_ORDERPRIORITY" AS "o_orderpriority", + "t2"."O_CLERK" AS "o_clerk", + "t2"."O_SHIPPRIORITY" AS "o_shippriority", + "t2"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t2" + ) AS "t7" + ON "t7"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."C_CUSTKEY" AS "c_custkey", + "t3"."C_NAME" AS "c_name", + "t3"."C_ADDRESS" AS "c_address", + "t3"."C_NATIONKEY" AS "c_nationkey", + "t3"."C_PHONE" AS "c_phone", + "t3"."C_ACCTBAL" AS "c_acctbal", + "t3"."C_MKTSEGMENT" AS "c_mktsegment", + "t3"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t3" + ) AS "t8" + ON "t8"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t9" + ON "t5"."s_nationkey" = "t9"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."N_NATIONKEY" AS "n_nationkey", + "t4"."N_NAME" AS "n_name", + "t4"."N_REGIONKEY" AS "n_regionkey", + "t4"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t4" + ) AS "t10" + ON "t8"."c_nationkey" = "t10"."n_nationkey" + ) AS "t16" + WHERE + ( + ( + ( + "t16"."cust_nation" = 'FRANCE' + ) AND ( + "t16"."supp_nation" = 'GERMANY' + ) + ) + OR ( + ( + "t16"."cust_nation" = 'GERMANY' + ) AND ( + "t16"."supp_nation" = 'FRANCE' + ) + ) + ) + AND "t16"."l_shipdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + ) AS "t17" GROUP BY 1, 2, 3 -) AS t6 +) AS "t18" ORDER BY - t6."supp_nation" ASC, - t6."cust_nation" ASC, - t6."l_year" ASC \ No newline at end of file + "t18"."supp_nation" ASC, + "t18"."cust_nation" ASC, + "t18"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql index 26823ce9ad6b..97b1be133851 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/duckdb/h08.sql @@ -1,64 +1,52 @@ -WITH t0 AS ( - SELECT - CAST(EXTRACT(year FROM t7.o_orderdate) AS SMALLINT) AS o_year, - t5.l_extendedprice * ( - CAST(1 AS TINYINT) - t5.l_discount - ) AS volume, - t11.n_name AS nation, - t10.r_name AS r_name, - t7.o_orderdate AS o_orderdate, - t4.p_type AS p_type - FROM main.part AS t4 - JOIN main.lineitem AS t5 - ON t4.p_partkey = t5.l_partkey - JOIN main.supplier AS t6 - ON t6.s_suppkey = t5.l_suppkey - JOIN main.orders AS t7 - ON t5.l_orderkey = t7.o_orderkey - JOIN main.customer AS t8 - ON t7.o_custkey = t8.c_custkey - JOIN main.nation AS t9 - ON t8.c_nationkey = t9.n_nationkey - JOIN main.region AS t10 - ON t9.n_regionkey = t10.r_regionkey - JOIN main.nation AS t11 - ON t6.s_nationkey = t11.n_nationkey -), t1 AS ( - SELECT - t0.o_year AS o_year, - t0.volume AS volume, - t0.nation AS nation, - t0.r_name AS r_name, - t0.o_orderdate AS o_orderdate, - t0.p_type AS p_type - FROM t0 - WHERE - t0.r_name = 'AMERICA' - AND t0.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) - AND t0.p_type = 'ECONOMY ANODIZED 
STEEL' -), t2 AS ( - SELECT - t1.o_year AS o_year, - t1.volume AS volume, - t1.nation AS nation, - t1.r_name AS r_name, - t1.o_orderdate AS o_orderdate, - t1.p_type AS p_type, - CASE WHEN ( - t1.nation = 'BRAZIL' - ) THEN t1.volume ELSE CAST(0 AS TINYINT) END AS nation_volume - FROM t1 -) SELECT - t3.o_year, - t3.mkt_share + t25.o_year, + t25.mkt_share FROM ( SELECT - t2.o_year AS o_year, - SUM(t2.nation_volume) / SUM(t2.volume) AS mkt_share - FROM t2 + t24.o_year, + SUM(t24.nation_volume) / SUM(t24.volume) AS mkt_share + FROM ( + SELECT + t23.o_year, + t23.volume, + t23.nation, + t23.r_name, + t23.o_orderdate, + t23.p_type, + CASE WHEN t23.nation = 'BRAZIL' THEN t23.volume ELSE CAST(0 AS TINYINT) END AS nation_volume + FROM ( + SELECT + EXTRACT('year' FROM t10.o_orderdate) AS o_year, + t8.l_extendedprice * ( + CAST(1 AS TINYINT) - t8.l_discount + ) AS volume, + t15.n_name AS nation, + t14.r_name, + t10.o_orderdate, + t7.p_type + FROM part AS t7 + INNER JOIN lineitem AS t8 + ON t7.p_partkey = t8.l_partkey + INNER JOIN supplier AS t9 + ON t9.s_suppkey = t8.l_suppkey + INNER JOIN orders AS t10 + ON t8.l_orderkey = t10.o_orderkey + INNER JOIN customer AS t11 + ON t10.o_custkey = t11.c_custkey + INNER JOIN nation AS t12 + ON t11.c_nationkey = t12.n_nationkey + INNER JOIN region AS t14 + ON t12.n_regionkey = t14.r_regionkey + INNER JOIN nation AS t15 + ON t9.s_nationkey = t15.n_nationkey + ) AS t23 + WHERE + t23.r_name = 'AMERICA' + AND t23.o_orderdate BETWEEN MAKE_DATE(1995, 1, 1) AND MAKE_DATE(1996, 12, 31) + AND t23.p_type = 'ECONOMY ANODIZED STEEL' + ) AS t24 GROUP BY 1 -) AS t3 +) AS t25 ORDER BY - t3.o_year ASC \ No newline at end of file + t25.o_year ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql index 84b17e92e572..800d7a74c645 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/snowflake/h08.sql @@ -1,146 +1,136 @@ -WITH t1 AS ( - SELECT - t12."P_PARTKEY" AS "p_partkey", - t12."P_NAME" AS "p_name", - t12."P_MFGR" AS "p_mfgr", - t12."P_BRAND" AS "p_brand", - t12."P_TYPE" AS "p_type", - t12."P_SIZE" AS "p_size", - t12."P_CONTAINER" AS "p_container", - t12."P_RETAILPRICE" AS "p_retailprice", - t12."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t12 -), t0 AS ( - SELECT - t12."L_ORDERKEY" AS "l_orderkey", - t12."L_PARTKEY" AS "l_partkey", - t12."L_SUPPKEY" AS "l_suppkey", - t12."L_LINENUMBER" AS "l_linenumber", - t12."L_QUANTITY" AS "l_quantity", - t12."L_EXTENDEDPRICE" AS "l_extendedprice", - t12."L_DISCOUNT" AS "l_discount", - t12."L_TAX" AS "l_tax", - t12."L_RETURNFLAG" AS "l_returnflag", - t12."L_LINESTATUS" AS "l_linestatus", - t12."L_SHIPDATE" AS "l_shipdate", - t12."L_COMMITDATE" AS "l_commitdate", - t12."L_RECEIPTDATE" AS "l_receiptdate", - t12."L_SHIPINSTRUCT" AS "l_shipinstruct", - t12."L_SHIPMODE" AS "l_shipmode", - t12."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t12 -), t2 AS ( - SELECT - t12."S_SUPPKEY" AS "s_suppkey", - t12."S_NAME" AS "s_name", - t12."S_ADDRESS" AS "s_address", - t12."S_NATIONKEY" AS "s_nationkey", - t12."S_PHONE" AS "s_phone", - t12."S_ACCTBAL" AS "s_acctbal", - t12."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t12 -), t3 AS ( - SELECT - t12."O_ORDERKEY" AS "o_orderkey", - t12."O_CUSTKEY" AS "o_custkey", - 
t12."O_ORDERSTATUS" AS "o_orderstatus", - t12."O_TOTALPRICE" AS "o_totalprice", - t12."O_ORDERDATE" AS "o_orderdate", - t12."O_ORDERPRIORITY" AS "o_orderpriority", - t12."O_CLERK" AS "o_clerk", - t12."O_SHIPPRIORITY" AS "o_shippriority", - t12."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t12 -), t4 AS ( - SELECT - t12."C_CUSTKEY" AS "c_custkey", - t12."C_NAME" AS "c_name", - t12."C_ADDRESS" AS "c_address", - t12."C_NATIONKEY" AS "c_nationkey", - t12."C_PHONE" AS "c_phone", - t12."C_ACCTBAL" AS "c_acctbal", - t12."C_MKTSEGMENT" AS "c_mktsegment", - t12."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t12 -), t5 AS ( - SELECT - t12."N_NATIONKEY" AS "n_nationkey", - t12."N_NAME" AS "n_name", - t12."N_REGIONKEY" AS "n_regionkey", - t12."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t12 -), t6 AS ( - SELECT - t12."R_REGIONKEY" AS "r_regionkey", - t12."R_NAME" AS "r_name", - t12."R_COMMENT" AS "r_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."REGION" AS t12 -), t7 AS ( - SELECT - CAST(DATE_PART(year, t3."o_orderdate") AS SMALLINT) AS "o_year", - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) AS "volume", - t12."n_name" AS "nation", - t6."r_name" AS "r_name", - t3."o_orderdate" AS "o_orderdate", - t1."p_type" AS "p_type" - FROM t1 - JOIN t0 - ON t1."p_partkey" = t0."l_partkey" - JOIN t2 - ON t2."s_suppkey" = t0."l_suppkey" - JOIN t3 - ON t0."l_orderkey" = t3."o_orderkey" - JOIN t4 - ON t3."o_custkey" = t4."c_custkey" - JOIN t5 - ON t4."c_nationkey" = t5."n_nationkey" - JOIN t6 - ON t5."n_regionkey" = t6."r_regionkey" - JOIN t5 AS t12 - ON t2."s_nationkey" = t12."n_nationkey" -), t8 AS ( - SELECT - t7."o_year" AS "o_year", - t7."volume" AS "volume", - t7."nation" AS "nation", - t7."r_name" AS "r_name", - t7."o_orderdate" AS "o_orderdate", - t7."p_type" AS "p_type" - FROM t7 - WHERE - t7."r_name" = 'AMERICA' - AND t7."o_orderdate" BETWEEN '1995-01-01' AND '1996-12-31' - AND t7."p_type" = 'ECONOMY ANODIZED STEEL' -), t9 AS ( - SELECT - t8."o_year" AS "o_year", - t8."volume" AS "volume", - t8."nation" AS "nation", - t8."r_name" AS "r_name", - t8."o_orderdate" AS "o_orderdate", - t8."p_type" AS "p_type", - CASE WHEN ( - t8."nation" = 'BRAZIL' - ) THEN t8."volume" ELSE 0 END AS "nation_volume" - FROM t8 -), t10 AS ( - SELECT - t9."o_year" AS "o_year", - SUM(t9."nation_volume") / SUM(t9."volume") AS "mkt_share" - FROM t9 - GROUP BY - 1 -) SELECT - CAST(t11."o_year" AS BIGINT) AS "o_year", - CAST(t11."mkt_share" AS DECIMAL(38, 10)) AS "mkt_share" + "t30"."o_year" AS "o_year", + "t30"."mkt_share" AS "mkt_share" FROM ( SELECT - t10."o_year" AS "o_year", - t10."mkt_share" AS "mkt_share" - FROM t10 - ORDER BY - t10."o_year" -) AS t11 \ No newline at end of file + "t29"."o_year" AS "o_year", + SUM("t29"."nation_volume") / SUM("t29"."volume") AS "mkt_share" + FROM ( + SELECT + "t28"."o_year" AS "o_year", + "t28"."volume" AS "volume", + "t28"."nation" AS "nation", + "t28"."r_name" AS "r_name", + "t28"."o_orderdate" AS "o_orderdate", + "t28"."p_type" AS "p_type", + CASE WHEN "t28"."nation" = 'BRAZIL' THEN "t28"."volume" ELSE 0 END AS "nation_volume" + FROM ( + SELECT + DATE_PART('year', "t16"."o_orderdate") AS "o_year", + "t14"."l_extendedprice" * ( + 1 - "t14"."l_discount" + ) AS "volume", + "t19"."n_name" AS "nation", + "t20"."r_name" AS "r_name", + "t16"."o_orderdate" AS "o_orderdate", + "t7"."p_type" AS "p_type" + FROM ( + SELECT + "t0"."P_PARTKEY" AS "p_partkey", + "t0"."P_NAME" AS "p_name", + 
"t0"."P_MFGR" AS "p_mfgr", + "t0"."P_BRAND" AS "p_brand", + "t0"."P_TYPE" AS "p_type", + "t0"."P_SIZE" AS "p_size", + "t0"."P_CONTAINER" AS "p_container", + "t0"."P_RETAILPRICE" AS "p_retailprice", + "t0"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t14" + ON "t7"."p_partkey" = "t14"."l_partkey" + INNER JOIN ( + SELECT + "t2"."S_SUPPKEY" AS "s_suppkey", + "t2"."S_NAME" AS "s_name", + "t2"."S_ADDRESS" AS "s_address", + "t2"."S_NATIONKEY" AS "s_nationkey", + "t2"."S_PHONE" AS "s_phone", + "t2"."S_ACCTBAL" AS "s_acctbal", + "t2"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t2" + ) AS "t15" + ON "t15"."s_suppkey" = "t14"."l_suppkey" + INNER JOIN ( + SELECT + "t3"."O_ORDERKEY" AS "o_orderkey", + "t3"."O_CUSTKEY" AS "o_custkey", + "t3"."O_ORDERSTATUS" AS "o_orderstatus", + "t3"."O_TOTALPRICE" AS "o_totalprice", + "t3"."O_ORDERDATE" AS "o_orderdate", + "t3"."O_ORDERPRIORITY" AS "o_orderpriority", + "t3"."O_CLERK" AS "o_clerk", + "t3"."O_SHIPPRIORITY" AS "o_shippriority", + "t3"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t3" + ) AS "t16" + ON "t14"."l_orderkey" = "t16"."o_orderkey" + INNER JOIN ( + SELECT + "t4"."C_CUSTKEY" AS "c_custkey", + "t4"."C_NAME" AS "c_name", + "t4"."C_ADDRESS" AS "c_address", + "t4"."C_NATIONKEY" AS "c_nationkey", + "t4"."C_PHONE" AS "c_phone", + "t4"."C_ACCTBAL" AS "c_acctbal", + "t4"."C_MKTSEGMENT" AS "c_mktsegment", + "t4"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t4" + ) AS "t17" + ON "t16"."o_custkey" = "t17"."c_custkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t18" + ON "t17"."c_nationkey" = "t18"."n_nationkey" + INNER JOIN ( + SELECT + "t6"."R_REGIONKEY" AS "r_regionkey", + "t6"."R_NAME" AS "r_name", + "t6"."R_COMMENT" AS "r_comment" + FROM "REGION" AS "t6" + ) AS "t20" + ON "t18"."n_regionkey" = "t20"."r_regionkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t19" + ON "t15"."s_nationkey" = "t19"."n_nationkey" + ) AS "t28" + WHERE + "t28"."r_name" = 'AMERICA' + AND "t28"."o_orderdate" BETWEEN DATEFROMPARTS(1995, 1, 1) AND DATEFROMPARTS(1996, 12, 31) + AND "t28"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t29" + GROUP BY + 1 +) AS "t30" +ORDER BY + "t30"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql index b524abecbc3d..21489f03313d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/duckdb/h09.sql @@ -1,39 +1,49 @@ -WITH t0 AS 
( - SELECT - t2.l_extendedprice * ( - CAST(1 AS TINYINT) - t2.l_discount - ) - t4.ps_supplycost * t2.l_quantity AS amount, - CAST(EXTRACT(year FROM t6.o_orderdate) AS SMALLINT) AS o_year, - t7.n_name AS nation, - t5.p_name AS p_name - FROM main.lineitem AS t2 - JOIN main.supplier AS t3 - ON t3.s_suppkey = t2.l_suppkey - JOIN main.partsupp AS t4 - ON t4.ps_suppkey = t2.l_suppkey AND t4.ps_partkey = t2.l_partkey - JOIN main.part AS t5 - ON t5.p_partkey = t2.l_partkey - JOIN main.orders AS t6 - ON t6.o_orderkey = t2.l_orderkey - JOIN main.nation AS t7 - ON t3.s_nationkey = t7.n_nationkey - WHERE - t5.p_name LIKE '%green%' -) SELECT - t1.nation, - t1.o_year, - t1.sum_profit + t19.nation, + t19.o_year, + t19.sum_profit FROM ( SELECT - t0.nation AS nation, - t0.o_year AS o_year, - SUM(t0.amount) AS sum_profit - FROM t0 + t18.nation, + t18.o_year, + SUM(t18.amount) AS sum_profit + FROM ( + SELECT + t17.amount, + t17.o_year, + t17.nation, + t17.p_name + FROM ( + SELECT + ( + t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount + ) + ) - ( + t8.ps_supplycost * t6.l_quantity + ) AS amount, + EXTRACT('year' FROM t10.o_orderdate) AS o_year, + t11.n_name AS nation, + t9.p_name + FROM lineitem AS t6 + INNER JOIN supplier AS t7 + ON t7.s_suppkey = t6.l_suppkey + INNER JOIN partsupp AS t8 + ON t8.ps_suppkey = t6.l_suppkey AND t8.ps_partkey = t6.l_partkey + INNER JOIN part AS t9 + ON t9.p_partkey = t6.l_partkey + INNER JOIN orders AS t10 + ON t10.o_orderkey = t6.l_orderkey + INNER JOIN nation AS t11 + ON t7.s_nationkey = t11.n_nationkey + ) AS t17 + WHERE + t17.p_name LIKE '%green%' + ) AS t18 GROUP BY 1, 2 -) AS t1 +) AS t19 ORDER BY - t1.nation ASC, - t1.o_year DESC \ No newline at end of file + t19.nation ASC, + t19.o_year DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql index 0982de40732d..2ae1e1172403 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/snowflake/h09.sql @@ -1,107 +1,117 @@ -WITH t0 AS ( - SELECT - t8."L_ORDERKEY" AS "l_orderkey", - t8."L_PARTKEY" AS "l_partkey", - t8."L_SUPPKEY" AS "l_suppkey", - t8."L_LINENUMBER" AS "l_linenumber", - t8."L_QUANTITY" AS "l_quantity", - t8."L_EXTENDEDPRICE" AS "l_extendedprice", - t8."L_DISCOUNT" AS "l_discount", - t8."L_TAX" AS "l_tax", - t8."L_RETURNFLAG" AS "l_returnflag", - t8."L_LINESTATUS" AS "l_linestatus", - t8."L_SHIPDATE" AS "l_shipdate", - t8."L_COMMITDATE" AS "l_commitdate", - t8."L_RECEIPTDATE" AS "l_receiptdate", - t8."L_SHIPINSTRUCT" AS "l_shipinstruct", - t8."L_SHIPMODE" AS "l_shipmode", - t8."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t8 -), t1 AS ( - SELECT - t8."S_SUPPKEY" AS "s_suppkey", - t8."S_NAME" AS "s_name", - t8."S_ADDRESS" AS "s_address", - t8."S_NATIONKEY" AS "s_nationkey", - t8."S_PHONE" AS "s_phone", - t8."S_ACCTBAL" AS "s_acctbal", - t8."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t8 -), t2 AS ( - SELECT - t8."PS_PARTKEY" AS "ps_partkey", - t8."PS_SUPPKEY" AS "ps_suppkey", - t8."PS_AVAILQTY" AS "ps_availqty", - t8."PS_SUPPLYCOST" AS "ps_supplycost", - t8."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t8 -), t3 AS ( - SELECT - t8."P_PARTKEY" AS "p_partkey", - t8."P_NAME" AS "p_name", - t8."P_MFGR" AS "p_mfgr", - t8."P_BRAND" AS "p_brand", - t8."P_TYPE" AS 
"p_type", - t8."P_SIZE" AS "p_size", - t8."P_CONTAINER" AS "p_container", - t8."P_RETAILPRICE" AS "p_retailprice", - t8."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t8 -), t4 AS ( - SELECT - t8."O_ORDERKEY" AS "o_orderkey", - t8."O_CUSTKEY" AS "o_custkey", - t8."O_ORDERSTATUS" AS "o_orderstatus", - t8."O_TOTALPRICE" AS "o_totalprice", - t8."O_ORDERDATE" AS "o_orderdate", - t8."O_ORDERPRIORITY" AS "o_orderpriority", - t8."O_CLERK" AS "o_clerk", - t8."O_SHIPPRIORITY" AS "o_shippriority", - t8."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t8 -), t5 AS ( - SELECT - t8."N_NATIONKEY" AS "n_nationkey", - t8."N_NAME" AS "n_name", - t8."N_REGIONKEY" AS "n_regionkey", - t8."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t8 -), t6 AS ( - SELECT - t0."l_extendedprice" * ( - 1 - t0."l_discount" - ) - t2."ps_supplycost" * t0."l_quantity" AS "amount", - CAST(DATE_PART(year, t4."o_orderdate") AS SMALLINT) AS "o_year", - t5."n_name" AS "nation", - t3."p_name" AS "p_name" - FROM t0 - JOIN t1 - ON t1."s_suppkey" = t0."l_suppkey" - JOIN t2 - ON t2."ps_suppkey" = t0."l_suppkey" AND t2."ps_partkey" = t0."l_partkey" - JOIN t3 - ON t3."p_partkey" = t0."l_partkey" - JOIN t4 - ON t4."o_orderkey" = t0."l_orderkey" - JOIN t5 - ON t1."s_nationkey" = t5."n_nationkey" - WHERE - t3."p_name" LIKE '%green%' -) SELECT - t7."nation", - t7."o_year", - t7."sum_profit" + "t24"."nation" AS "nation", + "t24"."o_year" AS "o_year", + "t24"."sum_profit" AS "sum_profit" FROM ( SELECT - t6."nation" AS "nation", - t6."o_year" AS "o_year", - SUM(t6."amount") AS "sum_profit" - FROM t6 + "t23"."nation" AS "nation", + "t23"."o_year" AS "o_year", + SUM("t23"."amount") AS "sum_profit" + FROM ( + SELECT + "t22"."amount" AS "amount", + "t22"."o_year" AS "o_year", + "t22"."nation" AS "nation", + "t22"."p_name" AS "p_name" + FROM ( + SELECT + ( + "t6"."l_extendedprice" * ( + 1 - "t6"."l_discount" + ) + ) - ( + "t13"."ps_supplycost" * "t6"."l_quantity" + ) AS "amount", + DATE_PART('year', "t15"."o_orderdate") AS "o_year", + "t16"."n_name" AS "nation", + "t14"."p_name" AS "p_name" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t12" + ON "t12"."s_suppkey" = "t6"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."PS_PARTKEY" AS "ps_partkey", + "t2"."PS_SUPPKEY" AS "ps_suppkey", + "t2"."PS_AVAILQTY" AS "ps_availqty", + "t2"."PS_SUPPLYCOST" AS "ps_supplycost", + "t2"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t2" + ) AS "t13" + ON "t13"."ps_suppkey" = "t6"."l_suppkey" AND "t13"."ps_partkey" = "t6"."l_partkey" + INNER JOIN ( + SELECT + 
"t3"."P_PARTKEY" AS "p_partkey", + "t3"."P_NAME" AS "p_name", + "t3"."P_MFGR" AS "p_mfgr", + "t3"."P_BRAND" AS "p_brand", + "t3"."P_TYPE" AS "p_type", + "t3"."P_SIZE" AS "p_size", + "t3"."P_CONTAINER" AS "p_container", + "t3"."P_RETAILPRICE" AS "p_retailprice", + "t3"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t3" + ) AS "t14" + ON "t14"."p_partkey" = "t6"."l_partkey" + INNER JOIN ( + SELECT + "t4"."O_ORDERKEY" AS "o_orderkey", + "t4"."O_CUSTKEY" AS "o_custkey", + "t4"."O_ORDERSTATUS" AS "o_orderstatus", + "t4"."O_TOTALPRICE" AS "o_totalprice", + "t4"."O_ORDERDATE" AS "o_orderdate", + "t4"."O_ORDERPRIORITY" AS "o_orderpriority", + "t4"."O_CLERK" AS "o_clerk", + "t4"."O_SHIPPRIORITY" AS "o_shippriority", + "t4"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t4" + ) AS "t15" + ON "t15"."o_orderkey" = "t6"."l_orderkey" + INNER JOIN ( + SELECT + "t5"."N_NATIONKEY" AS "n_nationkey", + "t5"."N_NAME" AS "n_name", + "t5"."N_REGIONKEY" AS "n_regionkey", + "t5"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t5" + ) AS "t16" + ON "t12"."s_nationkey" = "t16"."n_nationkey" + ) AS "t22" + WHERE + "t22"."p_name" LIKE '%green%' + ) AS "t23" GROUP BY 1, 2 -) AS t7 +) AS "t24" ORDER BY - t7."nation" ASC, - t7."o_year" DESC \ No newline at end of file + "t24"."nation" ASC, + "t24"."o_year" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql index c1c8835fd3e8..a08b8198283b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/duckdb/h10.sql @@ -1,26 +1,115 @@ -WITH t0 AS ( +SELECT + t13.c_custkey, + t13.c_name, + t13.revenue, + t13.c_acctbal, + t13.n_name, + t13.c_address, + t13.c_phone, + t13.c_comment +FROM ( SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_acctbal AS c_acctbal, - t5.n_name AS n_name, - t2.c_address AS c_address, - t2.c_phone AS c_phone, - t2.c_comment AS c_comment, - SUM(t4.l_extendedprice * ( - CAST(1 AS TINYINT) - t4.l_discount + t12.c_custkey, + t12.c_name, + t12.c_acctbal, + t12.n_name, + t12.c_address, + t12.c_phone, + t12.c_comment, + SUM(t12.l_extendedprice * ( + CAST(1 AS TINYINT) - t12.l_discount )) AS revenue - FROM main.customer AS t2 - JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - JOIN main.nation AS t5 - ON t2.c_nationkey = t5.n_nationkey - WHERE - t3.o_orderdate >= MAKE_DATE(1993, 10, 1) - AND t3.o_orderdate < MAKE_DATE(1994, 1, 1) - AND t4.l_returnflag = 'R' + FROM ( + SELECT + t11.c_custkey, + t11.c_name, + t11.c_address, + t11.c_nationkey, + t11.c_phone, + t11.c_acctbal, + t11.c_mktsegment, + t11.c_comment, + t11.o_orderkey, + t11.o_custkey, + t11.o_orderstatus, + t11.o_totalprice, + t11.o_orderdate, + t11.o_orderpriority, + t11.o_clerk, + t11.o_shippriority, + t11.o_comment, + t11.l_orderkey, + t11.l_partkey, + t11.l_suppkey, + t11.l_linenumber, + t11.l_quantity, + t11.l_extendedprice, + t11.l_discount, + t11.l_tax, + t11.l_returnflag, + t11.l_linestatus, + t11.l_shipdate, + t11.l_commitdate, + t11.l_receiptdate, + t11.l_shipinstruct, + t11.l_shipmode, + t11.l_comment, + t11.n_nationkey, + t11.n_name, + t11.n_regionkey, + t11.n_comment + FROM ( + SELECT + t4.c_custkey, + t4.c_name, + t4.c_address, + t4.c_nationkey, + t4.c_phone, + t4.c_acctbal, + t4.c_mktsegment, + t4.c_comment, + t5.o_orderkey, + t5.o_custkey, + t5.o_orderstatus, + 
t5.o_totalprice, + t5.o_orderdate, + t5.o_orderpriority, + t5.o_clerk, + t5.o_shippriority, + t5.o_comment, + t6.l_orderkey, + t6.l_partkey, + t6.l_suppkey, + t6.l_linenumber, + t6.l_quantity, + t6.l_extendedprice, + t6.l_discount, + t6.l_tax, + t6.l_returnflag, + t6.l_linestatus, + t6.l_shipdate, + t6.l_commitdate, + t6.l_receiptdate, + t6.l_shipinstruct, + t6.l_shipmode, + t6.l_comment, + t7.n_nationkey, + t7.n_name, + t7.n_regionkey, + t7.n_comment + FROM customer AS t4 + INNER JOIN orders AS t5 + ON t4.c_custkey = t5.o_custkey + INNER JOIN lineitem AS t6 + ON t6.l_orderkey = t5.o_orderkey + INNER JOIN nation AS t7 + ON t4.c_nationkey = t7.n_nationkey + ) AS t11 + WHERE + t11.o_orderdate >= MAKE_DATE(1993, 10, 1) + AND t11.o_orderdate < MAKE_DATE(1994, 1, 1) + AND t11.l_returnflag = 'R' + ) AS t12 GROUP BY 1, 2, @@ -29,28 +118,7 @@ WITH t0 AS ( 5, 6, 7 -) -SELECT - t1.c_custkey, - t1.c_name, - t1.revenue, - t1.c_acctbal, - t1.n_name, - t1.c_address, - t1.c_phone, - t1.c_comment -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.revenue AS revenue, - t0.c_acctbal AS c_acctbal, - t0.n_name AS n_name, - t0.c_address AS c_address, - t0.c_phone AS c_phone, - t0.c_comment AS c_comment - FROM t0 -) AS t1 +) AS t13 ORDER BY - t1.revenue DESC + t13.revenue DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql index deb4545ccffb..a6f5f97cb8c8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/snowflake/h10.sql @@ -1,75 +1,164 @@ -WITH t1 AS ( - SELECT - t6."C_CUSTKEY" AS "c_custkey", - t6."C_NAME" AS "c_name", - t6."C_ADDRESS" AS "c_address", - t6."C_NATIONKEY" AS "c_nationkey", - t6."C_PHONE" AS "c_phone", - t6."C_ACCTBAL" AS "c_acctbal", - t6."C_MKTSEGMENT" AS "c_mktsegment", - t6."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t6 -), t0 AS ( - SELECT - t6."O_ORDERKEY" AS "o_orderkey", - t6."O_CUSTKEY" AS "o_custkey", - t6."O_ORDERSTATUS" AS "o_orderstatus", - t6."O_TOTALPRICE" AS "o_totalprice", - t6."O_ORDERDATE" AS "o_orderdate", - t6."O_ORDERPRIORITY" AS "o_orderpriority", - t6."O_CLERK" AS "o_clerk", - t6."O_SHIPPRIORITY" AS "o_shippriority", - t6."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t6 -), t2 AS ( - SELECT - t6."L_ORDERKEY" AS "l_orderkey", - t6."L_PARTKEY" AS "l_partkey", - t6."L_SUPPKEY" AS "l_suppkey", - t6."L_LINENUMBER" AS "l_linenumber", - t6."L_QUANTITY" AS "l_quantity", - t6."L_EXTENDEDPRICE" AS "l_extendedprice", - t6."L_DISCOUNT" AS "l_discount", - t6."L_TAX" AS "l_tax", - t6."L_RETURNFLAG" AS "l_returnflag", - t6."L_LINESTATUS" AS "l_linestatus", - t6."L_SHIPDATE" AS "l_shipdate", - t6."L_COMMITDATE" AS "l_commitdate", - t6."L_RECEIPTDATE" AS "l_receiptdate", - t6."L_SHIPINSTRUCT" AS "l_shipinstruct", - t6."L_SHIPMODE" AS "l_shipmode", - t6."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t6 -), t3 AS ( - SELECT - t6."N_NATIONKEY" AS "n_nationkey", - t6."N_NAME" AS "n_name", - t6."N_REGIONKEY" AS "n_regionkey", - t6."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t6 -), t4 AS ( +SELECT + "t16"."c_custkey" AS "c_custkey", + "t16"."c_name" AS "c_name", + "t16"."revenue" AS "revenue", + "t16"."c_acctbal" AS "c_acctbal", + "t16"."n_name" AS "n_name", + 
"t16"."c_address" AS "c_address", + "t16"."c_phone" AS "c_phone", + "t16"."c_comment" AS "c_comment" +FROM ( SELECT - t1."c_custkey" AS "c_custkey", - t1."c_name" AS "c_name", - t1."c_acctbal" AS "c_acctbal", - t3."n_name" AS "n_name", - t1."c_address" AS "c_address", - t1."c_phone" AS "c_phone", - t1."c_comment" AS "c_comment", - SUM(t2."l_extendedprice" * ( - 1 - t2."l_discount" + "t15"."c_custkey" AS "c_custkey", + "t15"."c_name" AS "c_name", + "t15"."c_acctbal" AS "c_acctbal", + "t15"."n_name" AS "n_name", + "t15"."c_address" AS "c_address", + "t15"."c_phone" AS "c_phone", + "t15"."c_comment" AS "c_comment", + SUM("t15"."l_extendedprice" * ( + 1 - "t15"."l_discount" )) AS "revenue" - FROM t1 - JOIN t0 - ON t1."c_custkey" = t0."o_custkey" - JOIN t2 - ON t2."l_orderkey" = t0."o_orderkey" - JOIN t3 - ON t1."c_nationkey" = t3."n_nationkey" - WHERE - t0."o_orderdate" >= DATE_FROM_PARTS(1993, 10, 1) - AND t0."o_orderdate" < DATE_FROM_PARTS(1994, 1, 1) - AND t2."l_returnflag" = 'R' + FROM ( + SELECT + "t14"."c_custkey" AS "c_custkey", + "t14"."c_name" AS "c_name", + "t14"."c_address" AS "c_address", + "t14"."c_nationkey" AS "c_nationkey", + "t14"."c_phone" AS "c_phone", + "t14"."c_acctbal" AS "c_acctbal", + "t14"."c_mktsegment" AS "c_mktsegment", + "t14"."c_comment" AS "c_comment", + "t14"."o_orderkey" AS "o_orderkey", + "t14"."o_custkey" AS "o_custkey", + "t14"."o_orderstatus" AS "o_orderstatus", + "t14"."o_totalprice" AS "o_totalprice", + "t14"."o_orderdate" AS "o_orderdate", + "t14"."o_orderpriority" AS "o_orderpriority", + "t14"."o_clerk" AS "o_clerk", + "t14"."o_shippriority" AS "o_shippriority", + "t14"."o_comment" AS "o_comment", + "t14"."l_orderkey" AS "l_orderkey", + "t14"."l_partkey" AS "l_partkey", + "t14"."l_suppkey" AS "l_suppkey", + "t14"."l_linenumber" AS "l_linenumber", + "t14"."l_quantity" AS "l_quantity", + "t14"."l_extendedprice" AS "l_extendedprice", + "t14"."l_discount" AS "l_discount", + "t14"."l_tax" AS "l_tax", + "t14"."l_returnflag" AS "l_returnflag", + "t14"."l_linestatus" AS "l_linestatus", + "t14"."l_shipdate" AS "l_shipdate", + "t14"."l_commitdate" AS "l_commitdate", + "t14"."l_receiptdate" AS "l_receiptdate", + "t14"."l_shipinstruct" AS "l_shipinstruct", + "t14"."l_shipmode" AS "l_shipmode", + "t14"."l_comment" AS "l_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment" + FROM ( + SELECT + "t4"."c_custkey" AS "c_custkey", + "t4"."c_name" AS "c_name", + "t4"."c_address" AS "c_address", + "t4"."c_nationkey" AS "c_nationkey", + "t4"."c_phone" AS "c_phone", + "t4"."c_acctbal" AS "c_acctbal", + "t4"."c_mktsegment" AS "c_mktsegment", + "t4"."c_comment" AS "c_comment", + "t8"."o_orderkey" AS "o_orderkey", + "t8"."o_custkey" AS "o_custkey", + "t8"."o_orderstatus" AS "o_orderstatus", + "t8"."o_totalprice" AS "o_totalprice", + "t8"."o_orderdate" AS "o_orderdate", + "t8"."o_orderpriority" AS "o_orderpriority", + "t8"."o_clerk" AS "o_clerk", + "t8"."o_shippriority" AS "o_shippriority", + "t8"."o_comment" AS "o_comment", + "t9"."l_orderkey" AS "l_orderkey", + "t9"."l_partkey" AS "l_partkey", + "t9"."l_suppkey" AS "l_suppkey", + "t9"."l_linenumber" AS "l_linenumber", + "t9"."l_quantity" AS "l_quantity", + "t9"."l_extendedprice" AS "l_extendedprice", + "t9"."l_discount" AS "l_discount", + "t9"."l_tax" AS "l_tax", + "t9"."l_returnflag" AS "l_returnflag", + "t9"."l_linestatus" AS "l_linestatus", + "t9"."l_shipdate" AS "l_shipdate", + "t9"."l_commitdate" AS "l_commitdate", + 
"t9"."l_receiptdate" AS "l_receiptdate", + "t9"."l_shipinstruct" AS "l_shipinstruct", + "t9"."l_shipmode" AS "l_shipmode", + "t9"."l_comment" AS "l_comment", + "t10"."n_nationkey" AS "n_nationkey", + "t10"."n_name" AS "n_name", + "t10"."n_regionkey" AS "n_regionkey", + "t10"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t8" + ON "t4"."c_custkey" = "t8"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t9" + ON "t9"."l_orderkey" = "t8"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t10" + ON "t4"."c_nationkey" = "t10"."n_nationkey" + ) AS "t14" + WHERE + "t14"."o_orderdate" >= DATEFROMPARTS(1993, 10, 1) + AND "t14"."o_orderdate" < DATEFROMPARTS(1994, 1, 1) + AND "t14"."l_returnflag" = 'R' + ) AS "t15" GROUP BY 1, 2, @@ -78,28 +167,7 @@ WITH t1 AS ( 5, 6, 7 -) -SELECT - t5."c_custkey", - t5."c_name", - t5."revenue", - t5."c_acctbal", - t5."n_name", - t5."c_address", - t5."c_phone", - t5."c_comment" -FROM ( - SELECT - t4."c_custkey" AS "c_custkey", - t4."c_name" AS "c_name", - t4."revenue" AS "revenue", - t4."c_acctbal" AS "c_acctbal", - t4."n_name" AS "n_name", - t4."c_address" AS "c_address", - t4."c_phone" AS "c_phone", - t4."c_comment" AS "c_comment" - FROM t4 -) AS t5 +) AS "t16" ORDER BY - t5."revenue" DESC + "t16"."revenue" DESC NULLS LAST LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql index c19193299b09..c5d401180d41 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/duckdb/h11.sql @@ -1,41 +1,109 @@ -WITH t0 AS ( - SELECT - t2.ps_partkey AS ps_partkey, - SUM(t2.ps_supplycost * t2.ps_availqty) AS value - FROM main.partsupp AS t2 - JOIN main.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN main.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey - WHERE - t4.n_name = 'GERMANY' - GROUP BY - 1 -) SELECT - t1.ps_partkey, - t1.value + t10.ps_partkey, 
+ t10.value FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.value AS value - FROM t0 - WHERE - t0.value > ( + t9.ps_partkey, + SUM(t9.ps_supplycost * t9.ps_availqty) AS value + FROM ( + SELECT + t8.ps_partkey, + t8.ps_suppkey, + t8.ps_availqty, + t8.ps_supplycost, + t8.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t8.n_nationkey, + t8.n_name, + t8.n_regionkey, + t8.n_comment + FROM ( + SELECT + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + t4.s_suppkey, + t4.s_name, + t4.s_address, + t4.s_nationkey, + t4.s_phone, + t4.s_acctbal, + t4.s_comment, + t5.n_nationkey, + t5.n_name, + t5.n_regionkey, + t5.n_comment + FROM partsupp AS t3 + INNER JOIN supplier AS t4 + ON t3.ps_suppkey = t4.s_suppkey + INNER JOIN nation AS t5 + ON t5.n_nationkey = t4.s_nationkey + ) AS t8 + WHERE + t8.n_name = 'GERMANY' + ) AS t9 + GROUP BY + 1 +) AS t10 +WHERE + t10.value > ( + ( SELECT - anon_1.total + SUM(t9.ps_supplycost * t9.ps_availqty) AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - SUM(t2.ps_supplycost * t2.ps_availqty) AS total - FROM main.partsupp AS t2 - JOIN main.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN main.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey + t8.ps_partkey, + t8.ps_suppkey, + t8.ps_availqty, + t8.ps_supplycost, + t8.ps_comment, + t8.s_suppkey, + t8.s_name, + t8.s_address, + t8.s_nationkey, + t8.s_phone, + t8.s_acctbal, + t8.s_comment, + t8.n_nationkey, + t8.n_name, + t8.n_regionkey, + t8.n_comment + FROM ( + SELECT + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + t4.s_suppkey, + t4.s_name, + t4.s_address, + t4.s_nationkey, + t4.s_phone, + t4.s_acctbal, + t4.s_comment, + t5.n_nationkey, + t5.n_name, + t5.n_regionkey, + t5.n_comment + FROM partsupp AS t3 + INNER JOIN supplier AS t4 + ON t3.ps_suppkey = t4.s_suppkey + INNER JOIN nation AS t5 + ON t5.n_nationkey = t4.s_nationkey + ) AS t8 WHERE - t4.n_name = 'GERMANY' - ) AS anon_1 - ) * CAST(0.0001 AS REAL(53)) -) AS t1 + t8.n_name = 'GERMANY' + ) AS t9 + ) * CAST(0.0001 AS DOUBLE) + ) ORDER BY - t1.value DESC \ No newline at end of file + t10.value DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql new file mode 100644 index 000000000000..5d311642834a --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/snowflake/h11.sql @@ -0,0 +1,159 @@ +SELECT + "t18"."ps_partkey" AS "ps_partkey", + "t18"."value" AS "value" +FROM ( + SELECT + "t16"."ps_partkey" AS "ps_partkey", + SUM("t16"."ps_supplycost" * "t16"."ps_availqty") AS "value" + FROM ( + SELECT + "t14"."ps_partkey" AS "ps_partkey", + "t14"."ps_suppkey" AS "ps_suppkey", + "t14"."ps_availqty" AS "ps_availqty", + "t14"."ps_supplycost" AS "ps_supplycost", + "t14"."ps_comment" AS "ps_comment", + "t14"."s_suppkey" AS "s_suppkey", + "t14"."s_name" AS "s_name", + "t14"."s_address" AS "s_address", + "t14"."s_nationkey" AS "s_nationkey", + "t14"."s_phone" AS "s_phone", + "t14"."s_acctbal" AS "s_acctbal", + "t14"."s_comment" AS "s_comment", + "t14"."n_nationkey" AS "n_nationkey", + "t14"."n_name" AS "n_name", + "t14"."n_regionkey" AS "n_regionkey", + "t14"."n_comment" AS "n_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS 
"ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t6"."s_suppkey" AS "s_suppkey", + "t6"."s_name" AS "s_name", + "t6"."s_address" AS "s_address", + "t6"."s_nationkey" AS "s_nationkey", + "t6"."s_phone" AS "s_phone", + "t6"."s_acctbal" AS "s_acctbal", + "t6"."s_comment" AS "s_comment", + "t8"."n_nationkey" AS "n_nationkey", + "t8"."n_name" AS "n_name", + "t8"."n_regionkey" AS "n_regionkey", + "t8"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS "ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t6" + ON "t3"."ps_suppkey" = "t6"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t8" + ON "t8"."n_nationkey" = "t6"."s_nationkey" + ) AS "t14" + WHERE + "t14"."n_name" = 'GERMANY' + ) AS "t16" + GROUP BY + 1 +) AS "t18" +WHERE + "t18"."value" > ( + ( + SELECT + SUM("t17"."ps_supplycost" * "t17"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" + FROM ( + SELECT + "t15"."ps_partkey" AS "ps_partkey", + "t15"."ps_suppkey" AS "ps_suppkey", + "t15"."ps_availqty" AS "ps_availqty", + "t15"."ps_supplycost" AS "ps_supplycost", + "t15"."ps_comment" AS "ps_comment", + "t15"."s_suppkey" AS "s_suppkey", + "t15"."s_name" AS "s_name", + "t15"."s_address" AS "s_address", + "t15"."s_nationkey" AS "s_nationkey", + "t15"."s_phone" AS "s_phone", + "t15"."s_acctbal" AS "s_acctbal", + "t15"."s_comment" AS "s_comment", + "t15"."n_nationkey" AS "n_nationkey", + "t15"."n_name" AS "n_name", + "t15"."n_regionkey" AS "n_regionkey", + "t15"."n_comment" AS "n_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS "ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t7"."s_suppkey" AS "s_suppkey", + "t7"."s_name" AS "s_name", + "t7"."s_address" AS "s_address", + "t7"."s_nationkey" AS "s_nationkey", + "t7"."s_phone" AS "s_phone", + "t7"."s_acctbal" AS "s_acctbal", + "t7"."s_comment" AS "s_comment", + "t9"."n_nationkey" AS "n_nationkey", + "t9"."n_name" AS "n_name", + "t9"."n_regionkey" AS "n_regionkey", + "t9"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS "ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."S_SUPPKEY" AS "s_suppkey", + "t1"."S_NAME" AS "s_name", + "t1"."S_ADDRESS" AS "s_address", + "t1"."S_NATIONKEY" AS "s_nationkey", + "t1"."S_PHONE" AS "s_phone", + "t1"."S_ACCTBAL" AS "s_acctbal", + "t1"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t1" + ) AS "t7" + ON "t3"."ps_suppkey" = "t7"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t9" + ON "t9"."n_nationkey" = "t7"."s_nationkey" + ) AS "t15" + WHERE + 
"t15"."n_name" = 'GERMANY' + ) AS "t17" + ) * 0.0001 + ) +ORDER BY + "t18"."value" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql index 0542a214407d..1b0c38b528aa 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/duckdb/h12.sql @@ -1,12 +1,12 @@ SELECT - t0.l_shipmode, - t0.high_line_count, - t0.low_line_count + t7.l_shipmode, + t7.high_line_count, + t7.low_line_count FROM ( SELECT - t2.l_shipmode AS l_shipmode, + t6.l_shipmode, SUM( - CASE t1.o_orderpriority + CASE t6.o_orderpriority WHEN '1-URGENT' THEN CAST(1 AS TINYINT) WHEN '2-HIGH' @@ -15,7 +15,7 @@ FROM ( END ) AS high_line_count, SUM( - CASE t1.o_orderpriority + CASE t6.o_orderpriority WHEN '1-URGENT' THEN CAST(0 AS TINYINT) WHEN '2-HIGH' @@ -23,17 +23,73 @@ FROM ( ELSE CAST(1 AS TINYINT) END ) AS low_line_count - FROM main.orders AS t1 - JOIN main.lineitem AS t2 - ON t1.o_orderkey = t2.l_orderkey - WHERE - t2.l_shipmode IN ('MAIL', 'SHIP') - AND t2.l_commitdate < t2.l_receiptdate - AND t2.l_shipdate < t2.l_commitdate - AND t2.l_receiptdate >= MAKE_DATE(1994, 1, 1) - AND t2.l_receiptdate < MAKE_DATE(1995, 1, 1) + FROM ( + SELECT + t5.o_orderkey, + t5.o_custkey, + t5.o_orderstatus, + t5.o_totalprice, + t5.o_orderdate, + t5.o_orderpriority, + t5.o_clerk, + t5.o_shippriority, + t5.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment + FROM ( + SELECT + t2.o_orderkey, + t2.o_custkey, + t2.o_orderstatus, + t2.o_totalprice, + t2.o_orderdate, + t2.o_orderpriority, + t2.o_clerk, + t2.o_shippriority, + t2.o_comment, + t3.l_orderkey, + t3.l_partkey, + t3.l_suppkey, + t3.l_linenumber, + t3.l_quantity, + t3.l_extendedprice, + t3.l_discount, + t3.l_tax, + t3.l_returnflag, + t3.l_linestatus, + t3.l_shipdate, + t3.l_commitdate, + t3.l_receiptdate, + t3.l_shipinstruct, + t3.l_shipmode, + t3.l_comment + FROM orders AS t2 + INNER JOIN lineitem AS t3 + ON t2.o_orderkey = t3.l_orderkey + ) AS t5 + WHERE + t5.l_shipmode IN ('MAIL', 'SHIP') + AND t5.l_commitdate < t5.l_receiptdate + AND t5.l_shipdate < t5.l_commitdate + AND t5.l_receiptdate >= MAKE_DATE(1994, 1, 1) + AND t5.l_receiptdate < MAKE_DATE(1995, 1, 1) + ) AS t6 GROUP BY 1 -) AS t0 +) AS t7 ORDER BY - t0.l_shipmode ASC \ No newline at end of file + t7.l_shipmode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql index 9bff7b2fc81c..799ef5b12cd0 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/snowflake/h12.sql @@ -1,55 +1,114 @@ -WITH t1 AS ( - SELECT - t3."O_ORDERKEY" AS "o_orderkey", - t3."O_CUSTKEY" AS "o_custkey", - t3."O_ORDERSTATUS" AS "o_orderstatus", - t3."O_TOTALPRICE" AS "o_totalprice", - t3."O_ORDERDATE" AS "o_orderdate", - t3."O_ORDERPRIORITY" AS "o_orderpriority", - t3."O_CLERK" AS "o_clerk", - t3."O_SHIPPRIORITY" AS "o_shippriority", - t3."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t3 -), t0 AS ( - SELECT - 
t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS "l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -) SELECT - t2."l_shipmode", - t2."high_line_count", - t2."low_line_count" + "t8"."l_shipmode" AS "l_shipmode", + "t8"."high_line_count" AS "high_line_count", + "t8"."low_line_count" AS "low_line_count" FROM ( SELECT - t0."l_shipmode" AS "l_shipmode", - SUM(CASE t1."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END) AS "high_line_count", - SUM(CASE t1."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END) AS "low_line_count" - FROM t1 - JOIN t0 - ON t1."o_orderkey" = t0."l_orderkey" - WHERE - t0."l_shipmode" IN ('MAIL', 'SHIP') - AND t0."l_commitdate" < t0."l_receiptdate" - AND t0."l_shipdate" < t0."l_commitdate" - AND t0."l_receiptdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_receiptdate" < DATE_FROM_PARTS(1995, 1, 1) + "t7"."l_shipmode" AS "l_shipmode", + SUM( + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + ) AS "high_line_count", + SUM( + CASE "t7"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + ) AS "low_line_count" + FROM ( + SELECT + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t6"."l_orderkey" AS "l_orderkey", + "t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment" + FROM ( + SELECT + "t2"."o_orderkey" AS "o_orderkey", + "t2"."o_custkey" AS "o_custkey", + "t2"."o_orderstatus" AS "o_orderstatus", + "t2"."o_totalprice" AS "o_totalprice", + "t2"."o_orderdate" AS "o_orderdate", + "t2"."o_orderpriority" AS "o_orderpriority", + "t2"."o_clerk" AS "o_clerk", + "t2"."o_shippriority" AS "o_shippriority", + "t2"."o_comment" AS "o_comment", + "t4"."l_orderkey" AS "l_orderkey", + "t4"."l_partkey" AS "l_partkey", + "t4"."l_suppkey" AS "l_suppkey", + "t4"."l_linenumber" AS "l_linenumber", + "t4"."l_quantity" AS "l_quantity", + "t4"."l_extendedprice" AS "l_extendedprice", + "t4"."l_discount" AS "l_discount", + "t4"."l_tax" AS "l_tax", + "t4"."l_returnflag" AS "l_returnflag", + "t4"."l_linestatus" AS "l_linestatus", + "t4"."l_shipdate" AS "l_shipdate", + "t4"."l_commitdate" AS "l_commitdate", + "t4"."l_receiptdate" AS "l_receiptdate", + 
"t4"."l_shipinstruct" AS "l_shipinstruct", + "t4"."l_shipmode" AS "l_shipmode", + "t4"."l_comment" AS "l_comment" + FROM ( + SELECT + "t0"."O_ORDERKEY" AS "o_orderkey", + "t0"."O_CUSTKEY" AS "o_custkey", + "t0"."O_ORDERSTATUS" AS "o_orderstatus", + "t0"."O_TOTALPRICE" AS "o_totalprice", + "t0"."O_ORDERDATE" AS "o_orderdate", + "t0"."O_ORDERPRIORITY" AS "o_orderpriority", + "t0"."O_CLERK" AS "o_clerk", + "t0"."O_SHIPPRIORITY" AS "o_shippriority", + "t0"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t4" + ON "t2"."o_orderkey" = "t4"."l_orderkey" + ) AS "t6" + WHERE + "t6"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t6"."l_commitdate" < "t6"."l_receiptdate" + AND "t6"."l_shipdate" < "t6"."l_commitdate" + AND "t6"."l_receiptdate" >= DATEFROMPARTS(1994, 1, 1) + AND "t6"."l_receiptdate" < DATEFROMPARTS(1995, 1, 1) + ) AS "t7" GROUP BY 1 -) AS t2 +) AS "t8" ORDER BY - t2."l_shipmode" ASC \ No newline at end of file + "t8"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql index ddc206f3e537..58270b87504b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/duckdb/h13.sql @@ -1,24 +1,45 @@ -WITH t0 AS ( - SELECT - t2.c_custkey AS c_custkey, - COUNT(t3.o_orderkey) AS c_count - FROM main.customer AS t2 - LEFT OUTER JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey AND NOT t3.o_comment LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t1.c_count, - t1.custdist + t7.c_count, + t7.custdist FROM ( SELECT - t0.c_count AS c_count, + t6.c_count, COUNT(*) AS custdist - FROM t0 + FROM ( + SELECT + t5.c_custkey, + COUNT(t5.o_orderkey) AS c_count + FROM ( + SELECT + t2.c_custkey, + t2.c_name, + t2.c_address, + t2.c_nationkey, + t2.c_phone, + t2.c_acctbal, + t2.c_mktsegment, + t2.c_comment, + t3.o_orderkey, + t3.o_custkey, + t3.o_orderstatus, + t3.o_totalprice, + t3.o_orderdate, + t3.o_orderpriority, + t3.o_clerk, + t3.o_shippriority, + t3.o_comment + FROM customer AS t2 + LEFT OUTER JOIN orders AS t3 + ON t2.c_custkey = t3.o_custkey AND NOT ( + t3.o_comment LIKE '%special%requests%' + ) + ) AS t5 + GROUP BY + 1 + ) AS t6 GROUP BY 1 -) AS t1 +) AS t7 ORDER BY - t1.custdist DESC, - t1.c_count DESC \ No newline at end of file + t7.custdist DESC, + t7.c_count DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql index 30129ee623a1..fd4edd9dfb77 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/snowflake/h13.sql @@ -1,47 +1,69 @@ -WITH t1 AS ( - 
SELECT - t4."C_CUSTKEY" AS "c_custkey", - t4."C_NAME" AS "c_name", - t4."C_ADDRESS" AS "c_address", - t4."C_NATIONKEY" AS "c_nationkey", - t4."C_PHONE" AS "c_phone", - t4."C_ACCTBAL" AS "c_acctbal", - t4."C_MKTSEGMENT" AS "c_mktsegment", - t4."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t4 -), t0 AS ( - SELECT - t4."O_ORDERKEY" AS "o_orderkey", - t4."O_CUSTKEY" AS "o_custkey", - t4."O_ORDERSTATUS" AS "o_orderstatus", - t4."O_TOTALPRICE" AS "o_totalprice", - t4."O_ORDERDATE" AS "o_orderdate", - t4."O_ORDERPRIORITY" AS "o_orderpriority", - t4."O_CLERK" AS "o_clerk", - t4."O_SHIPPRIORITY" AS "o_shippriority", - t4."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t4 -), t2 AS ( - SELECT - t1."c_custkey" AS "c_custkey", - COUNT(t0."o_orderkey") AS "c_count" - FROM t1 - LEFT OUTER JOIN t0 - ON t1."c_custkey" = t0."o_custkey" AND NOT t0."o_comment" LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t3."c_count", - t3."custdist" + "t8"."c_count" AS "c_count", + "t8"."custdist" AS "custdist" FROM ( SELECT - t2."c_count" AS "c_count", + "t7"."c_count" AS "c_count", COUNT(*) AS "custdist" - FROM t2 + FROM ( + SELECT + "t6"."c_custkey" AS "c_custkey", + COUNT("t6"."o_orderkey") AS "c_count" + FROM ( + SELECT + "t2"."c_custkey" AS "c_custkey", + "t2"."c_name" AS "c_name", + "t2"."c_address" AS "c_address", + "t2"."c_nationkey" AS "c_nationkey", + "t2"."c_phone" AS "c_phone", + "t2"."c_acctbal" AS "c_acctbal", + "t2"."c_mktsegment" AS "c_mktsegment", + "t2"."c_comment" AS "c_comment", + "t4"."o_orderkey" AS "o_orderkey", + "t4"."o_custkey" AS "o_custkey", + "t4"."o_orderstatus" AS "o_orderstatus", + "t4"."o_totalprice" AS "o_totalprice", + "t4"."o_orderdate" AS "o_orderdate", + "t4"."o_orderpriority" AS "o_orderpriority", + "t4"."o_clerk" AS "o_clerk", + "t4"."o_shippriority" AS "o_shippriority", + "t4"."o_comment" AS "o_comment" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t2" + LEFT OUTER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t4" + ON "t2"."c_custkey" = "t4"."o_custkey" + AND NOT ( + "t4"."o_comment" LIKE '%special%requests%' + ) + ) AS "t6" + GROUP BY + 1 + ) AS "t7" GROUP BY 1 -) AS t3 +) AS "t8" ORDER BY - t3."custdist" DESC, - t3."c_count" DESC \ No newline at end of file + "t8"."custdist" DESC NULLS LAST, + "t8"."c_count" DESC NULLS LAST \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql index 8bfbb135fce7..42d6dbe835b4 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/duckdb/h14.sql @@ -2,20 +2,74 @@ SELECT ( SUM( CASE - WHEN ( - t1.p_type LIKE 'PROMO%' - ) - THEN t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + WHEN t6.p_type LIKE 'PROMO%' + THEN 
t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount ) ELSE CAST(0 AS TINYINT) END ) * CAST(100 AS TINYINT) - ) / SUM(t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + ) / SUM(t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount )) AS promo_revenue -FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t0.l_partkey = t1.p_partkey -WHERE - t0.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t0.l_shipdate < MAKE_DATE(1995, 10, 1) \ No newline at end of file +FROM ( + SELECT + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment + FROM ( + SELECT + t2.l_orderkey, + t2.l_partkey, + t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t2.l_partkey = t3.p_partkey + ) AS t5 + WHERE + t5.l_shipdate >= MAKE_DATE(1995, 9, 1) AND t5.l_shipdate < MAKE_DATE(1995, 10, 1) +) AS t6 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql index 76a5bce247d0..a417e7d42d34 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/snowflake/h14.sql @@ -1,51 +1,103 @@ -WITH t1 AS ( - SELECT - t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS "l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -), t0 AS ( - SELECT - t3."P_PARTKEY" AS "p_partkey", - t3."P_NAME" AS "p_name", - t3."P_MFGR" AS "p_mfgr", - t3."P_BRAND" AS "p_brand", - t3."P_TYPE" AS "p_type", - t3."P_SIZE" AS "p_size", - t3."P_CONTAINER" AS "p_container", - t3."P_RETAILPRICE" AS "p_retailprice", - t3."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t3 -) SELECT - CAST(t2."promo_revenue" AS DECIMAL(38, 10)) AS "promo_revenue" + ( + SUM( + IFF("t7"."p_type" LIKE 'PROMO%', "t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" + ), 0) + ) * 100 + ) / SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" + )) AS "promo_revenue" FROM ( SELECT - ( - SUM( - IFF(t0."p_type" LIKE 'PROMO%', t1."l_extendedprice" * ( - 1 - t1."l_discount" - ), 0) - ) * 100 - ) / SUM(t1."l_extendedprice" * ( - 1 - t1."l_discount" - )) AS "promo_revenue" - FROM t1 - JOIN t0 - ON t1."l_partkey" = t0."p_partkey" + "t6"."l_orderkey" AS "l_orderkey", + 
"t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS "l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS "l_comment", + "t4"."p_partkey" AS "p_partkey", + "t4"."p_name" AS "p_name", + "t4"."p_mfgr" AS "p_mfgr", + "t4"."p_brand" AS "p_brand", + "t4"."p_type" AS "p_type", + "t4"."p_size" AS "p_size", + "t4"."p_container" AS "p_container", + "t4"."p_retailprice" AS "p_retailprice", + "t4"."p_comment" AS "p_comment" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + "t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t4" + ON "t2"."l_partkey" = "t4"."p_partkey" + ) AS "t6" WHERE - t1."l_shipdate" >= '1995-09-01' AND t1."l_shipdate" < '1995-10-01' -) AS t2 \ No newline at end of file + "t6"."l_shipdate" >= DATEFROMPARTS(1995, 9, 1) + AND "t6"."l_shipdate" < DATEFROMPARTS(1995, 10, 1) +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql index 34a990f1423d..afad257dc2f2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/duckdb/h15.sql @@ -1,53 +1,103 @@ -WITH t0 AS ( - SELECT - t3.l_suppkey AS 
l_suppkey, - SUM(t3.l_extendedprice * ( - CAST(1 AS TINYINT) - t3.l_discount - )) AS total_revenue - FROM main.lineitem AS t3 - WHERE - t3.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t3.l_shipdate < MAKE_DATE(1996, 4, 1) - GROUP BY - 1 -), t1 AS ( - SELECT - t3.s_suppkey AS s_suppkey, - t3.s_name AS s_name, - t3.s_address AS s_address, - t3.s_nationkey AS s_nationkey, - t3.s_phone AS s_phone, - t3.s_acctbal AS s_acctbal, - t3.s_comment AS s_comment, - t0.l_suppkey AS l_suppkey, - t0.total_revenue AS total_revenue - FROM main.supplier AS t3 - JOIN t0 - ON t3.s_suppkey = t0.l_suppkey - WHERE - t0.total_revenue = ( - SELECT - MAX(t0.total_revenue) AS "Max(total_revenue)" - FROM t0 - ) -) SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_phone, - t2.total_revenue + t7.s_suppkey, + t7.s_name, + t7.s_address, + t7.s_phone, + t7.total_revenue FROM ( SELECT - t1.s_suppkey AS s_suppkey, - t1.s_name AS s_name, - t1.s_address AS s_address, - t1.s_nationkey AS s_nationkey, - t1.s_phone AS s_phone, - t1.s_acctbal AS s_acctbal, - t1.s_comment AS s_comment, - t1.l_suppkey AS l_suppkey, - t1.total_revenue AS total_revenue - FROM t1 - ORDER BY - t1.s_suppkey ASC -) AS t2 \ No newline at end of file + t2.s_suppkey, + t2.s_name, + t2.s_address, + t2.s_nationkey, + t2.s_phone, + t2.s_acctbal, + t2.s_comment, + t5.l_suppkey, + t5.total_revenue + FROM supplier AS t2 + INNER JOIN ( + SELECT + t3.l_suppkey, + SUM(t3.l_extendedprice * ( + CAST(1 AS TINYINT) - t3.l_discount + )) AS total_revenue + FROM ( + SELECT + t1.l_orderkey, + t1.l_partkey, + t1.l_suppkey, + t1.l_linenumber, + t1.l_quantity, + t1.l_extendedprice, + t1.l_discount, + t1.l_tax, + t1.l_returnflag, + t1.l_linestatus, + t1.l_shipdate, + t1.l_commitdate, + t1.l_receiptdate, + t1.l_shipinstruct, + t1.l_shipmode, + t1.l_comment + FROM lineitem AS t1 + WHERE + t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) + ) AS t3 + GROUP BY + 1 + ) AS t5 + ON t2.s_suppkey = t5.l_suppkey +) AS t7 +WHERE + t7.total_revenue = ( + SELECT + MAX(t7.total_revenue) AS "Max(total_revenue)" + FROM ( + SELECT + t2.s_suppkey, + t2.s_name, + t2.s_address, + t2.s_nationkey, + t2.s_phone, + t2.s_acctbal, + t2.s_comment, + t5.l_suppkey, + t5.total_revenue + FROM supplier AS t2 + INNER JOIN ( + SELECT + t3.l_suppkey, + SUM(t3.l_extendedprice * ( + CAST(1 AS TINYINT) - t3.l_discount + )) AS total_revenue + FROM ( + SELECT + t1.l_orderkey, + t1.l_partkey, + t1.l_suppkey, + t1.l_linenumber, + t1.l_quantity, + t1.l_extendedprice, + t1.l_discount, + t1.l_tax, + t1.l_returnflag, + t1.l_linestatus, + t1.l_shipdate, + t1.l_commitdate, + t1.l_receiptdate, + t1.l_shipinstruct, + t1.l_shipmode, + t1.l_comment + FROM lineitem AS t1 + WHERE + t1.l_shipdate >= MAKE_DATE(1996, 1, 1) AND t1.l_shipdate < MAKE_DATE(1996, 4, 1) + ) AS t3 + GROUP BY + 1 + ) AS t5 + ON t2.s_suppkey = t5.l_suppkey + ) AS t7 + ) +ORDER BY + t7.s_suppkey ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql index 1ce09bf25f75..c919360a42d2 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/snowflake/h15.sql @@ -1,83 +1,125 @@ -WITH t2 AS ( - SELECT - t5."S_SUPPKEY" AS "s_suppkey", - t5."S_NAME" AS "s_name", - t5."S_ADDRESS" AS "s_address", - t5."S_NATIONKEY" AS "s_nationkey", - t5."S_PHONE" AS "s_phone", - t5."S_ACCTBAL" AS "s_acctbal", - 
t5."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t5 -), t0 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t1 AS ( - SELECT - t0."l_suppkey" AS "l_suppkey", - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" - )) AS "total_revenue" - FROM t0 - WHERE - t0."l_shipdate" >= DATE_FROM_PARTS(1996, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1996, 4, 1) - GROUP BY - 1 -), t3 AS ( - SELECT - t2."s_suppkey" AS "s_suppkey", - t2."s_name" AS "s_name", - t2."s_address" AS "s_address", - t2."s_nationkey" AS "s_nationkey", - t2."s_phone" AS "s_phone", - t2."s_acctbal" AS "s_acctbal", - t2."s_comment" AS "s_comment", - t1."l_suppkey" AS "l_suppkey", - t1."total_revenue" AS "total_revenue" - FROM t2 - JOIN t1 - ON t2."s_suppkey" = t1."l_suppkey" - WHERE - t1."total_revenue" = ( - SELECT - MAX(t1."total_revenue") AS "Max(total_revenue)" - FROM t1 - ) -) SELECT - t4."s_suppkey", - t4."s_name", - t4."s_address", - t4."s_phone", - t4."total_revenue" + "t7"."s_suppkey" AS "s_suppkey", + "t7"."s_name" AS "s_name", + "t7"."s_address" AS "s_address", + "t7"."s_phone" AS "s_phone", + "t7"."total_revenue" AS "total_revenue" FROM ( SELECT - t3."s_suppkey" AS "s_suppkey", - t3."s_name" AS "s_name", - t3."s_address" AS "s_address", - t3."s_nationkey" AS "s_nationkey", - t3."s_phone" AS "s_phone", - t3."s_acctbal" AS "s_acctbal", - t3."s_comment" AS "s_comment", - t3."l_suppkey" AS "l_suppkey", - t3."total_revenue" AS "total_revenue" - FROM t3 - ORDER BY - t3."s_suppkey" ASC -) AS t4 \ No newline at end of file + "t2"."s_suppkey" AS "s_suppkey", + "t2"."s_name" AS "s_name", + "t2"."s_address" AS "s_address", + "t2"."s_nationkey" AS "s_nationkey", + "t2"."s_phone" AS "s_phone", + "t2"."s_acctbal" AS "s_acctbal", + "t2"."s_comment" AS "s_comment", + "t5"."l_suppkey" AS "l_suppkey", + "t5"."total_revenue" AS "total_revenue" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t3"."l_suppkey" AS "l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM 
"LINEITEM" AS "t1" + WHERE + "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + ) AS "t3" + GROUP BY + 1 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" +) AS "t7" +WHERE + "t7"."total_revenue" = ( + SELECT + MAX("t7"."total_revenue") AS "Max(total_revenue)" + FROM ( + SELECT + "t2"."s_suppkey" AS "s_suppkey", + "t2"."s_name" AS "s_name", + "t2"."s_address" AS "s_address", + "t2"."s_nationkey" AS "s_nationkey", + "t2"."s_phone" AS "s_phone", + "t2"."s_acctbal" AS "s_acctbal", + "t2"."s_comment" AS "s_comment", + "t5"."l_suppkey" AS "l_suppkey", + "t5"."total_revenue" AS "total_revenue" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t3"."l_suppkey" AS "l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + WHERE + "t1"."L_SHIPDATE" >= DATEFROMPARTS(1996, 1, 1) + AND "t1"."L_SHIPDATE" < DATEFROMPARTS(1996, 4, 1) + ) AS "t3" + GROUP BY + 1 + ) AS "t5" + ON "t2"."s_suppkey" = "t5"."l_suppkey" + ) AS "t7" + ) +ORDER BY + "t7"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql index 5f9ebafc8322..711276ec20dd 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/duckdb/h16.sql @@ -1,47 +1,73 @@ SELECT - t0.p_brand, - t0.p_type, - t0.p_size, - t0.supplier_cnt + t9.p_brand, + t9.p_type, + t9.p_size, + t9.supplier_cnt FROM ( SELECT - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - COUNT(DISTINCT t1.ps_suppkey) AS supplier_cnt - FROM main.partsupp AS t1 - JOIN main.part AS t2 - ON t2.p_partkey = t1.ps_partkey - WHERE - t2.p_brand <> 'Brand#45' - AND NOT t2.p_type LIKE 'MEDIUM POLISHED%' - AND t2.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) - AND ( - NOT t1.ps_suppkey IN ( - SELECT - t3.s_suppkey - FROM ( + t8.p_brand, + t8.p_type, + t8.p_size, + COUNT(DISTINCT t8.ps_suppkey) AS supplier_cnt + FROM ( + SELECT + t7.ps_partkey, + t7.ps_suppkey, + t7.ps_availqty, + t7.ps_supplycost, + t7.ps_comment, + t7.p_partkey, + t7.p_name, + t7.p_mfgr, + t7.p_brand, + t7.p_type, + t7.p_size, + t7.p_container, + t7.p_retailprice, + t7.p_comment + FROM ( + SELECT + t3.ps_partkey, + t3.ps_suppkey, + t3.ps_availqty, + t3.ps_supplycost, + t3.ps_comment, + t4.p_partkey, + t4.p_name, + t4.p_mfgr, + t4.p_brand, + 
t4.p_type, + t4.p_size, + t4.p_container, + t4.p_retailprice, + t4.p_comment + FROM partsupp AS t3 + INNER JOIN part AS t4 + ON t4.p_partkey = t3.ps_partkey + ) AS t7 + WHERE + t7.p_brand <> 'Brand#45' + AND NOT ( + t7.p_type LIKE 'MEDIUM POLISHED%' + ) + AND t7.p_size IN (CAST(49 AS TINYINT), CAST(14 AS TINYINT), CAST(23 AS TINYINT), CAST(45 AS TINYINT), CAST(19 AS TINYINT), CAST(3 AS TINYINT), CAST(36 AS TINYINT), CAST(9 AS TINYINT)) + AND NOT ( + t7.ps_suppkey IN ( SELECT - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment - FROM main.supplier AS t4 + t2.s_suppkey + FROM supplier AS t2 WHERE - t4.s_comment LIKE '%Customer%Complaints%' - ) AS t3 + t2.s_comment LIKE '%Customer%Complaints%' + ) ) - ) + ) AS t8 GROUP BY 1, 2, 3 -) AS t0 +) AS t9 ORDER BY - t0.supplier_cnt DESC, - t0.p_brand ASC, - t0.p_type ASC, - t0.p_size ASC \ No newline at end of file + t9.supplier_cnt DESC, + t9.p_brand ASC, + t9.p_type ASC, + t9.p_size ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql index da9166dcf9f0..6ebdf27930bf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/snowflake/h16.sql @@ -1,78 +1,93 @@ -WITH t1 AS ( - SELECT - t4."PS_PARTKEY" AS "ps_partkey", - t4."PS_SUPPKEY" AS "ps_suppkey", - t4."PS_AVAILQTY" AS "ps_availqty", - t4."PS_SUPPLYCOST" AS "ps_supplycost", - t4."PS_COMMENT" AS "ps_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t4 -), t2 AS ( - SELECT - t4."P_PARTKEY" AS "p_partkey", - t4."P_NAME" AS "p_name", - t4."P_MFGR" AS "p_mfgr", - t4."P_BRAND" AS "p_brand", - t4."P_TYPE" AS "p_type", - t4."P_SIZE" AS "p_size", - t4."P_CONTAINER" AS "p_container", - t4."P_RETAILPRICE" AS "p_retailprice", - t4."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t4 -), t0 AS ( - SELECT - t4."S_SUPPKEY" AS "s_suppkey", - t4."S_NAME" AS "s_name", - t4."S_ADDRESS" AS "s_address", - t4."S_NATIONKEY" AS "s_nationkey", - t4."S_PHONE" AS "s_phone", - t4."S_ACCTBAL" AS "s_acctbal", - t4."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t4 -) SELECT - t3."p_brand", - t3."p_type", - t3."p_size", - t3."supplier_cnt" + "t10"."p_brand" AS "p_brand", + "t10"."p_type" AS "p_type", + "t10"."p_size" AS "p_size", + "t10"."supplier_cnt" AS "supplier_cnt" FROM ( SELECT - t2."p_brand" AS "p_brand", - t2."p_type" AS "p_type", - t2."p_size" AS "p_size", - COUNT(DISTINCT t1."ps_suppkey") AS "supplier_cnt" - FROM t1 - JOIN t2 - ON t2."p_partkey" = t1."ps_partkey" - WHERE - t2."p_brand" <> 'Brand#45' - AND NOT t2."p_type" LIKE 'MEDIUM POLISHED%' - AND t2."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) - AND ( - NOT t1."ps_suppkey" IN ( + "t9"."p_brand" AS "p_brand", + "t9"."p_type" AS "p_type", + "t9"."p_size" AS "p_size", + COUNT(DISTINCT "t9"."ps_suppkey") AS "supplier_cnt" + FROM ( + SELECT + "t8"."ps_partkey" AS "ps_partkey", + "t8"."ps_suppkey" AS "ps_suppkey", + "t8"."ps_availqty" AS "ps_availqty", + "t8"."ps_supplycost" AS "ps_supplycost", + "t8"."ps_comment" AS "ps_comment", + "t8"."p_partkey" AS "p_partkey", + "t8"."p_name" AS "p_name", + "t8"."p_mfgr" AS "p_mfgr", + "t8"."p_brand" AS "p_brand", + "t8"."p_type" AS "p_type", + "t8"."p_size" AS "p_size", + 
"t8"."p_container" AS "p_container", + "t8"."p_retailprice" AS "p_retailprice", + "t8"."p_comment" AS "p_comment" + FROM ( + SELECT + "t3"."ps_partkey" AS "ps_partkey", + "t3"."ps_suppkey" AS "ps_suppkey", + "t3"."ps_availqty" AS "ps_availqty", + "t3"."ps_supplycost" AS "ps_supplycost", + "t3"."ps_comment" AS "ps_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( SELECT - t4."s_suppkey" - FROM ( + "t0"."PS_PARTKEY" AS "ps_partkey", + "t0"."PS_SUPPKEY" AS "ps_suppkey", + "t0"."PS_AVAILQTY" AS "ps_availqty", + "t0"."PS_SUPPLYCOST" AS "ps_supplycost", + "t0"."PS_COMMENT" AS "ps_comment" + FROM "PARTSUPP" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t2"."P_PARTKEY" AS "p_partkey", + "t2"."P_NAME" AS "p_name", + "t2"."P_MFGR" AS "p_mfgr", + "t2"."P_BRAND" AS "p_brand", + "t2"."P_TYPE" AS "p_type", + "t2"."P_SIZE" AS "p_size", + "t2"."P_CONTAINER" AS "p_container", + "t2"."P_RETAILPRICE" AS "p_retailprice", + "t2"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t2" + ) AS "t6" + ON "t6"."p_partkey" = "t3"."ps_partkey" + ) AS "t8" + WHERE + "t8"."p_brand" <> 'Brand#45' + AND NOT ( + "t8"."p_type" LIKE 'MEDIUM POLISHED%' + ) + AND "t8"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND NOT ( + "t8"."ps_suppkey" IN (( SELECT - t0."s_suppkey" AS "s_suppkey", - t0."s_name" AS "s_name", - t0."s_address" AS "s_address", - t0."s_nationkey" AS "s_nationkey", - t0."s_phone" AS "s_phone", - t0."s_acctbal" AS "s_acctbal", - t0."s_comment" AS "s_comment" - FROM t0 + "t1"."S_SUPPKEY" AS "s_suppkey" + FROM "SUPPLIER" AS "t1" WHERE - t0."s_comment" LIKE '%Customer%Complaints%' - ) AS t4 + "t1"."S_COMMENT" LIKE '%Customer%Complaints%' + )) ) - ) + ) AS "t9" GROUP BY 1, 2, 3 -) AS t3 +) AS "t10" ORDER BY - t3."supplier_cnt" DESC, - t3."p_brand" ASC, - t3."p_type" ASC, - t3."p_size" ASC \ No newline at end of file + "t10"."supplier_cnt" DESC NULLS LAST, + "t10"."p_brand" ASC, + "t10"."p_type" ASC, + "t10"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql index 4e9c6e9f6da4..905e5c095d3d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/duckdb/h17.sql @@ -1,15 +1,92 @@ SELECT - SUM(t0.l_extendedprice) / CAST(7.0 AS REAL(53)) AS avg_yearly -FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#23' - AND t1.p_container = 'MED BOX' - AND t0.l_quantity < ( + SUM(t8.l_extendedprice) / CAST(7.0 AS DOUBLE) AS avg_yearly +FROM ( + SELECT + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment + FROM ( SELECT - AVG(t0.l_quantity) AS "Mean(l_quantity)" - FROM main.lineitem AS t0 - WHERE - t0.l_partkey = t1.p_partkey - ) * CAST(0.2 AS REAL(53)) \ No newline at end of file + t2.l_orderkey, + t2.l_partkey, + 
t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t3.p_partkey = t2.l_partkey + ) AS t5 + WHERE + t5.p_brand = 'Brand#23' + AND t5.p_container = 'MED BOX' + AND t5.l_quantity < ( + ( + SELECT + AVG(t6.l_quantity) AS "Mean(l_quantity)" + FROM ( + SELECT + t0.l_orderkey, + t0.l_partkey, + t0.l_suppkey, + t0.l_linenumber, + t0.l_quantity, + t0.l_extendedprice, + t0.l_discount, + t0.l_tax, + t0.l_returnflag, + t0.l_linestatus, + t0.l_shipdate, + t0.l_commitdate, + t0.l_receiptdate, + t0.l_shipinstruct, + t0.l_shipmode, + t0.l_comment + FROM lineitem AS t0 + WHERE + t0.l_partkey = t5.p_partkey + ) AS t6 + ) * CAST(0.2 AS DOUBLE) + ) +) AS t8 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql index 890b8e22fc2e..6bd68abfdee8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/snowflake/h17.sql @@ -1,51 +1,111 @@ -WITH t0 AS ( - SELECT - t3."L_ORDERKEY" AS "l_orderkey", - t3."L_PARTKEY" AS "l_partkey", - t3."L_SUPPKEY" AS "l_suppkey", - t3."L_LINENUMBER" AS "l_linenumber", - t3."L_QUANTITY" AS "l_quantity", - t3."L_EXTENDEDPRICE" AS "l_extendedprice", - t3."L_DISCOUNT" AS "l_discount", - t3."L_TAX" AS "l_tax", - t3."L_RETURNFLAG" AS "l_returnflag", - t3."L_LINESTATUS" AS "l_linestatus", - t3."L_SHIPDATE" AS "l_shipdate", - t3."L_COMMITDATE" AS "l_commitdate", - t3."L_RECEIPTDATE" AS "l_receiptdate", - t3."L_SHIPINSTRUCT" AS "l_shipinstruct", - t3."L_SHIPMODE" AS "l_shipmode", - t3."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t3 -), t1 AS ( - SELECT - t3."P_PARTKEY" AS "p_partkey", - t3."P_NAME" AS "p_name", - t3."P_MFGR" AS "p_mfgr", - t3."P_BRAND" AS "p_brand", - t3."P_TYPE" AS "p_type", - t3."P_SIZE" AS "p_size", - t3."P_CONTAINER" AS "p_container", - t3."P_RETAILPRICE" AS "p_retailprice", - t3."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t3 -) SELECT - CAST(t2."avg_yearly" AS DECIMAL(30, 8)) AS "avg_yearly" + SUM("t8"."l_extendedprice") / 7.0 AS "avg_yearly" FROM ( SELECT - SUM(t0."l_extendedprice") / 7.0 AS "avg_yearly" - FROM t0 - JOIN t1 - ON t1."p_partkey" = t0."l_partkey" - WHERE - t1."p_brand" = 'Brand#23' - AND t1."p_container" = 'MED BOX' - AND t0."l_quantity" < ( + * + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS "l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS "l_comment", + "t3"."p_partkey" AS "p_partkey", + "t3"."p_name" AS "p_name", + "t3"."p_mfgr" AS "p_mfgr", + "t3"."p_brand" AS 
"p_brand", + "t3"."p_type" AS "p_type", + "t3"."p_size" AS "p_size", + "t3"."p_container" AS "p_container", + "t3"."p_retailprice" AS "p_retailprice", + "t3"."p_comment" AS "p_comment" + FROM ( SELECT - AVG(t0."l_quantity") AS "Mean(l_quantity)" - FROM t0 - WHERE - t0."l_partkey" = t1."p_partkey" - ) * 0.2 -) AS t2 \ No newline at end of file + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + "t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t3" + ON "t3"."p_partkey" = "t2"."l_partkey" + ) AS "t5" + WHERE + ( + "t5"."p_brand" = 'Brand#23' + ) + AND ( + "t5"."p_container" = 'MED BOX' + ) + AND ( + "t5"."l_quantity" < ( + ( + SELECT + AVG("t6"."l_quantity") AS "Mean(l_quantity)" + FROM ( + SELECT + * + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + WHERE + ( + "t2"."l_partkey" = "t5"."p_partkey" + ) + ) AS "t6" + ) * 0.2 + ) + ) +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql index 1f50d0e70368..9d3d4f821010 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/duckdb/h18.sql @@ -1,52 +1,118 @@ -WITH t0 AS ( - SELECT - t2.l_orderkey AS l_orderkey, - SUM(t2.l_quantity) AS qty_sum - FROM main.lineitem AS t2 - GROUP BY - 1 -) SELECT - t1.c_name, - t1.c_custkey, - t1.o_orderkey, - t1.o_orderdate, - t1.o_totalprice, - t1.sum_qty + t12.c_name, + t12.c_custkey, + t12.o_orderkey, + t12.o_orderdate, + t12.o_totalprice, + t12.sum_qty FROM ( SELECT - t2.c_name AS c_name, - t2.c_custkey AS c_custkey, - t3.o_orderkey AS o_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_totalprice AS o_totalprice, - SUM(t4.l_quantity) AS sum_qty - FROM main.customer AS t2 - JOIN main.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN main.lineitem AS t4 - ON t3.o_orderkey = t4.l_orderkey - WHERE - t3.o_orderkey IN ( + t11.c_name, + t11.c_custkey, + t11.o_orderkey, + 
t11.o_orderdate, + t11.o_totalprice, + SUM(t11.l_quantity) AS sum_qty + FROM ( + SELECT + t9.c_custkey, + t9.c_name, + t9.c_address, + t9.c_nationkey, + t9.c_phone, + t9.c_acctbal, + t9.c_mktsegment, + t9.c_comment, + t9.o_orderkey, + t9.o_custkey, + t9.o_orderstatus, + t9.o_totalprice, + t9.o_orderdate, + t9.o_orderpriority, + t9.o_clerk, + t9.o_shippriority, + t9.o_comment, + t9.l_orderkey, + t9.l_partkey, + t9.l_suppkey, + t9.l_linenumber, + t9.l_quantity, + t9.l_extendedprice, + t9.l_discount, + t9.l_tax, + t9.l_returnflag, + t9.l_linestatus, + t9.l_shipdate, + t9.l_commitdate, + t9.l_receiptdate, + t9.l_shipinstruct, + t9.l_shipmode, + t9.l_comment + FROM ( SELECT - t5.l_orderkey - FROM ( + t3.c_custkey, + t3.c_name, + t3.c_address, + t3.c_nationkey, + t3.c_phone, + t3.c_acctbal, + t3.c_mktsegment, + t3.c_comment, + t4.o_orderkey, + t4.o_custkey, + t4.o_orderstatus, + t4.o_totalprice, + t4.o_orderdate, + t4.o_orderpriority, + t4.o_clerk, + t4.o_shippriority, + t4.o_comment, + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment + FROM customer AS t3 + INNER JOIN orders AS t4 + ON t3.c_custkey = t4.o_custkey + INNER JOIN lineitem AS t5 + ON t4.o_orderkey = t5.l_orderkey + ) AS t9 + WHERE + t9.o_orderkey IN ( SELECT - t0.l_orderkey AS l_orderkey, - t0.qty_sum AS qty_sum - FROM t0 + t6.l_orderkey + FROM ( + SELECT + t2.l_orderkey, + SUM(t2.l_quantity) AS qty_sum + FROM lineitem AS t2 + GROUP BY + 1 + ) AS t6 WHERE - t0.qty_sum > CAST(300 AS SMALLINT) - ) AS t5 - ) + t6.qty_sum > CAST(300 AS SMALLINT) + ) + ) AS t11 GROUP BY 1, 2, 3, 4, 5 -) AS t1 +) AS t12 ORDER BY - t1.o_totalprice DESC, - t1.o_orderdate ASC + t12.o_totalprice DESC, + t12.o_orderdate ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql index 61aae5b5f00d..e2a86eb98ccf 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/snowflake/h18.sql @@ -1,94 +1,179 @@ -WITH t2 AS ( - SELECT - t5."C_CUSTKEY" AS "c_custkey", - t5."C_NAME" AS "c_name", - t5."C_ADDRESS" AS "c_address", - t5."C_NATIONKEY" AS "c_nationkey", - t5."C_PHONE" AS "c_phone", - t5."C_ACCTBAL" AS "c_acctbal", - t5."C_MKTSEGMENT" AS "c_mktsegment", - t5."C_COMMENT" AS "c_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER" AS t5 -), t1 AS ( - SELECT - t5."O_ORDERKEY" AS "o_orderkey", - t5."O_CUSTKEY" AS "o_custkey", - t5."O_ORDERSTATUS" AS "o_orderstatus", - t5."O_TOTALPRICE" AS "o_totalprice", - t5."O_ORDERDATE" AS "o_orderdate", - t5."O_ORDERPRIORITY" AS "o_orderpriority", - t5."O_CLERK" AS "o_clerk", - t5."O_SHIPPRIORITY" AS "o_shippriority", - t5."O_COMMENT" AS "o_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."ORDERS" AS t5 -), t0 AS ( - SELECT - t5."L_ORDERKEY" AS "l_orderkey", - t5."L_PARTKEY" AS "l_partkey", - t5."L_SUPPKEY" AS "l_suppkey", - t5."L_LINENUMBER" AS "l_linenumber", - t5."L_QUANTITY" AS "l_quantity", - t5."L_EXTENDEDPRICE" AS "l_extendedprice", - t5."L_DISCOUNT" AS "l_discount", - t5."L_TAX" AS "l_tax", - t5."L_RETURNFLAG" AS "l_returnflag", - t5."L_LINESTATUS" AS "l_linestatus", - t5."L_SHIPDATE" AS "l_shipdate", - t5."L_COMMITDATE" AS "l_commitdate", - 
t5."L_RECEIPTDATE" AS "l_receiptdate", - t5."L_SHIPINSTRUCT" AS "l_shipinstruct", - t5."L_SHIPMODE" AS "l_shipmode", - t5."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t5 -), t3 AS ( - SELECT - t0."l_orderkey" AS "l_orderkey", - SUM(t0."l_quantity") AS "qty_sum" - FROM t0 - GROUP BY - 1 -) SELECT - t4."c_name", - t4."c_custkey", - t4."o_orderkey", - t4."o_orderdate", - t4."o_totalprice", - t4."sum_qty" + "t14"."c_name" AS "c_name", + "t14"."c_custkey" AS "c_custkey", + "t14"."o_orderkey" AS "o_orderkey", + "t14"."o_orderdate" AS "o_orderdate", + "t14"."o_totalprice" AS "o_totalprice", + "t14"."sum_qty" AS "sum_qty" FROM ( SELECT - t2."c_name" AS "c_name", - t2."c_custkey" AS "c_custkey", - t1."o_orderkey" AS "o_orderkey", - t1."o_orderdate" AS "o_orderdate", - t1."o_totalprice" AS "o_totalprice", - SUM(t0."l_quantity") AS "sum_qty" - FROM t2 - JOIN t1 - ON t2."c_custkey" = t1."o_custkey" - JOIN t0 - ON t1."o_orderkey" = t0."l_orderkey" - WHERE - t1."o_orderkey" IN ( + "t13"."c_name" AS "c_name", + "t13"."c_custkey" AS "c_custkey", + "t13"."o_orderkey" AS "o_orderkey", + "t13"."o_orderdate" AS "o_orderdate", + "t13"."o_totalprice" AS "o_totalprice", + SUM("t13"."l_quantity") AS "sum_qty" + FROM ( + SELECT + "t11"."c_custkey" AS "c_custkey", + "t11"."c_name" AS "c_name", + "t11"."c_address" AS "c_address", + "t11"."c_nationkey" AS "c_nationkey", + "t11"."c_phone" AS "c_phone", + "t11"."c_acctbal" AS "c_acctbal", + "t11"."c_mktsegment" AS "c_mktsegment", + "t11"."c_comment" AS "c_comment", + "t11"."o_orderkey" AS "o_orderkey", + "t11"."o_custkey" AS "o_custkey", + "t11"."o_orderstatus" AS "o_orderstatus", + "t11"."o_totalprice" AS "o_totalprice", + "t11"."o_orderdate" AS "o_orderdate", + "t11"."o_orderpriority" AS "o_orderpriority", + "t11"."o_clerk" AS "o_clerk", + "t11"."o_shippriority" AS "o_shippriority", + "t11"."o_comment" AS "o_comment", + "t11"."l_orderkey" AS "l_orderkey", + "t11"."l_partkey" AS "l_partkey", + "t11"."l_suppkey" AS "l_suppkey", + "t11"."l_linenumber" AS "l_linenumber", + "t11"."l_quantity" AS "l_quantity", + "t11"."l_extendedprice" AS "l_extendedprice", + "t11"."l_discount" AS "l_discount", + "t11"."l_tax" AS "l_tax", + "t11"."l_returnflag" AS "l_returnflag", + "t11"."l_linestatus" AS "l_linestatus", + "t11"."l_shipdate" AS "l_shipdate", + "t11"."l_commitdate" AS "l_commitdate", + "t11"."l_receiptdate" AS "l_receiptdate", + "t11"."l_shipinstruct" AS "l_shipinstruct", + "t11"."l_shipmode" AS "l_shipmode", + "t11"."l_comment" AS "l_comment" + FROM ( SELECT - t5."l_orderkey" + "t3"."c_custkey" AS "c_custkey", + "t3"."c_name" AS "c_name", + "t3"."c_address" AS "c_address", + "t3"."c_nationkey" AS "c_nationkey", + "t3"."c_phone" AS "c_phone", + "t3"."c_acctbal" AS "c_acctbal", + "t3"."c_mktsegment" AS "c_mktsegment", + "t3"."c_comment" AS "c_comment", + "t6"."o_orderkey" AS "o_orderkey", + "t6"."o_custkey" AS "o_custkey", + "t6"."o_orderstatus" AS "o_orderstatus", + "t6"."o_totalprice" AS "o_totalprice", + "t6"."o_orderdate" AS "o_orderdate", + "t6"."o_orderpriority" AS "o_orderpriority", + "t6"."o_clerk" AS "o_clerk", + "t6"."o_shippriority" AS "o_shippriority", + "t6"."o_comment" AS "o_comment", + "t7"."l_orderkey" AS "l_orderkey", + "t7"."l_partkey" AS "l_partkey", + "t7"."l_suppkey" AS "l_suppkey", + "t7"."l_linenumber" AS "l_linenumber", + "t7"."l_quantity" AS "l_quantity", + "t7"."l_extendedprice" AS "l_extendedprice", + "t7"."l_discount" AS "l_discount", + "t7"."l_tax" AS "l_tax", + "t7"."l_returnflag" AS "l_returnflag", + 
"t7"."l_linestatus" AS "l_linestatus", + "t7"."l_shipdate" AS "l_shipdate", + "t7"."l_commitdate" AS "l_commitdate", + "t7"."l_receiptdate" AS "l_receiptdate", + "t7"."l_shipinstruct" AS "l_shipinstruct", + "t7"."l_shipmode" AS "l_shipmode", + "t7"."l_comment" AS "l_comment" FROM ( SELECT - t3."l_orderkey" AS "l_orderkey", - t3."qty_sum" AS "qty_sum" - FROM t3 + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + ) AS "t3" + INNER JOIN ( + SELECT + "t1"."O_ORDERKEY" AS "o_orderkey", + "t1"."O_CUSTKEY" AS "o_custkey", + "t1"."O_ORDERSTATUS" AS "o_orderstatus", + "t1"."O_TOTALPRICE" AS "o_totalprice", + "t1"."O_ORDERDATE" AS "o_orderdate", + "t1"."O_ORDERPRIORITY" AS "o_orderpriority", + "t1"."O_CLERK" AS "o_clerk", + "t1"."O_SHIPPRIORITY" AS "o_shippriority", + "t1"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t1" + ) AS "t6" + ON "t3"."c_custkey" = "t6"."o_custkey" + INNER JOIN ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t7" + ON "t6"."o_orderkey" = "t7"."l_orderkey" + ) AS "t11" + WHERE + "t11"."o_orderkey" IN (( + SELECT + "t8"."l_orderkey" AS "l_orderkey" + FROM ( + SELECT + "t5"."l_orderkey" AS "l_orderkey", + SUM("t5"."l_quantity") AS "qty_sum" + FROM ( + SELECT + "t2"."L_ORDERKEY" AS "l_orderkey", + "t2"."L_PARTKEY" AS "l_partkey", + "t2"."L_SUPPKEY" AS "l_suppkey", + "t2"."L_LINENUMBER" AS "l_linenumber", + "t2"."L_QUANTITY" AS "l_quantity", + "t2"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t2"."L_DISCOUNT" AS "l_discount", + "t2"."L_TAX" AS "l_tax", + "t2"."L_RETURNFLAG" AS "l_returnflag", + "t2"."L_LINESTATUS" AS "l_linestatus", + "t2"."L_SHIPDATE" AS "l_shipdate", + "t2"."L_COMMITDATE" AS "l_commitdate", + "t2"."L_RECEIPTDATE" AS "l_receiptdate", + "t2"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t2"."L_SHIPMODE" AS "l_shipmode", + "t2"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t2" + ) AS "t5" + GROUP BY + 1 + ) AS "t8" WHERE - t3."qty_sum" > 300 - ) AS t5 - ) + "t8"."qty_sum" > 300 + )) + ) AS "t13" GROUP BY 1, 2, 3, 4, 5 -) AS t4 +) AS "t14" ORDER BY - t4."o_totalprice" DESC, - t4."o_orderdate" ASC + "t14"."o_totalprice" DESC NULLS LAST, + "t14"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql index e5d84f17ac70..29adca6df1be 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/duckdb/h19.sql @@ -1,29 +1,147 @@ SELECT - SUM(t0.l_extendedprice * ( - CAST(1 AS TINYINT) - t0.l_discount + SUM(t6.l_extendedprice * ( + CAST(1 AS TINYINT) - t6.l_discount )) AS revenue 
-FROM main.lineitem AS t0 -JOIN main.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#12' - AND t1.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND t0.l_quantity >= CAST(1 AS TINYINT) - AND t0.l_quantity <= CAST(11 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#23' - AND t1.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0.l_quantity >= CAST(10 AS TINYINT) - AND t0.l_quantity <= CAST(20 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#34' - AND t1.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0.l_quantity >= CAST(20 AS TINYINT) - AND t0.l_quantity <= CAST(30 AS TINYINT) - AND t1.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' \ No newline at end of file +FROM ( + SELECT + t5.l_orderkey, + t5.l_partkey, + t5.l_suppkey, + t5.l_linenumber, + t5.l_quantity, + t5.l_extendedprice, + t5.l_discount, + t5.l_tax, + t5.l_returnflag, + t5.l_linestatus, + t5.l_shipdate, + t5.l_commitdate, + t5.l_receiptdate, + t5.l_shipinstruct, + t5.l_shipmode, + t5.l_comment, + t5.p_partkey, + t5.p_name, + t5.p_mfgr, + t5.p_brand, + t5.p_type, + t5.p_size, + t5.p_container, + t5.p_retailprice, + t5.p_comment + FROM ( + SELECT + t2.l_orderkey, + t2.l_partkey, + t2.l_suppkey, + t2.l_linenumber, + t2.l_quantity, + t2.l_extendedprice, + t2.l_discount, + t2.l_tax, + t2.l_returnflag, + t2.l_linestatus, + t2.l_shipdate, + t2.l_commitdate, + t2.l_receiptdate, + t2.l_shipinstruct, + t2.l_shipmode, + t2.l_comment, + t3.p_partkey, + t3.p_name, + t3.p_mfgr, + t3.p_brand, + t3.p_type, + t3.p_size, + t3.p_container, + t3.p_retailprice, + t3.p_comment + FROM lineitem AS t2 + INNER JOIN part AS t3 + ON t3.p_partkey = t2.l_partkey + ) AS t5 + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + t5.p_brand = 'Brand#12' + ) + AND t5.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + t5.l_quantity >= CAST(1 AS TINYINT) + ) + ) + AND ( + t5.l_quantity <= CAST(11 AS TINYINT) + ) + ) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(5 AS TINYINT) + ) + AND t5.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t5.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + t5.p_brand = 'Brand#23' + ) + AND t5.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + t5.l_quantity >= CAST(10 AS TINYINT) + ) + ) + AND ( + t5.l_quantity <= CAST(20 AS TINYINT) + ) + ) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(10 AS TINYINT) + ) + AND t5.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t5.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + t5.p_brand = 'Brand#34' + ) + AND t5.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + t5.l_quantity >= CAST(20 AS TINYINT) + ) + ) + AND ( + t5.l_quantity <= CAST(30 AS TINYINT) + ) + ) + AND t5.p_size BETWEEN CAST(1 AS TINYINT) AND CAST(15 AS TINYINT) + ) + AND t5.l_shipmode IN ('AIR', 'AIR REG') + ) + AND ( + t5.l_shipinstruct = 'DELIVER IN PERSON' + ) + ) +) AS t6 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql 
index a4c94116b64e..b6db87f2435c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/snowflake/h19.sql @@ -1,61 +1,178 @@ -WITH t0 AS ( - SELECT - t2."L_ORDERKEY" AS "l_orderkey", - t2."L_PARTKEY" AS "l_partkey", - t2."L_SUPPKEY" AS "l_suppkey", - t2."L_LINENUMBER" AS "l_linenumber", - t2."L_QUANTITY" AS "l_quantity", - t2."L_EXTENDEDPRICE" AS "l_extendedprice", - t2."L_DISCOUNT" AS "l_discount", - t2."L_TAX" AS "l_tax", - t2."L_RETURNFLAG" AS "l_returnflag", - t2."L_LINESTATUS" AS "l_linestatus", - t2."L_SHIPDATE" AS "l_shipdate", - t2."L_COMMITDATE" AS "l_commitdate", - t2."L_RECEIPTDATE" AS "l_receiptdate", - t2."L_SHIPINSTRUCT" AS "l_shipinstruct", - t2."L_SHIPMODE" AS "l_shipmode", - t2."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t2 -), t1 AS ( - SELECT - t2."P_PARTKEY" AS "p_partkey", - t2."P_NAME" AS "p_name", - t2."P_MFGR" AS "p_mfgr", - t2."P_BRAND" AS "p_brand", - t2."P_TYPE" AS "p_type", - t2."P_SIZE" AS "p_size", - t2."P_CONTAINER" AS "p_container", - t2."P_RETAILPRICE" AS "p_retailprice", - t2."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t2 -) SELECT - SUM(t0."l_extendedprice" * ( - 1 - t0."l_discount" + SUM("t7"."l_extendedprice" * ( + 1 - "t7"."l_discount" )) AS "revenue" -FROM t0 -JOIN t1 - ON t1."p_partkey" = t0."l_partkey" -WHERE - t1."p_brand" = 'Brand#12' - AND t1."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND t0."l_quantity" >= 1 - AND t0."l_quantity" <= 11 - AND t1."p_size" BETWEEN 1 AND 5 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' - OR t1."p_brand" = 'Brand#23' - AND t1."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0."l_quantity" >= 10 - AND t0."l_quantity" <= 20 - AND t1."p_size" BETWEEN 1 AND 10 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' - OR t1."p_brand" = 'Brand#34' - AND t1."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0."l_quantity" >= 20 - AND t0."l_quantity" <= 30 - AND t1."p_size" BETWEEN 1 AND 15 - AND t0."l_shipmode" IN ('AIR', 'AIR REG') - AND t0."l_shipinstruct" = 'DELIVER IN PERSON' \ No newline at end of file +FROM ( + SELECT + "t6"."l_orderkey" AS "l_orderkey", + "t6"."l_partkey" AS "l_partkey", + "t6"."l_suppkey" AS "l_suppkey", + "t6"."l_linenumber" AS "l_linenumber", + "t6"."l_quantity" AS "l_quantity", + "t6"."l_extendedprice" AS "l_extendedprice", + "t6"."l_discount" AS "l_discount", + "t6"."l_tax" AS "l_tax", + "t6"."l_returnflag" AS "l_returnflag", + "t6"."l_linestatus" AS "l_linestatus", + "t6"."l_shipdate" AS "l_shipdate", + "t6"."l_commitdate" AS "l_commitdate", + "t6"."l_receiptdate" AS "l_receiptdate", + "t6"."l_shipinstruct" AS "l_shipinstruct", + "t6"."l_shipmode" AS "l_shipmode", + "t6"."l_comment" AS "l_comment", + "t6"."p_partkey" AS "p_partkey", + "t6"."p_name" AS "p_name", + "t6"."p_mfgr" AS "p_mfgr", + "t6"."p_brand" AS "p_brand", + "t6"."p_type" AS "p_type", + "t6"."p_size" AS "p_size", + "t6"."p_container" AS "p_container", + "t6"."p_retailprice" AS "p_retailprice", + "t6"."p_comment" AS "p_comment" + FROM ( + SELECT + "t2"."l_orderkey" AS "l_orderkey", + "t2"."l_partkey" AS "l_partkey", + "t2"."l_suppkey" AS "l_suppkey", + "t2"."l_linenumber" AS "l_linenumber", + "t2"."l_quantity" AS "l_quantity", + "t2"."l_extendedprice" AS "l_extendedprice", + "t2"."l_discount" AS "l_discount", + "t2"."l_tax" AS 
"l_tax", + "t2"."l_returnflag" AS "l_returnflag", + "t2"."l_linestatus" AS "l_linestatus", + "t2"."l_shipdate" AS "l_shipdate", + "t2"."l_commitdate" AS "l_commitdate", + "t2"."l_receiptdate" AS "l_receiptdate", + "t2"."l_shipinstruct" AS "l_shipinstruct", + "t2"."l_shipmode" AS "l_shipmode", + "t2"."l_comment" AS "l_comment", + "t4"."p_partkey" AS "p_partkey", + "t4"."p_name" AS "p_name", + "t4"."p_mfgr" AS "p_mfgr", + "t4"."p_brand" AS "p_brand", + "t4"."p_type" AS "p_type", + "t4"."p_size" AS "p_size", + "t4"."p_container" AS "p_container", + "t4"."p_retailprice" AS "p_retailprice", + "t4"."p_comment" AS "p_comment" + FROM ( + SELECT + "t0"."L_ORDERKEY" AS "l_orderkey", + "t0"."L_PARTKEY" AS "l_partkey", + "t0"."L_SUPPKEY" AS "l_suppkey", + "t0"."L_LINENUMBER" AS "l_linenumber", + "t0"."L_QUANTITY" AS "l_quantity", + "t0"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t0"."L_DISCOUNT" AS "l_discount", + "t0"."L_TAX" AS "l_tax", + "t0"."L_RETURNFLAG" AS "l_returnflag", + "t0"."L_LINESTATUS" AS "l_linestatus", + "t0"."L_SHIPDATE" AS "l_shipdate", + "t0"."L_COMMITDATE" AS "l_commitdate", + "t0"."L_RECEIPTDATE" AS "l_receiptdate", + "t0"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t0"."L_SHIPMODE" AS "l_shipmode", + "t0"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."P_PARTKEY" AS "p_partkey", + "t1"."P_NAME" AS "p_name", + "t1"."P_MFGR" AS "p_mfgr", + "t1"."P_BRAND" AS "p_brand", + "t1"."P_TYPE" AS "p_type", + "t1"."P_SIZE" AS "p_size", + "t1"."P_CONTAINER" AS "p_container", + "t1"."P_RETAILPRICE" AS "p_retailprice", + "t1"."P_COMMENT" AS "p_comment" + FROM "PART" AS "t1" + ) AS "t4" + ON "t4"."p_partkey" = "t2"."l_partkey" + ) AS "t6" + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#12' + ) + AND "t6"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + "t6"."l_quantity" >= 1 + ) + ) + AND ( + "t6"."l_quantity" <= 11 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 5 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#23' + ) + AND "t6"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + "t6"."l_quantity" >= 10 + ) + ) + AND ( + "t6"."l_quantity" <= 20 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 10 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t6"."p_brand" = 'Brand#34' + ) + AND "t6"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + "t6"."l_quantity" >= 20 + ) + ) + AND ( + "t6"."l_quantity" <= 30 + ) + ) + AND "t6"."p_size" BETWEEN 1 AND 15 + ) + AND "t6"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t6"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) +) AS "t7" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql index 4b61b55158f5..111f26421e9a 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/duckdb/h20.sql @@ -1,73 +1,68 @@ -WITH t0 AS ( +SELECT + t10.s_name, + t10.s_address +FROM ( SELECT - t2.s_suppkey AS s_suppkey, - t2.s_name AS s_name, - t2.s_address AS s_address, - t2.s_nationkey AS s_nationkey, - t2.s_phone AS s_phone, - t2.s_acctbal AS s_acctbal, - t2.s_comment AS s_comment, - t3.n_nationkey AS 
n_nationkey, - t3.n_name AS n_name, - t3.n_regionkey AS n_regionkey, - t3.n_comment AS n_comment - FROM main.supplier AS t2 - JOIN main.nation AS t3 - ON t2.s_nationkey = t3.n_nationkey - WHERE - t3.n_name = 'CANADA' - AND t2.s_suppkey IN ( - SELECT - t4.ps_suppkey - FROM ( + t5.s_suppkey, + t5.s_name, + t5.s_address, + t5.s_nationkey, + t5.s_phone, + t5.s_acctbal, + t5.s_comment, + t6.n_nationkey, + t6.n_name, + t6.n_regionkey, + t6.n_comment + FROM supplier AS t5 + INNER JOIN nation AS t6 + ON t5.s_nationkey = t6.n_nationkey +) AS t10 +WHERE + t10.n_name = 'CANADA' + AND t10.s_suppkey IN ( + SELECT + t1.ps_suppkey + FROM partsupp AS t1 + WHERE + t1.ps_partkey IN ( SELECT - t5.ps_partkey AS ps_partkey, - t5.ps_suppkey AS ps_suppkey, - t5.ps_availqty AS ps_availqty, - t5.ps_supplycost AS ps_supplycost, - t5.ps_comment AS ps_comment - FROM main.partsupp AS t5 + t3.p_partkey + FROM part AS t3 WHERE - t5.ps_partkey IN ( - SELECT - t6.p_partkey - FROM ( - SELECT - t7.p_partkey AS p_partkey, - t7.p_name AS p_name, - t7.p_mfgr AS p_mfgr, - t7.p_brand AS p_brand, - t7.p_type AS p_type, - t7.p_size AS p_size, - t7.p_container AS p_container, - t7.p_retailprice AS p_retailprice, - t7.p_comment AS p_comment - FROM main.part AS t7 - WHERE - t7.p_name LIKE 'forest%' - ) AS t6 - ) - AND t5.ps_availqty > ( + t3.p_name LIKE 'forest%' + ) + AND t1.ps_availqty > ( + ( + SELECT + SUM(t8.l_quantity) AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t6.l_quantity) AS "Sum(l_quantity)" - FROM main.lineitem AS t6 + t4.l_orderkey, + t4.l_partkey, + t4.l_suppkey, + t4.l_linenumber, + t4.l_quantity, + t4.l_extendedprice, + t4.l_discount, + t4.l_tax, + t4.l_returnflag, + t4.l_linestatus, + t4.l_shipdate, + t4.l_commitdate, + t4.l_receiptdate, + t4.l_shipinstruct, + t4.l_shipmode, + t4.l_comment + FROM lineitem AS t4 WHERE - t6.l_partkey = t5.ps_partkey - AND t6.l_suppkey = t5.ps_suppkey - AND t6.l_shipdate >= MAKE_DATE(1994, 1, 1) - AND t6.l_shipdate < MAKE_DATE(1995, 1, 1) - ) * CAST(0.5 AS REAL(53)) - ) AS t4 - ) -) -SELECT - t1.s_name, - t1.s_address -FROM ( - SELECT - t0.s_name AS s_name, - t0.s_address AS s_address - FROM t0 -) AS t1 + t4.l_partkey = t1.ps_partkey + AND t4.l_suppkey = t1.ps_suppkey + AND t4.l_shipdate >= MAKE_DATE(1994, 1, 1) + AND t4.l_shipdate < MAKE_DATE(1995, 1, 1) + ) AS t8 + ) * CAST(0.5 AS DOUBLE) + ) + ) ORDER BY - t1.s_name ASC \ No newline at end of file + t10.s_name ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql index 181c42bf2d75..3b49410ea996 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/snowflake/h20.sql @@ -1,129 +1,85 @@ -WITH t4 AS ( - SELECT - t7."S_SUPPKEY" AS "s_suppkey", - t7."S_NAME" AS "s_name", - t7."S_ADDRESS" AS "s_address", - t7."S_NATIONKEY" AS "s_nationkey", - t7."S_PHONE" AS "s_phone", - t7."S_ACCTBAL" AS "s_acctbal", - t7."S_COMMENT" AS "s_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER" AS t7 -), t3 AS ( - SELECT - t7."N_NATIONKEY" AS "n_nationkey", - t7."N_NAME" AS "n_name", - t7."N_REGIONKEY" AS "n_regionkey", - t7."N_COMMENT" AS "n_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."NATION" AS t7 -), t1 AS ( - SELECT - t7."PS_PARTKEY" AS "ps_partkey", - t7."PS_SUPPKEY" AS "ps_suppkey", - t7."PS_AVAILQTY" AS "ps_availqty", - t7."PS_SUPPLYCOST" AS "ps_supplycost", - t7."PS_COMMENT" AS "ps_comment" - FROM 
"SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PARTSUPP" AS t7 -), t2 AS ( - SELECT - t7."P_PARTKEY" AS "p_partkey", - t7."P_NAME" AS "p_name", - t7."P_MFGR" AS "p_mfgr", - t7."P_BRAND" AS "p_brand", - t7."P_TYPE" AS "p_type", - t7."P_SIZE" AS "p_size", - t7."P_CONTAINER" AS "p_container", - t7."P_RETAILPRICE" AS "p_retailprice", - t7."P_COMMENT" AS "p_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."PART" AS t7 -), t0 AS ( - SELECT - t7."L_ORDERKEY" AS "l_orderkey", - t7."L_PARTKEY" AS "l_partkey", - t7."L_SUPPKEY" AS "l_suppkey", - t7."L_LINENUMBER" AS "l_linenumber", - t7."L_QUANTITY" AS "l_quantity", - t7."L_EXTENDEDPRICE" AS "l_extendedprice", - t7."L_DISCOUNT" AS "l_discount", - t7."L_TAX" AS "l_tax", - t7."L_RETURNFLAG" AS "l_returnflag", - t7."L_LINESTATUS" AS "l_linestatus", - t7."L_SHIPDATE" AS "l_shipdate", - t7."L_COMMITDATE" AS "l_commitdate", - t7."L_RECEIPTDATE" AS "l_receiptdate", - t7."L_SHIPINSTRUCT" AS "l_shipinstruct", - t7."L_SHIPMODE" AS "l_shipmode", - t7."L_COMMENT" AS "l_comment" - FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."LINEITEM" AS t7 -), t5 AS ( +SELECT + "t12"."s_name" AS "s_name", + "t12"."s_address" AS "s_address" +FROM ( SELECT - t4."s_suppkey" AS "s_suppkey", - t4."s_name" AS "s_name", - t4."s_address" AS "s_address", - t4."s_nationkey" AS "s_nationkey", - t4."s_phone" AS "s_phone", - t4."s_acctbal" AS "s_acctbal", - t4."s_comment" AS "s_comment", - t3."n_nationkey" AS "n_nationkey", - t3."n_name" AS "n_name", - t3."n_regionkey" AS "n_regionkey", - t3."n_comment" AS "n_comment" - FROM t4 - JOIN t3 - ON t4."s_nationkey" = t3."n_nationkey" - WHERE - t3."n_name" = 'CANADA' - AND t4."s_suppkey" IN ( - SELECT - t7."ps_suppkey" - FROM ( + "t5"."s_suppkey" AS "s_suppkey", + "t5"."s_name" AS "s_name", + "t5"."s_address" AS "s_address", + "t5"."s_nationkey" AS "s_nationkey", + "t5"."s_phone" AS "s_phone", + "t5"."s_acctbal" AS "s_acctbal", + "t5"."s_comment" AS "s_comment", + "t7"."n_nationkey" AS "n_nationkey", + "t7"."n_name" AS "n_name", + "t7"."n_regionkey" AS "n_regionkey", + "t7"."n_comment" AS "n_comment" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t5" + INNER JOIN ( + SELECT + "t2"."N_NATIONKEY" AS "n_nationkey", + "t2"."N_NAME" AS "n_name", + "t2"."N_REGIONKEY" AS "n_regionkey", + "t2"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t2" + ) AS "t7" + ON "t5"."s_nationkey" = "t7"."n_nationkey" +) AS "t12" +WHERE + "t12"."n_name" = 'CANADA' + AND "t12"."s_suppkey" IN (( + SELECT + "t1"."PS_SUPPKEY" AS "ps_suppkey" + FROM "PARTSUPP" AS "t1" + WHERE + "t1"."PS_PARTKEY" IN (( SELECT - t1."ps_partkey" AS "ps_partkey", - t1."ps_suppkey" AS "ps_suppkey", - t1."ps_availqty" AS "ps_availqty", - t1."ps_supplycost" AS "ps_supplycost", - t1."ps_comment" AS "ps_comment" - FROM t1 + "t3"."P_PARTKEY" AS "p_partkey" + FROM "PART" AS "t3" WHERE - t1."ps_partkey" IN ( - SELECT - t8."p_partkey" - FROM ( - SELECT - t2."p_partkey" AS "p_partkey", - t2."p_name" AS "p_name", - t2."p_mfgr" AS "p_mfgr", - t2."p_brand" AS "p_brand", - t2."p_type" AS "p_type", - t2."p_size" AS "p_size", - t2."p_container" AS "p_container", - t2."p_retailprice" AS "p_retailprice", - t2."p_comment" AS "p_comment" - FROM t2 - WHERE - t2."p_name" LIKE 'forest%' - ) AS t8 - ) - AND t1."ps_availqty" > ( + "t3"."P_NAME" LIKE 'forest%' + )) + AND "t1"."PS_AVAILQTY" > ( + ( + 
SELECT + SUM("t9"."l_quantity") AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t0."l_quantity") AS "Sum(l_quantity)" - FROM t0 + "t4"."L_ORDERKEY" AS "l_orderkey", + "t4"."L_PARTKEY" AS "l_partkey", + "t4"."L_SUPPKEY" AS "l_suppkey", + "t4"."L_LINENUMBER" AS "l_linenumber", + "t4"."L_QUANTITY" AS "l_quantity", + "t4"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t4"."L_DISCOUNT" AS "l_discount", + "t4"."L_TAX" AS "l_tax", + "t4"."L_RETURNFLAG" AS "l_returnflag", + "t4"."L_LINESTATUS" AS "l_linestatus", + "t4"."L_SHIPDATE" AS "l_shipdate", + "t4"."L_COMMITDATE" AS "l_commitdate", + "t4"."L_RECEIPTDATE" AS "l_receiptdate", + "t4"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t4"."L_SHIPMODE" AS "l_shipmode", + "t4"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t4" WHERE - t0."l_partkey" = t1."ps_partkey" - AND t0."l_suppkey" = t1."ps_suppkey" - AND t0."l_shipdate" >= DATE_FROM_PARTS(1994, 1, 1) - AND t0."l_shipdate" < DATE_FROM_PARTS(1995, 1, 1) - ) * 0.5 - ) AS t7 - ) -) -SELECT - t6."s_name", - t6."s_address" -FROM ( - SELECT - t5."s_name" AS "s_name", - t5."s_address" AS "s_address" - FROM t5 -) AS t6 + "t4"."L_PARTKEY" = "t1"."PS_PARTKEY" + AND "t4"."L_SUPPKEY" = "t1"."PS_SUPPKEY" + AND "t4"."L_SHIPDATE" >= DATEFROMPARTS(1994, 1, 1) + AND "t4"."L_SHIPDATE" < DATEFROMPARTS(1995, 1, 1) + ) AS "t9" + ) * 0.5 + ) + )) ORDER BY - t6."s_name" ASC \ No newline at end of file + "t12"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql index 3963cacc039e..72dd9ea9697b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/duckdb/h21.sql @@ -1,56 +1,74 @@ -WITH t0 AS ( - SELECT - t3.l_orderkey AS l1_orderkey, - t4.o_orderstatus AS o_orderstatus, - t3.l_receiptdate AS l_receiptdate, - t3.l_commitdate AS l_commitdate, - t3.l_suppkey AS l1_suppkey, - t2.s_name AS s_name, - t5.n_name AS n_name - FROM main.supplier AS t2 - JOIN main.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN main.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN main.nation AS t5 - ON t2.s_nationkey = t5.n_nationkey -) SELECT - t1.s_name, - t1.numwait + t17.s_name, + t17.numwait FROM ( SELECT - t0.s_name AS s_name, + t16.s_name, COUNT(*) AS numwait - FROM t0 - WHERE - t0.o_orderstatus = 'F' - AND t0.l_receiptdate > t0.l_commitdate - AND t0.n_name = 'SAUDI ARABIA' - AND ( - EXISTS( + FROM ( + SELECT + t13.l1_orderkey, + t13.o_orderstatus, + t13.l_receiptdate, + t13.l_commitdate, + t13.l1_suppkey, + t13.s_name, + t13.n_name + FROM ( + SELECT + t5.l_orderkey AS l1_orderkey, + t8.o_orderstatus, + t5.l_receiptdate, + t5.l_commitdate, + t5.l_suppkey AS l1_suppkey, + t4.s_name, + t9.n_name + FROM supplier AS t4 + INNER JOIN lineitem AS t5 + ON t4.s_suppkey = t5.l_suppkey + INNER JOIN orders AS t8 + ON t8.o_orderkey = t5.l_orderkey + INNER JOIN nation AS t9 + ON t4.s_nationkey = t9.n_nationkey + ) AS t13 + WHERE + t13.o_orderstatus = 'F' + AND t13.l_receiptdate > t13.l_commitdate + AND t13.n_name = 'SAUDI ARABIA' + AND EXISTS( SELECT - CAST(1 AS TINYINT) AS anon_1 - FROM main.lineitem AS t2 + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t6 WHERE - t2.l_orderkey = t0.l1_orderkey AND t2.l_suppkey <> t0.l1_suppkey + ( + t6.l_orderkey = t13.l1_orderkey + ) AND ( + t6.l_suppkey <> t13.l1_suppkey + ) ) - ) - AND NOT ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_2 - FROM main.lineitem AS t2 - WHERE - 
t2.l_orderkey = t0.l1_orderkey - AND t2.l_suppkey <> t0.l1_suppkey - AND t2.l_receiptdate > t2.l_commitdate + AND NOT ( + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM lineitem AS t7 + WHERE + ( + ( + t7.l_orderkey = t13.l1_orderkey + ) AND ( + t7.l_suppkey <> t13.l1_suppkey + ) + ) + AND ( + t7.l_receiptdate > t7.l_commitdate + ) + ) ) - ) + ) AS t16 GROUP BY 1 -) AS t1 +) AS t17 ORDER BY - t1.numwait DESC, - t1.s_name ASC + t17.numwait DESC, + t17.s_name ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql new file mode 100644 index 000000000000..89f5d7d5071c --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/snowflake/h21.sql @@ -0,0 +1,166 @@ +SELECT + "t20"."s_name" AS "s_name", + "t20"."numwait" AS "numwait" +FROM ( + SELECT + "t19"."s_name" AS "s_name", + COUNT(*) AS "numwait" + FROM ( + SELECT + "t16"."l1_orderkey" AS "l1_orderkey", + "t16"."o_orderstatus" AS "o_orderstatus", + "t16"."l_receiptdate" AS "l_receiptdate", + "t16"."l_commitdate" AS "l_commitdate", + "t16"."l1_suppkey" AS "l1_suppkey", + "t16"."s_name" AS "s_name", + "t16"."n_name" AS "n_name" + FROM ( + SELECT + "t8"."l_orderkey" AS "l1_orderkey", + "t11"."o_orderstatus" AS "o_orderstatus", + "t8"."l_receiptdate" AS "l_receiptdate", + "t8"."l_commitdate" AS "l_commitdate", + "t8"."l_suppkey" AS "l1_suppkey", + "t4"."s_name" AS "s_name", + "t12"."n_name" AS "n_name" + FROM ( + SELECT + "t0"."S_SUPPKEY" AS "s_suppkey", + "t0"."S_NAME" AS "s_name", + "t0"."S_ADDRESS" AS "s_address", + "t0"."S_NATIONKEY" AS "s_nationkey", + "t0"."S_PHONE" AS "s_phone", + "t0"."S_ACCTBAL" AS "s_acctbal", + "t0"."S_COMMENT" AS "s_comment" + FROM "SUPPLIER" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t8" + ON "t4"."s_suppkey" = "t8"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."O_ORDERKEY" AS "o_orderkey", + "t2"."O_CUSTKEY" AS "o_custkey", + "t2"."O_ORDERSTATUS" AS "o_orderstatus", + "t2"."O_TOTALPRICE" AS "o_totalprice", + "t2"."O_ORDERDATE" AS "o_orderdate", + "t2"."O_ORDERPRIORITY" AS "o_orderpriority", + "t2"."O_CLERK" AS "o_clerk", + "t2"."O_SHIPPRIORITY" AS "o_shippriority", + "t2"."O_COMMENT" AS "o_comment" + FROM "ORDERS" AS "t2" + ) AS "t11" + ON "t11"."o_orderkey" = "t8"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."N_NATIONKEY" AS "n_nationkey", + "t3"."N_NAME" AS "n_name", + "t3"."N_REGIONKEY" AS "n_regionkey", + "t3"."N_COMMENT" AS "n_comment" + FROM "NATION" AS "t3" + ) AS "t12" + ON "t4"."s_nationkey" = "t12"."n_nationkey" + ) AS "t16" + WHERE + "t16"."o_orderstatus" = 'F' + AND "t16"."l_receiptdate" > "t16"."l_commitdate" + AND "t16"."n_name" = 'SAUDI ARABIA' + AND EXISTS( + ( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS 
"l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t9" + WHERE + ( + "t9"."l_orderkey" = "t16"."l1_orderkey" + ) + AND ( + "t9"."l_suppkey" <> "t16"."l1_suppkey" + ) + ) + ) + AND NOT ( + EXISTS( + ( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."L_ORDERKEY" AS "l_orderkey", + "t1"."L_PARTKEY" AS "l_partkey", + "t1"."L_SUPPKEY" AS "l_suppkey", + "t1"."L_LINENUMBER" AS "l_linenumber", + "t1"."L_QUANTITY" AS "l_quantity", + "t1"."L_EXTENDEDPRICE" AS "l_extendedprice", + "t1"."L_DISCOUNT" AS "l_discount", + "t1"."L_TAX" AS "l_tax", + "t1"."L_RETURNFLAG" AS "l_returnflag", + "t1"."L_LINESTATUS" AS "l_linestatus", + "t1"."L_SHIPDATE" AS "l_shipdate", + "t1"."L_COMMITDATE" AS "l_commitdate", + "t1"."L_RECEIPTDATE" AS "l_receiptdate", + "t1"."L_SHIPINSTRUCT" AS "l_shipinstruct", + "t1"."L_SHIPMODE" AS "l_shipmode", + "t1"."L_COMMENT" AS "l_comment" + FROM "LINEITEM" AS "t1" + ) AS "t10" + WHERE + ( + ( + "t10"."l_orderkey" = "t16"."l1_orderkey" + ) + AND ( + "t10"."l_suppkey" <> "t16"."l1_suppkey" + ) + ) + AND ( + "t10"."l_receiptdate" > "t10"."l_commitdate" + ) + ) + ) + ) + ) AS "t19" + GROUP BY + 1 +) AS "t20" +ORDER BY + "t20"."numwait" DESC NULLS LAST, + "t20"."s_name" ASC +LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql index a9c96a30d190..323185ab0e0d 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/duckdb/h22.sql @@ -1,62 +1,62 @@ -WITH t0 AS ( - SELECT - CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END AS cntrycode, - t2.c_acctbal AS c_acctbal - FROM main.customer AS t2 - WHERE - CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END IN ('13', '31', '23', '29', '30', '18', '17') - AND t2.c_acctbal > ( - SELECT - anon_1.avg_bal - FROM ( - SELECT - AVG(t2.c_acctbal) AS avg_bal - FROM main.customer AS t2 - WHERE - t2.c_acctbal > CAST(0.0 AS REAL(53)) - AND CASE - WHEN ( - CAST(0 AS TINYINT) + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) - ELSE SUBSTR(t2.c_phone, CAST(0 AS TINYINT) + 1 + LENGTH(t2.c_phone), CAST(2 AS TINYINT)) - END IN ('13', '31', '23', '29', '30', '18', '17') - ) AS anon_1 - ) - AND NOT ( - EXISTS( - SELECT - CAST(1 AS TINYINT) AS anon_2 - FROM main.orders AS t3 - WHERE - t3.o_custkey = t2.c_custkey - ) - ) -) SELECT - t1.cntrycode, - t1.numcust, - t1.totacctbal + t6.cntrycode, + t6.numcust, + t6.totacctbal FROM ( SELECT - t0.cntrycode AS cntrycode, + t5.cntrycode, COUNT(*) AS numcust, - SUM(t0.c_acctbal) AS totacctbal - FROM t0 + SUM(t5.c_acctbal) AS totacctbal + FROM ( + SELECT 
+ CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END AS cntrycode, + t0.c_acctbal + FROM customer AS t0 + WHERE + CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END IN ('13', '31', '23', '29', '30', '18', '17') + AND t0.c_acctbal > ( + SELECT + AVG(t3.c_acctbal) AS "Mean(c_acctbal)" + FROM ( + SELECT + t0.c_custkey, + t0.c_name, + t0.c_address, + t0.c_nationkey, + t0.c_phone, + t0.c_acctbal, + t0.c_mktsegment, + t0.c_comment + FROM customer AS t0 + WHERE + t0.c_acctbal > CAST(0.0 AS DOUBLE) + AND CASE + WHEN CAST(0 AS TINYINT) >= 0 + THEN SUBSTRING(t0.c_phone, CAST(0 AS TINYINT) + 1, CAST(2 AS TINYINT)) + ELSE SUBSTRING(t0.c_phone, CAST(0 AS TINYINT), CAST(2 AS TINYINT)) + END IN ('13', '31', '23', '29', '30', '18', '17') + ) AS t3 + ) + AND NOT ( + EXISTS( + SELECT + CAST(1 AS TINYINT) AS "1" + FROM orders AS t1 + WHERE + t1.o_custkey = t0.c_custkey + ) + ) + ) AS t5 GROUP BY 1 -) AS t1 +) AS t6 ORDER BY - t1.cntrycode ASC \ No newline at end of file + t6.cntrycode ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql new file mode 100644 index 000000000000..a8e109a09208 --- /dev/null +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/snowflake/h22.sql @@ -0,0 +1,52 @@ +SELECT + "t6"."cntrycode" AS "cntrycode", + "t6"."numcust" AS "numcust", + "t6"."totacctbal" AS "totacctbal" +FROM ( + SELECT + "t5"."cntrycode" AS "cntrycode", + COUNT(*) AS "numcust", + SUM("t5"."c_acctbal") AS "totacctbal" + FROM ( + SELECT + IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) AS "cntrycode", + "t0"."C_ACCTBAL" AS "c_acctbal" + FROM "CUSTOMER" AS "t0" + WHERE + IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND "t0"."C_ACCTBAL" > ( + SELECT + AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" + FROM ( + SELECT + "t0"."C_CUSTKEY" AS "c_custkey", + "t0"."C_NAME" AS "c_name", + "t0"."C_ADDRESS" AS "c_address", + "t0"."C_NATIONKEY" AS "c_nationkey", + "t0"."C_PHONE" AS "c_phone", + "t0"."C_ACCTBAL" AS "c_acctbal", + "t0"."C_MKTSEGMENT" AS "c_mktsegment", + "t0"."C_COMMENT" AS "c_comment" + FROM "CUSTOMER" AS "t0" + WHERE + "t0"."C_ACCTBAL" > 0.0 + AND IFF(0 >= 0, SUBSTRING("t0"."C_PHONE", 0 + 1, 2), SUBSTRING("t0"."C_PHONE", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + ) AS "t3" + ) + AND NOT ( + EXISTS( + ( + SELECT + 1 AS "1" + FROM "ORDERS" AS "t1" + WHERE + "t1"."O_CUSTKEY" = "t0"."C_CUSTKEY" + ) + ) + ) + ) AS "t5" + GROUP BY + 1 +) AS "t6" +ORDER BY + "t6"."cntrycode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h01.py b/ibis/backends/tests/tpch/test_h01.py index 4f136266b00e..2fd02b86eb54 100644 --- a/ibis/backends/tests/tpch/test_h01.py +++ b/ibis/backends/tests/tpch/test_h01.py @@ -30,7 +30,7 @@ def test_tpc_h01(lineitem): avg_qty=t.l_quantity.mean(), avg_price=t.l_extendedprice.mean(), avg_disc=t.l_discount.mean(), - count_order=t.count(), + count_order=lambda t: t.count(), ) q = q.order_by(["l_returnflag", "l_linestatus"]) return q diff --git a/ibis/backends/tests/tpch/test_h04.py b/ibis/backends/tests/tpch/test_h04.py index 
bf7e40adec6d..536eaa2a1913 100644 --- a/ibis/backends/tests/tpch/test_h04.py +++ b/ibis/backends/tests/tpch/test_h04.py @@ -20,6 +20,6 @@ def test_tpc_h04(orders, lineitem): ] ) q = q.group_by([orders.o_orderpriority]) - q = q.aggregate(order_count=orders.count()) + q = q.aggregate(order_count=lambda t: t.count()) q = q.order_by([orders.o_orderpriority]) return q diff --git a/ibis/backends/tests/tpch/test_h08.py b/ibis/backends/tests/tpch/test_h08.py index 18bcf10168d2..3ab657c3bec7 100644 --- a/ibis/backends/tests/tpch/test_h08.py +++ b/ibis/backends/tests/tpch/test_h08.py @@ -14,7 +14,6 @@ reason="ibis doesn't preserve decimal types in aggregations", ) @pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/backends/tests/tpch/test_h14.py b/ibis/backends/tests/tpch/test_h14.py index bf78c57481f6..cb57d9911577 100644 --- a/ibis/backends/tests/tpch/test_h14.py +++ b/ibis/backends/tests/tpch/test_h14.py @@ -14,7 +14,6 @@ reason="ibis doesn't preserve decimal types in aggregations", ) @pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/backends/tests/tpch/test_h17.py b/ibis/backends/tests/tpch/test_h17.py index 1eed92064476..fbe50eb78f7e 100644 --- a/ibis/backends/tests/tpch/test_h17.py +++ b/ibis/backends/tests/tpch/test_h17.py @@ -12,7 +12,6 @@ reason="ibis doesn't preserve decimal types in aggregations", ) @pytest.mark.xfail_version( - duckdb=["sqlalchemy>=2"], trino=["sqlalchemy>=2"], reason="slightly different code is generated for sqlalchemy 2 for aggregations", ) diff --git a/ibis/examples/tests/test_examples.py b/ibis/examples/tests/test_examples.py index b17da0ed3227..c9d0b9567dc4 100644 --- a/ibis/examples/tests/test_examples.py +++ b/ibis/examples/tests/test_examples.py @@ -11,7 +11,6 @@ pytestmark = pytest.mark.examples duckdb = pytest.importorskip("duckdb") -pytest.importorskip("pins") # large files or files that are used elsewhere ignored = frozenset( @@ -95,27 +94,3 @@ def test_table_name_arg(): name = f"penguins-{uuid.uuid4().hex}" t = ibis.examples.penguins.fetch(backend=con, table_name=name) assert t.get_name() == name - - -@pytest.mark.pandas -@pytest.mark.duckdb -@pytest.mark.backend -@skip_linux_nix -@pytest.mark.parametrize( - ("example", "columns"), - [ - ("ml_latest_small_links", ["movieId", "imdbId", "tmdbId"]), - ("band_instruments", ["name", "plays"]), - ( - "AwardsManagers", - ["player_id", "award_id", "year_id", "lg_id", "tie", "notes"], - ), - ], - ids=["parquet", "csv", "csv-all-null"], -) -@pytest.mark.parametrize("backend_name", ["duckdb", "polars", "pandas"]) -def test_load_example(backend_name, example, columns): - pytest.importorskip(backend_name) - con = getattr(ibis, backend_name).connect() - t = getattr(ibis.examples, example).fetch(backend=con) - assert t.columns == columns diff --git a/ibis/expr/decompile.py b/ibis/expr/decompile.py index 3b27c166852a..6c96648b0ff9 100644 --- a/ibis/expr/decompile.py +++ b/ibis/expr/decompile.py @@ -132,7 +132,10 @@ def _wrap_alias(values, rendered): for k, v in values.items(): text = rendered[k] if v.name != k: - text = f"{text}.name({k!r})" + if isinstance(v, ops.Binary): + text = f"({text}).name({k!r})" + else: + text = f"{text}.name({k!r})" result.append(text) return result @@ -189,6 +192,11 @@ def self_reference(op, parent, identifier): return f"{parent}.view()" 
+@translate.register(ops.Distinct) +def distinct(op, parent): + return f"{parent}.distinct()" + + @translate.register(ops.JoinTable) def join_table(op, parent, index): return parent @@ -202,7 +210,12 @@ def join_link(op, table, predicates, how): @translate.register(ops.JoinChain) def join(op, first, rest, values): calls = "".join(rest) - return f"{first}{calls}" + pieces = [f"{first}{calls}"] + if values: + values = _wrap_alias(op.values, values) + pieces.append(f"select({_inline(values)})") + result = ".".join(pieces) + return result @translate.register(ops.Set) @@ -224,7 +237,9 @@ def limit(op, parent, n, offset): @translate.register(ops.Field) def table_column(op, rel, name): - return f"{rel}.{name}" + if name.isidentifier(): + return f"{rel}.{name}" + return f"{rel}[{name!r}]" @translate.register(ops.SortKey) @@ -337,10 +352,11 @@ def isin(op, value, options): class CodeContext: always_assign = ( ops.ScalarParameter, - ops.UnboundTable, ops.Aggregate, + ops.PhysicalTable, ops.SelfReference, ) + always_ignore = ( ops.JoinTable, ops.Field, diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index a4c37ce2912b..5ab1ee37c595 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -20,7 +20,6 @@ from ibis.expr.operations.sortkeys import SortKey # noqa: TCH001 from ibis.expr.schema import Schema from ibis.formats import TableProxy # noqa: TCH001 -from ibis.util import gen_name T = TypeVar("T") @@ -111,13 +110,10 @@ def __init__(self, rel, **kwargs): ) super().__init__(rel=rel, **kwargs) - @attribute - def name(self): - return self.rel.schema.names[0] - @attribute def value(self): - return self.rel.values[self.name] + name = self.rel.schema.names[0] + return self.rel.values[name] @attribute def relations(self): @@ -208,12 +204,6 @@ def __init__(self, parent, identifier): identifier = next(self._uid_counter) super().__init__(parent=parent, identifier=identifier) - @attribute - def name(self) -> str: - if (name := getattr(self.parent, "name", None)) is not None: - return f"{name}_ref" - return gen_name("self_ref") - JoinKind = Literal[ "inner", @@ -427,6 +417,18 @@ def schema(self): return backend._get_schema_using_query(self.query) +@public +class View(PhysicalTable): + """A view created from an expression.""" + + child: Relation + name: str + + @attribute + def schema(self): + return self.child.schema + + @public class DummyTable(Relation): values: FrozenDict[str, Value] diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py index 12a314379d9a..5cac1708e89b 100644 --- a/ibis/expr/rewrites.py +++ b/ibis/expr/rewrites.py @@ -1,8 +1,13 @@ """Some common rewrite functions to be shared between backends.""" from __future__ import annotations +import functools +from collections.abc import Mapping + import toolz +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.common.deferred import Item, _, deferred, var from ibis.common.exceptions import ExpressionError @@ -22,6 +27,85 @@ def peel_join_field(_): return _.rel.values[_.name] +@replace(p.Alias(p.ScalarParameter)) +def unwrap_scalar_parameter(_): + """Replace aliased scalar parameters with the parameter itself.""" + return _.arg + + +def replace_scalar_parameter(params): + """Replace scalar parameters with their values.""" + + @replace(p.ScalarParameter) + def repl(_): + return ops.Literal(value=params[_], dtype=_.dtype) + + return repl + + +@replace(p.FillNa) +def rewrite_fillna(_): + """Rewrite FillNa expressions to 
use more common operations.""" + if isinstance(_.replacements, Mapping): + mapping = _.replacements + else: + mapping = { + name: _.replacements + for name, type in _.parent.schema.items() + if type.nullable + } + + if not mapping: + return _.parent + + selections = [] + for name in _.parent.schema.names: + col = ops.TableColumn(_.parent, name) + if (value := mapping.get(name)) is not None: + col = ops.Alias(ops.Coalesce((col, value)), name) + selections.append(col) + + return ops.Project(_.parent, selections) + + +@replace(p.DropNa) +def rewrite_dropna(_): + """Rewrite DropNa expressions to use more common operations.""" + if _.subset is None: + columns = [ops.TableColumn(_.parent, name) for name in _.parent.schema.names] + else: + columns = _.subset + + if columns: + preds = [ + functools.reduce( + ops.And if _.how == "any" else ops.Or, + [ops.NotNull(c) for c in columns], + ) + ] + elif _.how == "all": + preds = [ops.Literal(False, dtype=dt.bool)] + else: + return _.parent + + return ops.Filter(_.parent, tuple(preds)) + + +@replace(p.Sample) +def rewrite_sample(_): + """Rewrite Sample as `t.filter(random() <= fraction)`. + + Errors as unsupported if a `seed` is specified. + """ + + if _.seed is not None: + raise com.UnsupportedOperationError( + "`Table.sample` with a random seed is unsupported" + ) + + return ops.Filter(_.parent, (ops.LessEqual(ops.RandomScalar(), _.fraction),)) + + @replace(ops.Analytic) def project_wrap_analytic(_, rel): # Wrap analytic functions in a window function @@ -118,6 +202,43 @@ def rewrite_window_input(value, frame): return node.replace(window_merge_frames, filter=p.Value, context=context) +@replace(p.InValues(..., ())) +def empty_in_values_right_side(_): + """Replace checks against an empty right side with `False`.""" + return ops.Literal(False, dtype=dt.bool) + + +@replace( + p.WindowFunction( + p.PercentRank(y) | p.RankBase(y) | p.CumeDist(y) | p.NTile(y), + p.WindowFrame(..., order_by=()) >> _.copy(order_by=(y,)), + ) +) +def add_order_by_to_empty_ranking_window_functions(_): + """Add an ORDER BY clause to rank window functions that don't have one.""" + return _ + + +@replace( + p.WindowFunction(p.RankBase | p.NTile) + | p.StringFind + | p.FindInSet + | p.ArrayPosition +) +def one_to_zero_index(_, **__): + """Subtract one from one-index functions.""" + return ops.Subtract(_, 1) + + +@replace(ops.NthValue) +def add_one_to_nth_value_input(_, **__): + if isinstance(_.nth, ops.Literal): + nth = ops.Literal(_.nth.value + 1, dtype=_.nth.dtype) + else: + nth = ops.Add(_.nth, 1) + return _.copy(nth=nth) + + # TODO(kszucs): schema comparison should be updated to not distinguish between # different column order @replace(p.Project(y @ p.Relation) & Check(_.schema == y.schema)) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py index 499385aab514..379751112619 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_aggregation_with_multiple_joins/decompiled.py @@ -18,8 +18,21 @@ call_outcome = ibis.table( name="call_outcome", schema={"outcome_text": "string", "id": "int64"} ) -joinchain = employee.inner_join(call, employee.id == call.employee_id).inner_join( - call_outcome, call.call_outcome_id == call_outcome.id +joinchain = ( + employee.inner_join(call, employee.id == call.employee_id) + 
.inner_join(call_outcome, call.call_outcome_id == call_outcome.id) + .select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, + call_outcome.outcome_text, + call_outcome.id.name("id_right"), + ) ) result = joinchain.aggregate( diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py index 0b23d1687445..42fdcbfec8e7 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_aggregation_with_join/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.left_join(call, employee.id == call.employee_id) +joinchain = employee.left_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) result = joinchain.aggregate( [joinchain.call_attempts.sum().name("attempts")], by=[joinchain.id] diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py index 8439fd762875..290fddcfd9d3 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/inner/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.inner_join(call, employee.id == call.employee_id) +joinchain = employee.inner_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py index 3e375cd052d2..4b3a6f52c08b 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/left/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.left_join(call, employee.id == call.employee_id) +joinchain = employee.left_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, + call.end_time, + call.employee_id, + call.call_outcome_id, + call.call_attempts, +) f = joinchain.filter(joinchain.id < 5) s = f.order_by(f.id.desc()) diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py index e9a8b2082dc1..f7f22e528a2a 100644 --- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py +++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_basic_join/right/decompiled.py @@ -15,7 +15,16 @@ "call_attempts": "int64", }, ) -joinchain = employee.right_join(call, employee.id == call.employee_id) +joinchain = employee.right_join(call, employee.id == call.employee_id).select( + employee.first_name, + employee.last_name, + employee.id, + call.start_time, 
+    call.end_time,
+    call.employee_id,
+    call.call_outcome_id,
+    call.call_attempts,
+)
 
 f = joinchain.filter(joinchain.id < 5)
 s = f.order_by(f.id.desc())
diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py
index 3e375cd052d2..4b3a6f52c08b 100644
--- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py
+++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_join_with_filter/decompiled.py
@@ -15,7 +15,16 @@
         "call_attempts": "int64",
     },
 )
-joinchain = employee.left_join(call, employee.id == call.employee_id)
+joinchain = employee.left_join(call, employee.id == call.employee_id).select(
+    employee.first_name,
+    employee.last_name,
+    employee.id,
+    call.start_time,
+    call.end_time,
+    call.employee_id,
+    call.call_outcome_id,
+    call.call_attempts,
+)
 
 f = joinchain.filter(joinchain.id < 5)
 s = f.order_by(f.id.desc())
diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py
index d6df17717b27..68a7ecaed136 100644
--- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py
+++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_multiple_joins/decompiled.py
@@ -19,6 +19,19 @@
     name="call_outcome", schema={"outcome_text": "string", "id": "int64"}
 )
 
-result = employee.inner_join(call, employee.id == call.employee_id).inner_join(
-    call_outcome, call.call_outcome_id == call_outcome.id
+result = (
+    employee.inner_join(call, employee.id == call.employee_id)
+    .inner_join(call_outcome, call.call_outcome_id == call_outcome.id)
+    .select(
+        employee.first_name,
+        employee.last_name,
+        employee.id,
+        call.start_time,
+        call.end_time,
+        call.employee_id,
+        call.call_outcome_id,
+        call.call_attempts,
+        call_outcome.outcome_text,
+        call_outcome.id.name("id_right"),
+    )
 )
diff --git a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py
index f73ace43c0d8..6080950c3d24 100644
--- a/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py
+++ b/ibis/expr/tests/snapshots/test_sql/test_parse_sql_scalar_subquery/decompiled.py
@@ -13,4 +13,13 @@
 )
 
 agg = call.aggregate([call.call_attempts.mean().name("mean")])
-result = call.inner_join(agg, [call.call_attempts > agg.mean, ibis.literal(True)])
+result = call.inner_join(
+    agg, [agg.mean < call.call_attempts, ibis.literal(True)]
+).select(
+    call.start_time,
+    call.end_time,
+    call.employee_id,
+    call.call_outcome_id,
+    call.call_attempts,
+    agg.mean,
+)
diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py
index 4afaca288980..0b2aa972e948 100644
--- a/ibis/expr/types/joins.py
+++ b/ibis/expr/types/joins.py
@@ -185,6 +185,11 @@ def join(
     )
     preds = flatten_predicates(list(preds))
 
+    # if there are no predicates, default to every row matching unless the
+    # join is a cross join, because a cross join already has this behavior
+    if not preds and how != "cross":
+        preds.append(ops.Literal(True, dtype="bool"))
+
     # calculate the fields based on lname and rname, this should be a best
     # effort to avoid collisions, but does not raise if there are any
     # if no disambiguation happens using a final .select() call, then
diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py
index 7fa0e4f37137..7dc7f92adf11 100644
--- a/ibis/formats/pandas.py
+++
b/ibis/formats/pandas.py @@ -3,6 +3,7 @@ import contextlib import datetime import warnings +from importlib.util import find_spec as _find_spec import numpy as np import pandas as pd @@ -24,6 +25,8 @@ "Install pandas >= 1.5.0 for interop with pandas and arrow dtype support" ) +geospatial_supported = _find_spec("geopandas") is not None + class PandasType(NumpyType): @classmethod @@ -118,6 +121,23 @@ def convert_table(cls, df, schema): # return data with the schema's columns which may be different than the # input columns df.columns = schema.names + + if geospatial_supported: + from geopandas import GeoDataFrame + from geopandas.array import GeometryDtype + + if ( + # pluck out the first geometry column if it exists + geom := next( + ( + name + for name, c in df.items() + if isinstance(c.dtype, GeometryDtype) + ), + None, + ) + ) is not None: + return GeoDataFrame(df, geometry=geom) return df @classmethod @@ -141,7 +161,11 @@ def convert_scalar(cls, obj, dtype): @classmethod def convert_GeoSpatial(cls, s, dtype, pandas_type): - return s + import geopandas as gpd + + if isinstance(s.dtype, gpd.array.GeometryDtype): + return gpd.GeoSeries(s) + return gpd.GeoSeries.from_wkb(s) convert_Point = ( convert_LineString diff --git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index cfb7e7e4aa5c..bab44651beef 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -47,6 +47,11 @@ def list_tables(self): def list_databases(self): return ["mockdb"] + def _to_sql(self, expr, **kwargs): + import ibis + + return ibis.to_sql(expr, dialect="duckdb", **kwargs) + def fetch_from_cursor(self, cursor, schema): pass diff --git a/ibis/tests/util.py b/ibis/tests/util.py index f51dfca04ab7..47df8e59ebf9 100644 --- a/ibis/tests/util.py +++ b/ibis/tests/util.py @@ -30,7 +30,9 @@ def assert_pickle_roundtrip(obj): def schemas_eq(left: ir.Expr, right: ir.Expr) -> bool: - assert left.as_table().schema().equals(right.as_table().schema()) + left_schema = left.as_table().schema() + right_schema = right.as_table().schema() + return left_schema == right_schema def assert_decompile_roundtrip( diff --git a/pyproject.toml b/pyproject.toml index 52cba96bf84f..739a7473fa85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,6 @@ datafusion = { version = ">=0.6,<34", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<1", optional = true } -duckdb-engine = { version = ">=0.1.8,<1", optional = true } geoalchemy2 = { version = ">=0.6.3,<1", optional = true } geopandas = { version = ">=0.6,<1", optional = true } google-cloud-bigquery = { version = ">=3,<4", optional = true } @@ -155,7 +154,6 @@ all = [ "datafusion", "db-dtypes", "duckdb", - "duckdb-engine", "deltalake", "geoalchemy2", "geopandas", @@ -187,11 +185,11 @@ bigquery = [ "google-cloud-bigquery-storage", "pydata-google-auth", ] -clickhouse = ["clickhouse-connect", "sqlalchemy"] +clickhouse = ["clickhouse-connect"] dask = ["dask", "regex"] datafusion = ["datafusion"] druid = ["pydruid", "sqlalchemy"] -duckdb = ["duckdb", "duckdb-engine", "sqlalchemy", "sqlalchemy-views"] +duckdb = ["duckdb"] exasol = ["sqlalchemy", "sqlalchemy-exasol", "sqlalchemy-views"] flink = [] geospatial = ["geoalchemy2", "geopandas", "shapely"] @@ -295,9 +293,6 @@ filterwarnings = [ 'ignore:`np\.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning', # numpy, coming from a pandas call 'ignore:In the future `np\.bool` will be defined as the corresponding 
NumPy scalar:FutureWarning', - # duckdb-engine - 'ignore:Dialect .+ does \*not\* support Decimal:', - "ignore:duckdb-engine doesn't yet support reflection on indices:", # druid 'ignore:Dialect druid.rest will not make use of SQL compilation caching:', # ibis