Skip to content

docs: blog for the 1 billion row challenge #18121

docs: blog for the 1 billion row challenge

docs: blog for the 1 billion row challenge #18121

Workflow file for this run

name: Docs/Linting/Benchmarks
on:
push:
branches:
- main
- "*.x.x"
pull_request:
branches:
- main
- "*.x.x"
merge_group:
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
permissions:
# increase the rate limit for github operations, but limit token permissions
# to read-only
contents: read
jobs:
lint:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
- name: install nix
uses: cachix/install-nix-action@v25
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
# run against the full shell.nix on push so it gets pushed to cachix
- name: pre-commit checks
run: nix develop '.#preCommit' --ignore-environment --keep-going -c pre-commit run --all-files --show-diff-on-failure --color=always
release_notes_spellcheck:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: install nix
uses: cachix/install-nix-action@v25
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: check generated release notes spelling
run: nix run '.#check-release-notes-spelling'
benchmarks:
runs-on: ubuntu-latest
if: github.event_name == 'push'
steps:
- name: checkout
uses: actions/checkout@v4
- name: install python
uses: actions/setup-python@v5
id: install_python
with:
python-version: "3.11"
- name: install system dependencies
run: sudo apt-get install -qq -y build-essential libgeos-dev freetds-dev unixodbc-dev
- uses: syphar/restore-virtualenv@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: benchmarks
- uses: syphar/restore-pip-download-cache@v1
with:
requirement_files: poetry.lock
custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}
- run: python -m pip install --upgrade pip 'poetry==1.7.1'
- name: install ibis
run: poetry install --without dev --without docs --all-extras
- name: make benchmark output dir
run: mkdir .benchmarks
- name: benchmark
run: poetry run pytest --benchmark-enable --benchmark-json .benchmarks/output.json ibis/tests/benchmarks
- uses: google-github-actions/auth@v2
with:
credentials_json: ${{ secrets.GCP_CREDENTIALS }}
- uses: google-github-actions/setup-gcloud@v2
- name: show gcloud info
run: gcloud info
- name: download the latest duckdb release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
gh release download -R duckdb/duckdb --pattern 'duckdb_cli-linux-amd64.zip'
unzip duckdb_cli-linux-amd64.zip
- name: convert json data to parquet
run: |
set -euo pipefail
# sort json keys
jq --sort-keys -rcM < "$PWD/.benchmarks/output.json" > output.json
# connect to a file to allow spilling to disk
./duckdb json2parquet.ddb <<EOF
COPY (
SELECT * FROM read_ndjson_auto('output.json', maximum_object_size=2**27)
) TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD)
EOF
- name: copy data to gcs
run: |
set -euo pipefail
timestamp="$(date --iso-8601=ns --utc | tr ',' '.')"
gsutil cp output.parquet "gs://ibis-benchmark-data/ci/${timestamp}.parquet"
docs_pr:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request'
concurrency: docs_pr-${{ github.repository }}-${{ github.head_ref || github.sha }}
steps:
- name: install nix
uses: cachix/install-nix-action@v25
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: checkout
uses: actions/checkout@v4
- name: run doctest
# keep HOME because duckdb (which we use for doctests) wants to use
# that for extensions
run: nix develop --ignore-environment --keep HOME -c just doctest
- name: generate api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose
- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render
- name: check that all frozen computations were done before push
run: git diff --exit-code --stat
- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress
docs_push:
runs-on: ubuntu-latest
if: github.event_name == 'push'
concurrency: docs-${{ github.repository }}
steps:
- name: install nix
uses: cachix/install-nix-action@v25
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- name: setup cachix
uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: checkout
uses: actions/checkout@v4
- name: run doctests
# keep HOME because duckdb (which we use for doctests) wants to use
# that for extensions
run: nix develop --ignore-environment --keep HOME -c just doctest
- name: build api docs
run: nix develop --ignore-environment -c just docs-apigen --verbose
- name: build docs
run: nix develop --ignore-environment --keep HOME -c just docs-render
- name: check that all frozen computations were done before push
run: git diff --exit-code --stat
- name: verify internal links
run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress
- name: build and push quarto docs
run: nix develop --ignore-environment --keep NETLIFY_AUTH_TOKEN -c just docs-deploy
env:
NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
simulate_release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: cachix/install-nix-action@v25
with:
extra_nix_config: |
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}
- uses: cachix/cachix-action@v14
with:
name: ibis
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
extraPullNames: nix-community,poetry2nix
- name: Configure git info
run: |
set -euo pipefail
# not incredibly important what user we use here
#
# we're making a commit in a temporary worktree that is thrown away
# if the process exits successfully
#
# git requires user information to make commits
git config user.name 'ibis-squawk-bot[bot]'
git config user.email 'ibis-squawk-bot[bot]@users.noreply.github.com'
- name: run semantic-release
run: ./ci/release/dry_run.sh