docs: blog for the 1 billion row challenge #18121
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that lints the repository, spellchecks generated release notes,
# runs benchmarks, builds (and on push, deploys) the docs, and simulates a
# release.
name: Docs/Linting/Benchmarks

# Run on pushes to and pull requests against `main` and the `*.x.x`
# maintenance branches, and for merge queue (`merge_group`) events.
on:
  push:
    branches:
      - main
      - "*.x.x"
  pull_request:
    branches:
      - main
      - "*.x.x"
  merge_group:

# One concurrency group per repository + ref (PR head ref, or the commit SHA
# when there is no head ref) + workflow; a newer run in the same group cancels
# the one in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

permissions:
  # increase the rate limit for github operations, but limit token permissions
  # to read-only
  contents: read
jobs:
  # Run the pre-commit hooks over every file inside the nix `preCommit` shell.
  lint:
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install nix
        uses: cachix/install-nix-action@v25
        with:
          extra_nix_config: |
            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}

      - name: setup cachix
        uses: cachix/cachix-action@v14
        with:
          name: ibis
          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
          extraPullNames: nix-community,poetry2nix

      # run against the full shell.nix on push so it gets pushed to cachix
      - name: pre-commit checks
        run: nix develop '.#preCommit' --ignore-environment --keep-going -c pre-commit run --all-files --show-diff-on-failure --color=always

  # Spellcheck the generated release notes via the flake's
  # `check-release-notes-spelling` app.
  release_notes_spellcheck:
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        uses: actions/checkout@v4
        with:
          # fetch the full history rather than a shallow clone
          fetch-depth: 0

      - name: install nix
        uses: cachix/install-nix-action@v25
        with:
          extra_nix_config: |
            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}

      - name: setup cachix
        uses: cachix/cachix-action@v14
        with:
          name: ibis
          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
          extraPullNames: nix-community,poetry2nix

      - name: check generated release notes spelling
        run: nix run '.#check-release-notes-spelling'

  # Run the benchmark suite, convert the JSON results to parquet with the
  # DuckDB CLI and upload the parquet file to GCS. Push events only.
  benchmarks:
    runs-on: ubuntu-latest
    if: github.event_name == 'push'
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: install python
        uses: actions/setup-python@v5
        id: install_python
        with:
          python-version: "3.11"

      - name: install system dependencies
        run: |
          set -euo pipefail

          # refresh the package index first: the runner image's index can be
          # stale, which makes `apt-get install` fail on packages that have
          # since been superseded on the mirrors
          sudo apt-get update -qq
          sudo apt-get install -qq -y build-essential libgeos-dev freetds-dev unixodbc-dev

      - uses: syphar/restore-virtualenv@v1
        with:
          requirement_files: poetry.lock
          custom_cache_key_element: benchmarks

      - uses: syphar/restore-pip-download-cache@v1
        with:
          requirement_files: poetry.lock
          custom_cache_key_element: benchmarks-${{ steps.install_python.outputs.python-version }}

      - run: python -m pip install --upgrade pip 'poetry==1.7.1'

      - name: install ibis
        run: poetry install --without dev --without docs --all-extras

      - name: make benchmark output dir
        # -p: do not fail if the directory already exists
        run: mkdir -p .benchmarks

      - name: benchmark
        run: poetry run pytest --benchmark-enable --benchmark-json .benchmarks/output.json ibis/tests/benchmarks

      - uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GCP_CREDENTIALS }}

      - uses: google-github-actions/setup-gcloud@v2

      - name: show gcloud info
        run: gcloud info

      - name: download the latest duckdb release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          gh release download -R duckdb/duckdb --pattern 'duckdb_cli-linux-amd64.zip'
          unzip duckdb_cli-linux-amd64.zip

      - name: convert json data to parquet
        run: |
          set -euo pipefail

          # sort json keys
          jq --sort-keys -rcM < "$PWD/.benchmarks/output.json" > output.json

          # connect to a file to allow spilling to disk
          ./duckdb json2parquet.ddb <<EOF
            COPY (
              SELECT * FROM read_ndjson_auto('output.json', maximum_object_size=2**27)
            ) TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD)
          EOF

      - name: copy data to gcs
        run: |
          set -euo pipefail

          # nanosecond-resolution UTC timestamp; any ',' decimal separator is
          # normalized to '.' before it is used as the object name
          timestamp="$(date --iso-8601=ns --utc | tr ',' '.')"
          gsutil cp output.parquet "gs://ibis-benchmark-data/ci/${timestamp}.parquet"

  # Build and validate the docs for pull requests (no deploy step).
  docs_pr:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    concurrency: docs_pr-${{ github.repository }}-${{ github.head_ref || github.sha }}
    steps:
      - name: install nix
        uses: cachix/install-nix-action@v25
        with:
          extra_nix_config: |
            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}

      - name: setup cachix
        uses: cachix/cachix-action@v14
        with:
          name: ibis
          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
          extraPullNames: nix-community,poetry2nix

      - name: checkout
        uses: actions/checkout@v4

      - name: run doctest
        # keep HOME because duckdb (which we use for doctests) wants to use
        # that for extensions
        run: nix develop --ignore-environment --keep HOME -c just doctest

      - name: generate api docs
        run: nix develop --ignore-environment -c just docs-apigen --verbose

      - name: build docs
        run: nix develop --ignore-environment --keep HOME -c just docs-render

      - name: check that all frozen computations were done before push
        # fails the job if the previous steps modified any tracked file
        run: git diff --exit-code --stat

      - name: verify internal links
        run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress

  # Build and validate the docs on push, then deploy them. All runs share a
  # single concurrency group per repository.
  docs_push:
    runs-on: ubuntu-latest
    if: github.event_name == 'push'
    concurrency: docs-${{ github.repository }}
    steps:
      - name: install nix
        uses: cachix/install-nix-action@v25
        with:
          extra_nix_config: |
            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}

      - name: setup cachix
        uses: cachix/cachix-action@v14
        with:
          name: ibis
          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
          extraPullNames: nix-community,poetry2nix

      - name: checkout
        uses: actions/checkout@v4

      - name: run doctests
        # keep HOME because duckdb (which we use for doctests) wants to use
        # that for extensions
        run: nix develop --ignore-environment --keep HOME -c just doctest

      - name: build api docs
        run: nix develop --ignore-environment -c just docs-apigen --verbose

      - name: build docs
        run: nix develop --ignore-environment --keep HOME -c just docs-render

      - name: check that all frozen computations were done before push
        # fails the job if the previous steps modified any tracked file
        run: git diff --exit-code --stat

      - name: verify internal links
        run: nix develop --ignore-environment '.#links' -c just checklinks --offline --no-progress

      - name: build and push quarto docs
        # NETLIFY_AUTH_TOKEN is the only variable let through
        # --ignore-environment for the deploy recipe
        run: nix develop --ignore-environment --keep NETLIFY_AUTH_TOKEN -c just docs-deploy
        env:
          NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}

  # Run the release dry-run script against a full-history checkout.
  simulate_release:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # fetch the full history rather than a shallow clone
          fetch-depth: 0

      - uses: cachix/install-nix-action@v25
        with:
          extra_nix_config: |
            access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}

      - uses: cachix/cachix-action@v14
        with:
          name: ibis
          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
          extraPullNames: nix-community,poetry2nix

      - name: Configure git info
        run: |
          set -euo pipefail

          # not incredibly important what user we use here
          #
          # we're making a commit in a temporary worktree that is thrown away
          # if the process exits successfully
          #
          # git requires user information to make commits
          git config user.name 'ibis-squawk-bot[bot]'
          git config user.email 'ibis-squawk-bot[bot]@users.noreply.github.com'

      - name: run semantic-release
        run: ./ci/release/dry_run.sh