Skip to content

Enable string-based column projections from Parquet files #8766

Enable string-based column projections from Parquet files

Enable string-based column projections from Parquet files #8766

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
---
# CI workflow for the parquet crate
name: parquet
concurrency:
  # a newer run in the same group cancels the one still in progress
  cancel-in-progress: true
  # group key: repository + PR head branch (commit SHA when there is no
  # head ref, e.g. pushes) + workflow name
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
# run on every push to main, and on pull requests that modify any of the
# paths listed below
on:
  push:
    branches: [main]
  pull_request:
    paths:
      - "arrow/**"
      - "arrow-array/**"
      - "arrow-buffer/**"
      - "arrow-cast/**"
      - "arrow-data/**"
      - "arrow-schema/**"
      - "arrow-select/**"
      - "arrow-ipc/**"
      - "arrow-csv/**"
      - "arrow-json/**"
      - "arrow-avro/**"
      - "parquet/**"
      - ".github/**"
jobs:
  # run the parquet test suite and the bundled examples
  linux-test:
    name: Test
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          # presumably the tests need data kept in git submodules -- confirm
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
      # default features first, then every feature enabled
      - name: Test
        run: cargo test -p parquet
      - name: Test --all-features
        run: cargo test -p parquet --all-features
      # the two async examples are run with the "async" feature enabled
      - name: Run examples
        run: |
          # Test parquet examples
          cargo run -p parquet --example read_parquet
          cargo run -p parquet --example async_read_parquet --features="async"
          cargo run -p parquet --example read_with_rowgroup --features="async"
# test compilation
  linux-features:
    name: Check Compilation
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
      # Check the library on its own as well as all targets, to ensure
      # that it still works in the absence of features that might be
      # enabled by dev-dependencies of other targets.
      #
      # For each of (library, all-targets), check that it:
      # 1. compiles with default features
      # 2. compiles with no default features
      # 3. compiles with just the arrow feature
      # 4. compiles with all features
      #
      # All targets are additionally checked with just the json feature.
      - name: Check compilation
        run: cargo check -p parquet
      - name: Check compilation --no-default-features
        run: cargo check -p parquet --no-default-features
      - name: Check compilation --no-default-features --features arrow
        run: cargo check -p parquet --no-default-features --features arrow
      # step name now matches the command, which passes only --all-features
      - name: Check compilation --all-features
        run: cargo check -p parquet --all-features
      - name: Check compilation --all-targets
        run: cargo check -p parquet --all-targets
      - name: Check compilation --all-targets --no-default-features
        run: cargo check -p parquet --all-targets --no-default-features
      - name: Check compilation --all-targets --no-default-features --features arrow
        run: cargo check -p parquet --all-targets --no-default-features --features arrow
      - name: Check compilation --all-targets --all-features
        run: cargo check -p parquet --all-targets --all-features
      - name: Check compilation --all-targets --no-default-features --features json
        run: cargo check -p parquet --all-targets --no-default-features --features json
# test the parquet crate builds against wasm32 in stable rust
  wasm32-build:
    name: Build wasm32
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
        with:
          # The WASI preview-1 target is named wasm32-wasip1 since Rust 1.78;
          # the former name wasm32-wasi was removed in Rust 1.84.
          # NOTE(review): assumes setup-builder installs a toolchain >= 1.78
          # -- confirm against the action's definition.
          target: wasm32-unknown-unknown,wasm32-wasip1
      - name: Install clang # Needed for zlib compilation
        run: apt-get update && apt-get install -y clang gcc-multilib
      - name: Build wasm32-unknown-unknown
        run: cargo build -p parquet --target wasm32-unknown-unknown
      - name: Build wasm32-wasip1
        run: cargo build -p parquet --target wasm32-wasip1
# run the Python test suite under parquet/pytest
  pyspark-integration-test:
    name: PySpark Integration Test
    runs-on: ubuntu-latest
    strategy:
      matrix:
        rust:
          - stable
    steps:
      - uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # quoted so the version stays the string 3.10 rather than float 3.1
          python-version: '3.10'
          cache: pip
      - name: Install Python dependencies
        run: |
          cd parquet/pytest
          pip install -r requirements.txt
      # formatting gate: fails if black would reformat any test file
      - name: Black check the test files
        run: |
          cd parquet/pytest
          black --check *.py --verbose
      # this job has no container, so the toolchain is set up via rustup
      - name: Setup Rust toolchain
        run: |
          rustup toolchain install ${{ matrix.rust }}
          rustup default ${{ matrix.rust }}
      # presumably these CLI binaries are invoked by the pytest suite -- confirm
      - name: Install binary for checking
        run: |
          cargo install --path parquet --bin parquet-show-bloom-filter --features=cli
          cargo install --path parquet --bin parquet-fromcsv --features=arrow,cli
      - name: Run pytest
        run: |
          cd parquet/pytest
          pytest -v
# lint the parquet crate with clippy
  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust toolchain
        uses: ./.github/actions/setup-builder
      - name: Setup Clippy
        run: rustup component add clippy
      # -D warnings promotes every warning to an error, so any lint fails the job
      - name: Run clippy
        run: cargo clippy -p parquet --all-targets --all-features -- -D warnings