mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
ddee4717af
* chore: fix license header and add checker Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * add CI checker Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * ignore generated files Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * replace RAT with hawkeye Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix new header errors Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
606 lines
20 KiB
YAML
606 lines
20 KiB
YAML
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
name: Rust

# Runs are grouped by repository, head ref (falling back to the commit SHA)
# and workflow name; a newer run in the same group cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}

on:
  # Pushes and pull requests that only touch the ignored paths below
  # (docs, markdown, GitHub templates) do not start this workflow.
  push:
    paths-ignore:
      - "docs/**"
      - "**.md"
      - ".github/ISSUE_TEMPLATE/**"
      - ".github/pull_request_template.md"
  pull_request:
    paths-ignore:
      - "docs/**"
      - "**.md"
      - ".github/ISSUE_TEMPLATE/**"
      - ".github/pull_request_template.md"
  # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
  workflow_dispatch:

jobs:
|
|
# Check license header
license-header-check:
  name: Check License Header
  # The `ubuntu-20.04` hosted runner image has been retired by GitHub, so a job
  # pinned to it can no longer be scheduled. Use the maintained
  # `ubuntu-latest` label, like the other Linux jobs in this workflow.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    # Runs the hawkeye license-header checker against the checked-out tree.
    - uses: korandoru/hawkeye@v5
|
|
|
|
# Check crate compiles
#
# Most of the other Linux jobs in this workflow list this job under `needs`,
# so they only start once every `cargo check` below has passed.
linux-build-lib:
  name: cargo check
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # Cache cargo's binaries, registry, git checkouts and both target
    # directories, keyed on the hash of every Cargo.toml / Cargo.lock.
    - name: Cache Cargo
      uses: actions/cache@v4
      with:
        key: cargo-cache-${{ hashFiles('**/Cargo.toml', '**/Cargo.lock') }}
        path: |
          ~/.cargo/bin/
          ~/.cargo/registry/index/
          ~/.cargo/registry/cache/
          ~/.cargo/git/db/
          ./target/
          ./datafusion-cli/target/

    - name: Check datafusion without default features
      # Some of the test binaries require the parquet feature still
      #run: cargo check --all-targets --no-default-features -p datafusion
      run: cargo check --no-default-features -p datafusion

    - name: Check datafusion-common without default features
      run: cargo check --all-targets --no-default-features -p datafusion-common

    - name: Check datafusion-functions
      run: cargo check --all-targets --no-default-features -p datafusion-functions

    - name: Check workspace in debug mode
      run: cargo check --all-targets --workspace

    - name: Check workspace with avro,json features
      run: cargo check --workspace --benches --features avro,json

    - name: Check Cargo.lock for datafusion-cli
      run: |
        # If this test fails, try running `cargo update` in the `datafusion-cli` directory
        # and check in the updated Cargo.lock file.
        cargo check --manifest-path datafusion-cli/Cargo.toml --locked

    # Ensure that the datafusion crate can be built with only a subset of the function
    # packages enabled.
    - name: Check datafusion (nested_expressions)
      run: cargo check --no-default-features --features=nested_expressions -p datafusion

    - name: Check datafusion (crypto)
      run: cargo check --no-default-features --features=crypto_expressions -p datafusion

    - name: Check datafusion (datetime_expressions)
      run: cargo check --no-default-features --features=datetime_expressions -p datafusion

    - name: Check datafusion (encoding_expressions)
      run: cargo check --no-default-features --features=encoding_expressions -p datafusion

    - name: Check datafusion (math_expressions)
      run: cargo check --no-default-features --features=math_expressions -p datafusion

    - name: Check datafusion (regex_expressions)
      run: cargo check --no-default-features --features=regex_expressions -p datafusion

    - name: Check datafusion (string_expressions)
      run: cargo check --no-default-features --features=string_expressions -p datafusion

    # Ensure that the datafusion-functions crate can be built with only a subset of the function
    # packages enabled.
    - name: Check datafusion-functions (crypto)
      run: cargo check --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions

    - name: Check datafusion-functions (datetime_expressions)
      run: cargo check --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions

    - name: Check datafusion-functions (encoding_expressions)
      run: cargo check --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions

    - name: Check datafusion-functions (math_expressions)
      run: cargo check --all-targets --no-default-features --features=math_expressions -p datafusion-functions

    - name: Check datafusion-functions (regex_expressions)
      run: cargo check --all-targets --no-default-features --features=regex_expressions -p datafusion-functions

    - name: Check datafusion-functions (string_expressions)
      run: cargo check --all-targets --no-default-features --features=string_expressions -p datafusion-functions
|
|
|
|
# Run tests
linux-test:
  name: cargo test (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # Library, integration-test and binary targets only; doctests have their
    # own job (`linux-test-doc`).
    - name: Run tests (excluding doctests)
      run: cargo test --lib --tests --bins --features avro,json,backtrace

    # `git diff --exit-code` exits non-zero if the steps above modified any
    # tracked file.
    - name: Verify Working Directory Clean
      run: git diff --exit-code
|
|
|
|
# Run the datafusion-cli test suite from inside its own directory.
linux-test-datafusion-cli:
  name: cargo test datafusion-cli (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Run tests (excluding doctests)
      # Equivalent to `cd datafusion-cli` before invoking cargo.
      working-directory: datafusion-cli
      run: cargo test --lib --tests --bins --all-features

    # Exits non-zero if the test run modified any tracked file.
    - name: Verify Working Directory Clean
      run: git diff --exit-code
|
|
|
|
# Build and run the example programs.
linux-test-example:
  name: cargo examples (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Run examples
      run: |
        # test datafusion-sql examples
        cargo run --example sql
        # test datafusion-examples
        ci/scripts/rust_example.sh

    # Exits non-zero if running the examples modified any tracked file.
    - name: Verify Working Directory Clean
      run: git diff --exit-code
|
|
|
|
|
|
|
|
# Run `cargo test doc` (test documentation examples)
linux-test-doc:
  name: cargo test doc (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # Doctests are run twice: once from the repository root and once from
    # inside the datafusion-cli directory.
    - name: Run doctests
      run: |
        cargo test --doc --features avro,json
        cd datafusion-cli
        cargo test --doc --all-features

    # Exits non-zero if the doctest runs modified any tracked file.
    - name: Verify Working Directory Clean
      run: git diff --exit-code
|
|
|
|
# Run `cargo doc` to ensure the rustdoc is clean
linux-rustdoc:
  name: cargo doc
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # The actual rustdoc invocation lives in the repository script.
    - name: Run cargo doc
      run: ci/scripts/rust_docs.sh
|
|
|
|
# Build the wasm test crate with wasm-pack. This job has no `needs`, so it is
# not gated on `linux-build-lib`.
linux-wasm-pack:
  name: build with wasm-pack
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # Downloads the wasm-pack installer script and pipes it straight into sh.
    - name: Install wasm-pack
      run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh

    - name: Build with wasm-pack
      working-directory: ./datafusion/wasmtest
      run: wasm-pack build --dev
|
|
|
|
# verify that the benchmark queries return the correct results
verify-benchmark-results:
  name: verify benchmark results (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    # Builds tpch-dbgen from source, generates the tables at scale factor 0.1
    # and moves the resulting *.tbl files into the sqllogictest data directory.
    - name: Generate benchmark data and expected query results
      run: |
        mkdir -p datafusion/sqllogictest/test_files/tpch/data
        git clone https://github.com/databricks/tpch-dbgen.git
        cd tpch-dbgen
        make
        ./dbgen -f -s 0.1
        mv *.tbl ../datafusion/sqllogictest/test_files/tpch/data

    - name: Verify that benchmark queries return expected results
      run: |
        # Quoted $(...) keeps the resolved path intact even if it contains whitespace.
        export TPCH_DATA="$(realpath datafusion/sqllogictest/test_files/tpch/data)"
        # use release build for plan verification because debug build causes stack overflow
        cargo test plan_q --package datafusion-benchmarks --profile release-nonlto --features=ci -- --test-threads=1
        INCLUDE_TPCH=true cargo test --test sqllogictests

    # Exits non-zero if the steps above modified any tracked file.
    - name: Verify Working Directory Clean
      run: git diff --exit-code
|
|
|
|
# Run the sqllogictest suite against a Postgres service container.
# This job runs directly on the runner (no `container:`), so it installs the
# stable toolchain with rustup instead of using the setup-builder action.
sqllogictest-postgres:
  name: "Run sqllogictest with Postgres runner"
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  services:
    postgres:
      image: postgres:15
      env:
        POSTGRES_PASSWORD: postgres
        POSTGRES_DB: db_test
        POSTGRES_INITDB_ARGS: --encoding=UTF-8 --lc-collate=C --lc-ctype=C
      ports:
        - 5432/tcp
      # Health-check options passed to the service container.
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup toolchain
      run: |
        rustup toolchain install stable
        rustup default stable

    - name: Run sqllogictest
      env:
        # Host port mapped to the service container's 5432/tcp; the shell
        # expands it into PG_URI below.
        POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
      run: PG_COMPAT=true PG_URI="postgresql://postgres:postgres@localhost:$POSTGRES_PORT/db_test" cargo test --features=postgres --test sqllogictests
|
|
|
|
# Run the workspace and datafusion-cli tests on Windows. This job has no
# `needs`, so it is not gated on `linux-build-lib`.
windows:
  name: cargo test (win64)
  runs-on: windows-latest
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-windows-builder

    - name: Run tests (excluding doctests)
      # bash is requested explicitly, so the script below uses POSIX syntax.
      shell: bash
      run: |
        export PATH=$PATH:$HOME/d/protoc/bin
        cargo test --lib --tests --bins --features avro,json,backtrace
        cd datafusion-cli
        cargo test --lib --tests --bins --all-features
|
|
|
|
# Run the workspace and datafusion-cli tests on the `macos-latest` runner.
# This job has no `needs`, so it is not gated on `linux-build-lib`.
macos:
  name: cargo test (macos)
  runs-on: macos-latest
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-macos-builder

    - name: Run tests (excluding doctests)
      shell: bash
      run: |
        cargo test --lib --tests --bins --features avro,json,backtrace
        cd datafusion-cli
        cargo test --lib --tests --bins --all-features
|
|
|
|
# Run the workspace and datafusion-cli tests on the `macos-14` runner.
# This job has no `needs`, so it is not gated on `linux-build-lib`.
macos-aarch64:
  name: cargo test (macos-aarch64)
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-macos-aarch64-builder

    - name: Run tests (excluding doctests)
      shell: bash
      run: |
        cargo test --lib --tests --bins --features avro,json,backtrace
        cd datafusion-cli
        cargo test --lib --tests --bins --all-features
|
|
|
|
# Run the datafusion-common tests with the `pyarrow` feature enabled.
test-datafusion-pyarrow:
  name: cargo test pyarrow (amd64)
  needs: [linux-build-lib]
  # The `ubuntu-20.04` hosted runner image has been retired by GitHub, so a job
  # pinned to it can no longer be scheduled. The steps execute inside the
  # container below, so only the host label changes here.
  runs-on: ubuntu-latest
  container:
    image: amd64/rust:bullseye  # Workaround https://github.com/actions/setup-python/issues/721
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - uses: actions/setup-python@v5
      with:
        # Quoted so YAML keeps it a string rather than a float.
        python-version: "3.8"

    - name: Install PyArrow
      run: |
        echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV
        python -m pip install pyarrow

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Run datafusion-common tests
      run: cargo test -p datafusion-common --features=pyarrow
|
|
|
|
# Regenerate the vendored datafusion-proto code and fail if the result differs
# from what is checked in.
vendor:
  name: Verify Vendored Code
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    # No `rust-version` input is passed here, unlike most other jobs.
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder

    - name: Run gen
      working-directory: ./datafusion/proto
      run: ./regen.sh

    - name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results)
      run: git diff --exit-code
|
|
|
|
# Check Rust formatting via the repository's rust_fmt.sh script.
check-fmt:
  name: Check cargo fmt
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Run
      run: |
        echo '' > datafusion/proto/src/generated/datafusion.rs
        ci/scripts/rust_fmt.sh
|
|
|
|
# Coverage job disabled due to
|
|
# https://github.com/apache/datafusion/issues/3678
|
|
|
|
# coverage:
|
|
# name: coverage
|
|
# runs-on: ubuntu-latest
|
|
# steps:
|
|
# - uses: actions/checkout@v4
|
|
# with:
|
|
# submodules: true
|
|
# - name: Install protobuf compiler
|
|
# shell: bash
|
|
# run: |
|
|
# mkdir -p $HOME/d/protoc
|
|
# cd $HOME/d/protoc
|
|
# export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
|
|
# curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
|
|
# unzip $PROTO_ZIP
|
|
# export PATH=$PATH:$HOME/d/protoc/bin
|
|
# protoc --version
|
|
# - name: Setup Rust toolchain
|
|
# run: |
|
|
# rustup toolchain install stable
|
|
# rustup default stable
|
|
# rustup component add rustfmt clippy
|
|
# - name: Cache Cargo
|
|
# uses: actions/cache@v4
|
|
# with:
|
|
# path: /home/runner/.cargo
|
|
# # this key is not equal because the user is different than on a container (runner vs github)
|
|
# key: cargo-coverage-cache3-
|
|
# - name: Run coverage
|
|
# run: |
|
|
# export PATH=$PATH:$HOME/d/protoc/bin
|
|
# rustup toolchain install stable
|
|
# rustup default stable
|
|
# cargo install --version 0.20.1 cargo-tarpaulin
|
|
# cargo tarpaulin --all --out Xml
|
|
# - name: Report coverage
|
|
# continue-on-error: true
|
|
# run: bash <(curl -s https://codecov.io/bash)
|
|
|
|
# Lint the workspace with clippy via the repository's rust_clippy.sh script.
clippy:
  name: clippy
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Install Clippy
      run: rustup component add clippy

    - name: Run clippy
      run: ci/scripts/rust_clippy.sh
|
|
|
|
# Check answers are correct when hash values collide
hash-collisions:
  name: cargo test hash collisions (amd64)
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Run tests
      # Equivalent to `cd datafusion` before invoking cargo.
      working-directory: datafusion
      run: cargo test --lib --tests --features=force_hash_collisions,avro
|
|
|
|
# Check that the TOML files are formatted the way `taplo format` would write them.
cargo-toml-formatting-checks:
  name: check Cargo.toml formatting
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - name: Install taplo
      run: cargo +stable install taplo-cli --version ^0.9 --locked

    # if you encounter an error, try running 'taplo format' to fix the formatting automatically.
    - name: Check Cargo.toml formatting
      run: taplo format --check
|
|
|
|
# Regenerate the generated documentation and fail if the checked-in copies
# are out of date.
config-docs-check:
  name: check configs.md and ***_functions.md is up-to-date
  needs: [linux-build-lib]
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: true

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder
      with:
        rust-version: stable

    - uses: actions/setup-node@v4
      with:
        # Quoted so YAML keeps it a string rather than an integer.
        node-version: "20"

    # Each check reruns its generator script; `git diff --exit-code` then
    # fails the step if the script changed any tracked file.
    - name: Check if configs.md has been modified
      run: |
        # If you encounter an error, run './dev/update_config_docs.sh' and commit
        ./dev/update_config_docs.sh
        git diff --exit-code

    - name: Check if any of the ***_functions.md has been modified
      run: |
        # If you encounter an error, run './dev/update_function_docs.sh' and commit
        ./dev/update_function_docs.sh
        git diff --exit-code
|
|
|
|
# Verify MSRV for the crates which are directly used by other projects:
# - datafusion
# - datafusion-substrait
# - datafusion-proto
# - datafusion-cli
msrv:
  name: Verify MSRV (Min Supported Rust Version)
  runs-on: ubuntu-latest
  container:
    image: amd64/rust
  steps:
    - uses: actions/checkout@v4

    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-builder

    - name: Install cargo-msrv
      run: cargo install cargo-msrv

    # Every check below runs the same `cargo msrv ... verify` command; only
    # the working directory (i.e. the crate being verified) differs.
    - name: Check datafusion
      working-directory: datafusion/core
      run: |
        # If you encounter an error with any of the commands below it means
        # your code or some crate in the dependency tree has a higher MSRV
        # (Min Supported Rust Version) than the one specified in the
        # `rust-version` key of `Cargo.toml`.
        #
        # To reproduce:
        # 1. Install the version of Rust that is failing. Example:
        #    rustup install 1.79.0
        # 2. Run the command that failed with that version. Example:
        #    cargo +1.79.0 check -p datafusion
        #
        # To resolve, either:
        # 1. Change your code to use older Rust features,
        # 2. Revert dependency update
        # 3. Update the MSRV version in `Cargo.toml`
        #
        # Please see the DataFusion Rust Version Compatibility Policy before
        # updating Cargo.toml. You may have to update the code instead.
        # https://github.com/apache/datafusion/blob/main/README.md#rust-version-compatibility-policy
        cargo msrv --output-format json --log-target stdout verify

    - name: Check datafusion-substrait
      working-directory: datafusion/substrait
      run: cargo msrv --output-format json --log-target stdout verify

    - name: Check datafusion-proto
      working-directory: datafusion/proto
      run: cargo msrv --output-format json --log-target stdout verify

    - name: Check datafusion-cli
      working-directory: datafusion-cli
      run: cargo msrv --output-format json --log-target stdout verify