Runs-on for extended CI checks (#20511)

part of https://github.com/apache/datafusion/issues/20052

## Which issue does this PR close?


example run:
https://github.com/apache/datafusion/actions/runs/22325922758

This reduced the run time from 3h to 1h. That is still a lot (on my Mac it
runs in 5m!), but it's a start.

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Dmitrii Blaginin
2026-02-24 10:34:49 +00:00
committed by GitHub
parent d59cdfe999
commit 11ef486e6c
2 changed files with 57 additions and 8 deletions
+16 -6
View File
@@ -66,9 +66,10 @@ jobs:
# Check crate compiles and base cargo check passes
linux-build-lib:
name: linux build test
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
# note: do not use the amd64/rust container, to preserve disk space
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
@@ -80,7 +81,9 @@ jobs:
source $HOME/.cargo/env
rustup toolchain install
- name: Install Protobuf Compiler
run: sudo apt-get install -y protobuf-compiler
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler
- name: Prepare cargo build
run: |
cargo check --profile ci --all-targets
@@ -90,9 +93,11 @@ jobs:
linux-test-extended:
name: cargo test 'extended_tests' (amd64)
needs: [linux-build-lib]
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
# note: do not use the amd64/rust container, to preserve disk space
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
@@ -106,7 +111,9 @@ jobs:
source $HOME/.cargo/env
rustup toolchain install
- name: Install Protobuf Compiler
run: sudo apt-get install -y protobuf-compiler
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler
# For debugging, test binaries can be large.
- name: Show available disk space
run: |
@@ -133,10 +140,11 @@ jobs:
# Check answers are correct when hash values collide
hash-collisions:
name: cargo test hash collisions (amd64)
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
container:
image: amd64/rust
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
@@ -154,10 +162,12 @@ jobs:
sqllogictest-sqlite:
name: "Run sqllogictests with the sqlite test suite"
runs-on: ubuntu-latest
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=48,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
container:
image: amd64/rust
steps:
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
+41 -2
View File
@@ -44,9 +44,11 @@ use datafusion::common::runtime::SpawnedTask;
use futures::FutureExt;
use std::ffi::OsStr;
use std::fs;
use std::io::{IsTerminal, stdout};
use std::io::{IsTerminal, stderr, stdout};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
#[cfg(feature = "postgres")]
mod postgres_container;
@@ -110,6 +112,13 @@ async fn run_tests() -> Result<()> {
options.warn_on_ignored();
// Print parallelism info for debugging CI performance
eprintln!(
"Running with {} test threads (available parallelism: {})",
options.test_threads,
get_available_parallelism()
);
#[cfg(feature = "postgres")]
initialize_postgres_container(&options).await?;
@@ -147,6 +156,10 @@ async fn run_tests() -> Result<()> {
}
let num_tests = test_files.len();
// For CI environments without TTY, print progress periodically
let is_ci = !stderr().is_terminal();
let completed_count = Arc::new(AtomicUsize::new(0));
let errors: Vec<_> = futures::stream::iter(test_files)
.map(|test_file| {
let validator = if options.include_sqlite
@@ -162,10 +175,12 @@ async fn run_tests() -> Result<()> {
let filters = options.filters.clone();
let relative_path = test_file.relative_path.clone();
let relative_path_for_timing = test_file.relative_path.clone();
let currently_running_sql_tracker = CurrentlyExecutingSqlTracker::new();
let currently_running_sql_tracker_clone =
currently_running_sql_tracker.clone();
let file_start = Instant::now();
SpawnedTask::spawn(async move {
match (
options.postgres_runner,
@@ -227,14 +242,38 @@ async fn run_tests() -> Result<()> {
)
.await?
}
};
// Log slow files (>30s) for CI debugging
let elapsed = file_start.elapsed();
if elapsed.as_secs() > 30 {
eprintln!(
"Slow file: {} took {:.1}s",
relative_path_for_timing.display(),
elapsed.as_secs_f64()
);
}
Ok(()) as Result<()>
Ok(())
})
.join()
.map(move |result| (result, relative_path, currently_running_sql_tracker))
})
// run up to `options.test_threads` test files concurrently
.buffer_unordered(options.test_threads)
.inspect({
let completed_count = Arc::clone(&completed_count);
move |_| {
let completed = completed_count.fetch_add(1, Ordering::Relaxed) + 1;
// In CI (no TTY), print progress every 50 files and once more when the last file completes
if is_ci && (completed.is_multiple_of(50) || completed == num_tests) {
eprintln!(
"Progress: {}/{} files completed ({:.0}%)",
completed,
num_tests,
(completed as f64 / num_tests as f64) * 100.0
);
}
}
})
.flat_map(|(result, test_file_path, current_sql)| {
// Filter out any Ok() leaving only the DataFusionErrors
futures::stream::iter(match result {