mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
Runs-on for extended CI checks (#20511)
Part of https://github.com/apache/datafusion/issues/20052 ## Which issue does this PR close? Example run: https://github.com/apache/datafusion/actions/runs/22325922758 This reduced the run time from 3h to 1h. That is still a lot (on my Mac it runs in 5m!), but it's a start. --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -66,9 +66,10 @@ jobs:
|
||||
# Check crate compiles and base cargo check passes
|
||||
linux-build-lib:
|
||||
name: linux build test
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
|
||||
# note: do not use amd/rust container to preserve disk space
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
|
||||
@@ -80,7 +81,9 @@ jobs:
|
||||
source $HOME/.cargo/env
|
||||
rustup toolchain install
|
||||
- name: Install Protobuf Compiler
|
||||
run: sudo apt-get install -y protobuf-compiler
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y protobuf-compiler
|
||||
- name: Prepare cargo build
|
||||
run: |
|
||||
cargo check --profile ci --all-targets
|
||||
@@ -90,9 +93,11 @@ jobs:
|
||||
linux-test-extended:
|
||||
name: cargo test 'extended_tests' (amd64)
|
||||
needs: [linux-build-lib]
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=32,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
|
||||
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
|
||||
# note: do not use amd/rust container to preserve disk space
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
|
||||
@@ -106,7 +111,9 @@ jobs:
|
||||
source $HOME/.cargo/env
|
||||
rustup toolchain install
|
||||
- name: Install Protobuf Compiler
|
||||
run: sudo apt-get install -y protobuf-compiler
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y protobuf-compiler
|
||||
# For debugging, test binaries can be large.
|
||||
- name: Show available disk space
|
||||
run: |
|
||||
@@ -133,10 +140,11 @@ jobs:
|
||||
# Check answers are correct when hash values collide
|
||||
hash-collisions:
|
||||
name: cargo test hash collisions (amd64)
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion', github.run_id) || 'ubuntu-latest' }}
|
||||
container:
|
||||
image: amd64/rust
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
|
||||
@@ -154,10 +162,12 @@ jobs:
|
||||
|
||||
sqllogictest-sqlite:
|
||||
name: "Run sqllogictests with the sqlite test suite"
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a,cpu=48,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion,spot=false', github.run_id) || 'ubuntu-latest' }}
|
||||
# spot=false because the tests are long, https://runs-on.com/configuration/spot-instances/#disable-spot-pricing
|
||||
container:
|
||||
image: amd64/rust
|
||||
steps:
|
||||
- uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # v2.0.3
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
|
||||
|
||||
@@ -44,9 +44,11 @@ use datafusion::common::runtime::SpawnedTask;
|
||||
use futures::FutureExt;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::io::{IsTerminal, stdout};
|
||||
use std::io::{IsTerminal, stderr, stdout};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
#[cfg(feature = "postgres")]
|
||||
mod postgres_container;
|
||||
@@ -110,6 +112,13 @@ async fn run_tests() -> Result<()> {
|
||||
|
||||
options.warn_on_ignored();
|
||||
|
||||
// Print parallelism info for debugging CI performance
|
||||
eprintln!(
|
||||
"Running with {} test threads (available parallelism: {})",
|
||||
options.test_threads,
|
||||
get_available_parallelism()
|
||||
);
|
||||
|
||||
#[cfg(feature = "postgres")]
|
||||
initialize_postgres_container(&options).await?;
|
||||
|
||||
@@ -147,6 +156,10 @@ async fn run_tests() -> Result<()> {
|
||||
}
|
||||
|
||||
let num_tests = test_files.len();
|
||||
// For CI environments without TTY, print progress periodically
|
||||
let is_ci = !stderr().is_terminal();
|
||||
let completed_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
let errors: Vec<_> = futures::stream::iter(test_files)
|
||||
.map(|test_file| {
|
||||
let validator = if options.include_sqlite
|
||||
@@ -162,10 +175,12 @@ async fn run_tests() -> Result<()> {
|
||||
let filters = options.filters.clone();
|
||||
|
||||
let relative_path = test_file.relative_path.clone();
|
||||
let relative_path_for_timing = test_file.relative_path.clone();
|
||||
|
||||
let currently_running_sql_tracker = CurrentlyExecutingSqlTracker::new();
|
||||
let currently_running_sql_tracker_clone =
|
||||
currently_running_sql_tracker.clone();
|
||||
let file_start = Instant::now();
|
||||
SpawnedTask::spawn(async move {
|
||||
match (
|
||||
options.postgres_runner,
|
||||
@@ -227,14 +242,38 @@ async fn run_tests() -> Result<()> {
|
||||
)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
// Log slow files (>30s) for CI debugging
|
||||
let elapsed = file_start.elapsed();
|
||||
if elapsed.as_secs() > 30 {
|
||||
eprintln!(
|
||||
"Slow file: {} took {:.1}s",
|
||||
relative_path_for_timing.display(),
|
||||
elapsed.as_secs_f64()
|
||||
);
|
||||
}
|
||||
Ok(()) as Result<()>
|
||||
Ok(())
|
||||
})
|
||||
.join()
|
||||
.map(move |result| (result, relative_path, currently_running_sql_tracker))
|
||||
})
|
||||
// run up to num_cpus streams in parallel
|
||||
.buffer_unordered(options.test_threads)
|
||||
.inspect({
|
||||
let completed_count = Arc::clone(&completed_count);
|
||||
move |_| {
|
||||
let completed = completed_count.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
// In CI (no TTY), print progress every 50 files and once more when the last file completes
|
||||
if is_ci && (completed.is_multiple_of(50) || completed == num_tests) {
|
||||
eprintln!(
|
||||
"Progress: {}/{} files completed ({:.0}%)",
|
||||
completed,
|
||||
num_tests,
|
||||
(completed as f64 / num_tests as f64) * 100.0
|
||||
);
|
||||
}
|
||||
}
|
||||
})
|
||||
.flat_map(|(result, test_file_path, current_sql)| {
|
||||
// Filter out any Ok() leaving only the DataFusionErrors
|
||||
futures::stream::iter(match result {
|
||||
|
||||
Reference in New Issue
Block a user