Update github repo links (#10167)

* Update github repo link

* Format markdown

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
This commit is contained in:
张林伟
2024-04-23 01:11:31 +08:00
committed by GitHub
parent 0b5bfe2cfe
commit 465c89f7f1
166 changed files with 6317 additions and 6317 deletions
@@ -38,7 +38,7 @@ runs:
- name: Setup Rust toolchain
shell: bash
run: |
# Avoid self update to avoid CI failures: https://github.com/apache/arrow-datafusion/issues/9653
# Avoid self update to avoid CI failures: https://github.com/apache/datafusion/issues/9653
rustup toolchain install stable --no-self-update
rustup default stable
rustup component add rustfmt
+1 -1
View File
@@ -34,7 +34,7 @@ jobs:
runs-on: ubuntu-latest
# only run for users whose permissions allow them to update PRs
# otherwise labeler is failing:
# https://github.com/apache/arrow-datafusion/issues/3743
# https://github.com/apache/datafusion/issues/3743
permissions:
contents: read
pull-requests: write
+1 -1
View File
@@ -425,7 +425,7 @@ jobs:
ci/scripts/rust_fmt.sh
# Coverage job disabled due to
# https://github.com/apache/arrow-datafusion/issues/3678
# https://github.com/apache/datafusion/issues/3678
# coverage:
# name: coverage
+2 -2
View File
@@ -46,10 +46,10 @@ resolver = "2"
[workspace.package]
authors = ["Apache Arrow <dev@arrow.apache.org>"]
edition = "2021"
homepage = "https://github.com/apache/arrow-datafusion"
homepage = "https://github.com/apache/datafusion"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/datafusion"
rust-version = "1.73"
version = "37.1.0"
+8 -8
View File
@@ -27,14 +27,14 @@
[crates-badge]: https://img.shields.io/crates/v/datafusion.svg
[crates-url]: https://crates.io/crates/datafusion
[license-badge]: https://img.shields.io/badge/license-Apache%20v2-blue.svg
[license-url]: https://github.com/apache/arrow-datafusion/blob/main/LICENSE.txt
[actions-badge]: https://github.com/apache/arrow-datafusion/actions/workflows/rust.yml/badge.svg
[actions-url]: https://github.com/apache/arrow-datafusion/actions?query=branch%3Amain
[license-url]: https://github.com/apache/datafusion/blob/main/LICENSE.txt
[actions-badge]: https://github.com/apache/datafusion/actions/workflows/rust.yml/badge.svg
[actions-url]: https://github.com/apache/datafusion/actions?query=branch%3Amain
[discord-badge]: https://img.shields.io/discord/885562378132000778.svg?logo=discord&style=flat-square
[discord-url]: https://discord.com/invite/Qw5gKqHxUM
[Website](https://github.com/apache/arrow-datafusion) |
[Guides](https://github.com/apache/arrow-datafusion/tree/main/docs) |
[Website](https://github.com/apache/datafusion) |
[Guides](https://github.com/apache/datafusion/tree/main/docs) |
[API Docs](https://docs.rs/datafusion/latest/datafusion/) |
[Chat](https://discord.com/channels/885562378132000778/885562378132000781)
@@ -42,7 +42,7 @@
Apache DataFusion is a very fast, extensible query engine for building high-quality data-centric systems in
[Rust](https://www.rust-lang.org), using the [Apache Arrow](https://arrow.apache.org)
in-memory format. [Python Bindings](https://github.com/apache/arrow-datafusion-python) are also available. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.
in-memory format. [Python Bindings](https://github.com/apache/datafusion-python) are also available. DataFusion offers SQL and Dataframe APIs, excellent [performance](https://benchmark.clickhouse.com/), built-in support for CSV, Parquet, JSON, and Avro, extensive customization, and a great community.
Here are links to some important information
@@ -51,7 +51,7 @@ Here are links to some important information
- [Rust Getting Started](https://arrow.apache.org/datafusion/user-guide/example-usage.html)
- [Rust DataFrame API](https://arrow.apache.org/datafusion/user-guide/dataframe.html)
- [Rust API docs](https://docs.rs/datafusion/latest/datafusion)
- [Rust Examples](https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples)
- [Rust Examples](https://github.com/apache/datafusion/tree/main/datafusion-examples)
- [Python DataFrame API](https://arrow.apache.org/datafusion-python/)
- [Architecture](https://docs.rs/datafusion/latest/datafusion/index.html#architecture)
@@ -102,4 +102,4 @@ each stable Rust version for 6 months after it is
[released](https://github.com/rust-lang/rust/blob/master/RELEASES.md). This
generally translates to support for the most recent 3 to 4 stable Rust versions.
We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Farrow-datafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
+1 -1
View File
@@ -47,7 +47,7 @@ enum TpchOpt {
/// use `dbbench` instead.
///
/// Note: this is kept to be backwards compatible with the benchmark names prior to
/// <https://github.com/apache/arrow-datafusion/issues/6994>
/// <https://github.com/apache/datafusion/issues/6994>
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
+2 -2
View File
@@ -1,6 +1,6 @@
disallowed-methods = [
{ path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/arrow-datafusion/issues/6513)" },
{ path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use `SpawnedTask::spawn_blocking` instead (https://github.com/apache/arrow-datafusion/issues/6513)" },
{ path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/datafusion/issues/6513)" },
{ path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use `SpawnedTask::spawn_blocking` instead (https://github.com/apache/datafusion/issues/6513)" },
]
disallowed-types = [
+2 -2
View File
@@ -23,8 +23,8 @@ authors = ["Apache Arrow <dev@arrow.apache.org>"]
edition = "2021"
keywords = ["arrow", "datafusion", "query", "sql"]
license = "Apache-2.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
homepage = "https://github.com/apache/datafusion"
repository = "https://github.com/apache/datafusion"
# Specify MSRV here as `cargo msrv` doesn't support workspace version
rust-version = "1.73"
readme = "README.md"
+1 -1
View File
@@ -43,4 +43,4 @@ checked in `Cargo.lock` file to ensure reproducible builds.
However, the `datafusion` and sub crates are intended for use as libraries and
thus do not have a `Cargo.lock` file checked in.
[`datafusion cargo.toml`]: https://github.com/apache/arrow-datafusion/blob/main/Cargo.toml
[`datafusion cargo.toml`]: https://github.com/apache/datafusion/blob/main/Cargo.toml
+1 -1
View File
@@ -30,7 +30,7 @@ Run `git submodule update --init` to init test files.
To run the examples, use the `cargo run` command, such as:
```bash
git clone https://github.com/apache/arrow-datafusion
git clone https://github.com/apache/datafusion
cd datafusion
# Download test data
git submodule update --init
+2 -2
View File
@@ -93,13 +93,13 @@ fn criterion_benchmark(c: &mut Criterion) {
let ctx = create_context();
// Test simplest
// https://github.com/apache/arrow-datafusion/issues/5157
// https://github.com/apache/datafusion/issues/5157
c.bench_function("logical_select_one_from_700", |b| {
b.iter(|| logical_plan(&ctx, "SELECT c1 FROM t700"))
});
// Test simplest
// https://github.com/apache/arrow-datafusion/issues/5157
// https://github.com/apache/datafusion/issues/5157
c.bench_function("physical_select_one_from_700", |b| {
b.iter(|| physical_plan(&ctx, "SELECT c1 FROM t700"))
});
+2 -2
View File
@@ -176,8 +176,8 @@ impl CatalogProviderList for MemoryCatalogProviderList {
/// read from Delta Lake tables
///
/// [`datafusion-cli`]: https://arrow.apache.org/datafusion/user-guide/cli.html
/// [`DynamicFileCatalogProvider`]: https://github.com/apache/arrow-datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75
/// [`catalog.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/catalog.rs
/// [`DynamicFileCatalogProvider`]: https://github.com/apache/datafusion/blob/31b9b48b08592b7d293f46e75707aad7dadd7cbc/datafusion-cli/src/catalog.rs#L75
/// [`catalog.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/catalog.rs
/// [delta-rs]: https://github.com/delta-io/delta-rs
/// [`UnityCatalogProvider`]: https://github.com/delta-io/delta-rs/blob/951436ecec476ce65b5ed3b58b50fb0846ca7b91/crates/deltalake-core/src/data_catalog/unity/datafusion.rs#L111-L123
///
+2 -2
View File
@@ -2423,7 +2423,7 @@ mod tests {
Ok(())
}
// Test issue: https://github.com/apache/arrow-datafusion/issues/7790
// Test issue: https://github.com/apache/datafusion/issues/7790
// The join operation outputs two identical column names, but they belong to different relations.
#[tokio::test]
async fn with_column_join_same_columns() -> Result<()> {
@@ -2503,7 +2503,7 @@ mod tests {
}
// Table 't1' self join
// Supplementary test of issue: https://github.com/apache/arrow-datafusion/issues/7790
// Supplementary test of issue: https://github.com/apache/datafusion/issues/7790
#[tokio::test]
async fn with_column_self_join() -> Result<()> {
let df = test_table().await?.select_columns(&["c1"])?;
@@ -38,7 +38,7 @@ use crate::execution::context::SessionState;
/// See here for more details: www.postgresql.org/docs/11/queries-with.html#id-1.5.6.12.5.4
pub struct CteWorkTable {
/// The name of the CTE work table
// WIP, see https://github.com/apache/arrow-datafusion/issues/462
// WIP, see https://github.com/apache/datafusion/issues/462
#[allow(dead_code)]
name: String,
/// This schema must be shared across both the static and recursive terms of a recursive query
@@ -212,7 +212,7 @@ impl FileFormat for ParquetFormat {
// object stores (like local file systems) the order returned from list
// is not deterministic. Thus, to ensure deterministic schema inference
// sort the files first.
// https://github.com/apache/arrow-datafusion/pull/6629
// https://github.com/apache/datafusion/pull/6629
schemas.sort_by(|(location1, _), (location2, _)| location1.cmp(location2));
let schemas = schemas
@@ -1040,7 +1040,7 @@ pub(crate) mod test_util {
multi_page: bool,
) -> Result<(Vec<ObjectMeta>, Vec<NamedTempFile>)> {
// we need the tmp files to be sorted as some tests rely on how the returned files are ordered
// https://github.com/apache/arrow-datafusion/pull/6629
// https://github.com/apache/datafusion/pull/6629
let tmp_files = {
let mut tmp_files: Vec<_> = (0..batches.len())
.map(|_| NamedTempFile::new().expect("creating temp file"))
@@ -57,7 +57,7 @@ type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>;
/// the demux task for errors and abort accordingly. The single_file_output parameter
/// overrides all other settings to force only a single file to be written.
/// partition_by parameter will additionally split the input based on the unique
/// values of a specific column `<https://github.com/apache/arrow-datafusion/issues/7744>``
/// values of a specific column `<https://github.com/apache/datafusion/issues/7744>`
/// ┌───────────┐ ┌────────────┐ ┌─────────────┐
/// ┌──────▶ │ batch 1 ├────▶...──────▶│ Batch a │ │ Output File1│
/// │ └───────────┘ └────────────┘ └─────────────┘
@@ -244,7 +244,7 @@ pub struct ListingOptions {
/// the future be automatically determined, for example using
/// parquet metadata.
///
/// See <https://github.com/apache/arrow-datafusion/issues/4177>
/// See <https://github.com/apache/datafusion/issues/4177>
/// NOTE: This attribute stores all equivalent orderings (the outer `Vec`)
/// where each ordering consists of an individual lexicographic
/// ordering (encapsulated by a `Vec<Expr>`). If there aren't
@@ -457,7 +457,7 @@ mod tests {
test("/a/b*.txt", Some(("/a/", "b*.txt")));
test("/a/b/**/c*.txt", Some(("/a/b/", "**/c*.txt")));
// https://github.com/apache/arrow-datafusion/issues/2465
// https://github.com/apache/datafusion/issues/2465
test(
"/a/b/c//alltypes_plain*.parquet",
Some(("/a/b/c//", "alltypes_plain*.parquet")),
@@ -769,7 +769,7 @@ mod tests {
assert_eq!(14, csv.base_config.file_schema.fields().len());
assert_eq!(14, csv.schema().fields().len());
// errors due to https://github.com/apache/arrow-datafusion/issues/4918
// errors due to https://github.com/apache/datafusion/issues/4918
let mut it = csv.execute(0, task_ctx)?;
let err = it.next().await.unwrap().unwrap_err().strip_backtrace();
assert_eq!(
@@ -49,7 +49,7 @@ use super::ParquetFileMetrics;
/// did not filter out that row group.
///
/// Note: This method currently ignores ColumnOrder
/// <https://github.com/apache/arrow-datafusion/issues/8335>
/// <https://github.com/apache/datafusion/issues/8335>
pub(crate) fn prune_row_groups_by_statistics(
arrow_schema: &Schema,
parquet_schema: &SchemaDescriptor,
@@ -63,7 +63,7 @@ pub(crate) fn prune_row_groups_by_statistics(
if let Some(range) = &range {
// figure out where the first dictionary page (or first data page are)
// note don't use the location of metadata
// <https://github.com/apache/arrow-datafusion/issues/5995>
// <https://github.com/apache/datafusion/issues/5995>
let col = metadata.column(0);
let offset = col
.dictionary_page_offset()
@@ -360,7 +360,7 @@ mod test {
#[should_panic(
expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Int64, got TimestampNanosecond(NULL, None)"
)]
// Due to https://github.com/apache/arrow-datafusion/issues/8295
// Due to https://github.com/apache/datafusion/issues/8295
fn roundtrip_timestamp() {
Test {
input: timestamp_array([
@@ -470,7 +470,7 @@ mod test {
(None, None),
]),
};
// Due to https://github.com/apache/arrow-datafusion/issues/8334,
// Due to https://github.com/apache/datafusion/issues/8334,
// statistics for struct arrays are not supported
test.expected_min =
new_null_array(test.input.data_type(), test.expected_min.len());
@@ -483,7 +483,7 @@ mod test {
#[should_panic(
expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Utf8, got Binary(NULL)"
)]
// Due to https://github.com/apache/arrow-datafusion/issues/8295
// Due to https://github.com/apache/datafusion/issues/8295
fn roundtrip_binary() {
Test {
input: Arc::new(BinaryArray::from_opt_vec(vec![
+1 -1
View File
@@ -158,7 +158,7 @@ mod tests {
#[tokio::test]
async fn issue_3242() -> Result<()> {
// regression test for https://github.com/apache/arrow-datafusion/pull/3242
// regression test for https://github.com/apache/datafusion/pull/3242
let session_ctx = SessionContext::new_with_config(
SessionConfig::new().with_information_schema(true),
);
@@ -65,7 +65,7 @@ mod tests {
use async_trait::async_trait;
// Test for compilation error when calling read_* functions from an #[async_trait] function.
// See https://github.com/apache/arrow-datafusion/issues/1154
// See https://github.com/apache/datafusion/issues/1154
#[async_trait]
trait CallReadTrait {
async fn call_read_avro(&self) -> DataFrame;
+1 -1
View File
@@ -127,7 +127,7 @@ mod tests {
}
// Test for compilation error when calling read_* functions from an #[async_trait] function.
// See https://github.com/apache/arrow-datafusion/issues/1154
// See https://github.com/apache/datafusion/issues/1154
#[async_trait]
trait CallReadTrait {
async fn call_read_csv(&self) -> DataFrame;
@@ -333,7 +333,7 @@ mod tests {
}
// Test for compilation error when calling read_* functions from an #[async_trait] function.
// See https://github.com/apache/arrow-datafusion/issues/1154
// See https://github.com/apache/datafusion/issues/1154
#[async_trait]
trait CallReadTrait {
async fn call_read_parquet(&self) -> DataFrame;
+5 -5
View File
@@ -128,7 +128,7 @@
//!
//! There are many additional annotated examples of using DataFusion in the [datafusion-examples] directory.
//!
//! [datafusion-examples]: https://github.com/apache/arrow-datafusion/tree/main/datafusion-examples
//! [datafusion-examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples
//!
//! ## Customization and Extension
//!
@@ -170,7 +170,7 @@
//! You can find a formal description of DataFusion's architecture in our
//! [SIGMOD 2024 Paper].
//!
//! [SIGMOD 2024 Paper]: https://github.com/apache/arrow-datafusion/files/14789704/DataFusion_Query_Engine___SIGMOD_2024-FINAL.pdf
//! [SIGMOD 2024 Paper]: https://github.com/apache/datafusion/files/14789704/DataFusion_Query_Engine___SIGMOD_2024-FINAL.pdf
//!
//! ## Overview Presentations
//!
@@ -306,7 +306,7 @@
//! [`TreeNode`]: datafusion_common::tree_node::TreeNode
//! [`tree_node module`]: datafusion_expr::logical_plan::tree_node
//! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier
//! [`expr_api`.rs]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs
//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs
//!
//! ### Physical Plans
//!
@@ -379,7 +379,7 @@
//! [`RepartitionExec`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html
//! [Volcano style]: https://w6113.github.io/files/papers/volcanoparallelism-89.pdf
//! [Morsel-Driven Parallelism]: https://db.in.tum.de/~leis/papers/morsels.pdf
//! [DataFusion paper submitted SIGMOD]: https://github.com/apache/arrow-datafusion/files/13874720/DataFusion_Query_Engine___SIGMOD_2024.pdf
//! [DataFusion paper submitted SIGMOD]: https://github.com/apache/datafusion/files/13874720/DataFusion_Query_Engine___SIGMOD_2024.pdf
//! [implementors of `ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#implementors
//!
//! ## Thread Scheduling
@@ -488,7 +488,7 @@ pub use parquet;
// re-export DataFusion sub-crates at the top level. Use `pub use *`
// so that the contents of the subcrates appears in rustdocs
// for details, see https://github.com/apache/arrow-datafusion/issues/6648
// for details, see https://github.com/apache/datafusion/issues/6648
/// re-export of [`datafusion_common`] crate
pub mod common {
@@ -59,7 +59,7 @@ impl PhysicalOptimizerRule for CoalesceBatches {
// The goal here is to detect operators that could produce small batches and only
// wrap those ones with a CoalesceBatchesExec operator. An alternate approach here
// would be to build the coalescing logic directly into the operators
// See https://github.com/apache/arrow-datafusion/issues/139
// See https://github.com/apache/datafusion/issues/139
let wrap_in_coalesce = plan_any.downcast_ref::<FilterExec>().is_some()
|| plan_any.downcast_ref::<HashJoinExec>().is_some()
// Don't need to add CoalesceBatchesExec after a round robin RepartitionExec
@@ -3029,7 +3029,7 @@ pub(crate) mod tests {
#[test]
fn merge_does_not_need_sort() -> Result<()> {
// see https://github.com/apache/arrow-datafusion/issues/4331
// see https://github.com/apache/datafusion/issues/4331
let schema = schema();
let sort_key = vec![PhysicalSortExpr {
expr: col("a", &schema).unwrap(),
@@ -3647,7 +3647,7 @@ pub(crate) mod tests {
// The groups must have only contiguous ranges of rows from the same file
// if any group has rows from multiple files, the sort order is destroyed
// https://github.com/apache/arrow-datafusion/issues/8451
// https://github.com/apache/datafusion/issues/8451
let expected = [
"SortRequiredExec: [a@0 ASC]",
"FilterExec: c@2 = 0",
@@ -287,7 +287,7 @@ fn try_unifying_projections(
// Merging these projections is not beneficial, e.g
// If an expression is not trivial and it is referred more than 1, unifies projections will be
// beneficial as caching mechanism for non-trivial computations.
// See discussion in: https://github.com/apache/arrow-datafusion/issues/8296
// See discussion in: https://github.com/apache/datafusion/issues/8296
if column_ref_map.iter().any(|(column, count)| {
*count > 1 && !is_expr_trivial(&child.expr()[column.index()].0.clone())
}) {
@@ -185,7 +185,7 @@ pub trait PruningStatistics {
/// example of how to use `PruningPredicate` to prune files based on min/max
/// values.
///
/// [`pruning.rs` example in the `datafusion-examples`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/pruning.rs
/// [`pruning.rs` example in the `datafusion-examples`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/pruning.rs
///
/// Given an expression like `x = 5` and statistics for 3 containers (Row
/// Groups, files, etc) `A`, `B`, and `C`:
+4 -4
View File
@@ -95,7 +95,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> {
.await?;
// In the same SessionContext, AliasGenerator will increase subquery_alias id by 1
// https://github.com/apache/arrow-datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43
// https://github.com/apache/datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43
// for compare difference between sql and df logical plan, we need to create a new SessionContext here
let ctx = create_join_context()?;
let df_results = ctx
@@ -110,7 +110,7 @@ async fn test_count_wildcard_on_where_in() -> Result<()> {
.select(vec![count(wildcard())])?
.into_unoptimized_plan(),
// Usually, into_optimized_plan() should be used here, but due to
// https://github.com/apache/arrow-datafusion/issues/5771,
// https://github.com/apache/datafusion/issues/5771,
// subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here.
),
))?
@@ -147,7 +147,7 @@ async fn test_count_wildcard_on_where_exist() -> Result<()> {
.select(vec![count(wildcard())])?
.into_unoptimized_plan(),
// Usually, into_optimized_plan() should be used here, but due to
// https://github.com/apache/arrow-datafusion/issues/5771,
// https://github.com/apache/datafusion/issues/5771,
// subqueries in SQL cannot be optimized, resulting in differences in logical_plan. Therefore, into_unoptimized_plan() is temporarily used here.
)))?
.select(vec![col("a"), col("b")])?
@@ -245,7 +245,7 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
.await?;
// In the same SessionContext, AliasGenerator will increase subquery_alias id by 1
// https://github.com/apache/arrow-datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43
// https://github.com/apache/datafusion/blame/cf45eb9020092943b96653d70fafb143cc362e19/datafusion/optimizer/src/alias.rs#L40-L43
// for compare difference between sql and df logical plan, we need to create a new SessionContext here
let ctx = create_join_context()?;
let df_results = ctx
@@ -288,7 +288,7 @@ async fn prune_disabled() {
// $bits: number of bits of the integer to test (8, 16, 32, 64)
// $correct_bloom_filters: if false, replicates the
// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass
// https://github.com/apache/datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on Int8 and Int16 columns are still buggy.
macro_rules! int_tests {
($bits:expr) => {
@@ -448,13 +448,13 @@ macro_rules! int_tests {
};
}
// int8/int16 are incorrect: https://github.com/apache/arrow-datafusion/issues/9779
// int8/int16 are incorrect: https://github.com/apache/datafusion/issues/9779
int_tests!(32);
int_tests!(64);
// $bits: number of bits of the integer to test (8, 16, 32, 64)
// $correct_bloom_filters: if false, replicates the
// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass
// https://github.com/apache/datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on UInt8 and UInt16 columns are still buggy.
macro_rules! uint_tests {
($bits:expr) => {
@@ -585,7 +585,7 @@ macro_rules! uint_tests {
};
}
// uint8/uint16 are incorrect: https://github.com/apache/arrow-datafusion/issues/9779
// uint8/uint16 are incorrect: https://github.com/apache/datafusion/issues/9779
uint_tests!(32);
uint_tests!(64);
+1 -1
View File
@@ -299,7 +299,7 @@ fn select_date_plus_interval() -> Result<()> {
#[test]
fn simplify_project_scalar_fn() -> Result<()> {
// Issue https://github.com/apache/arrow-datafusion/issues/5996
// Issue https://github.com/apache/datafusion/issues/5996
let schema = Schema::new(vec![Field::new("f", DataType::Float64, false)]);
let plan = table_scan(Some("test"), &schema, None)?
.project(vec![power(col("f"), lit(1.0))])?
+2 -2
View File
@@ -570,7 +570,7 @@ async fn csv_explain_verbose_plans() {
#[rstest]
#[tokio::test]
async fn explain_analyze_runs_optimizers(#[values("*", "1")] count_expr: &str) {
// repro for https://github.com/apache/arrow-datafusion/issues/917
// repro for https://github.com/apache/datafusion/issues/917
// where EXPLAIN ANALYZE was not correctly running optimizer
let ctx = SessionContext::new();
register_alltypes_parquet(&ctx).await;
@@ -719,7 +719,7 @@ async fn csv_explain_analyze_order_by() {
.to_string();
// Ensure that the ordering is not optimized away from the plan
// https://github.com/apache/arrow-datafusion/issues/6379
// https://github.com/apache/datafusion/issues/6379
let needle =
"SortExec: expr=[c1@0 ASC NULLS LAST], metrics=[output_rows=100, elapsed_compute";
assert_contains!(&formatted, needle);
+1 -1
View File
@@ -231,7 +231,7 @@ async fn tpcds_logical_q40() -> Result<()> {
#[tokio::test]
#[ignore]
// Optimizer rule 'scalar_subquery_to_join' failed: Optimizing disjunctions not supported!
// issue: https://github.com/apache/arrow-datafusion/issues/5368
// issue: https://github.com/apache/datafusion/issues/5368
async fn tpcds_logical_q41() -> Result<()> {
create_logical_plan(41).await
}
+1 -1
View File
@@ -422,7 +422,7 @@ mod tests {
// For each variant in AggregateFunction, it converts the variant to a string
// and then back to a variant. The test asserts that the original variant and
// the reconstructed variant are the same. This assertion is also necessary for
// function suggestion. See https://github.com/apache/arrow-datafusion/issues/8082
// function suggestion. See https://github.com/apache/datafusion/issues/8082
fn test_display_and_from_str() {
for func_original in AggregateFunction::iter() {
let func_name = func_original.to_string();
@@ -196,7 +196,7 @@ mod tests {
// For each variant in BuiltInWindowFunction, it converts the variant to a string
// and then back to a variant. The test asserts that the original variant and
// the reconstructed variant are the same. This assertion is also necessary for
// function suggestion. See https://github.com/apache/arrow-datafusion/issues/8082
// function suggestion. See https://github.com/apache/datafusion/issues/8082
fn test_display_and_from_str() {
for func_original in BuiltInWindowFunction::iter() {
let func_name = func_original.to_string();
+1 -1
View File
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
//! [DataFusion](https://github.com/apache/arrow-datafusion)
//! [DataFusion](https://github.com/apache/datafusion)
//! is an extensible query execution framework that uses
//! [Apache Arrow](https://arrow.apache.org) as its in-memory format.
//!
+3 -3
View File
@@ -434,7 +434,7 @@ impl LogicalPlanBuilder {
/// But Distinct (A, B, C) --> (1, 2, 3), (1, 2, 4)
/// (which will appear as a (1, 2), (1, 2) if a and b are projected
///
/// See <https://github.com/apache/arrow-datafusion/issues/5065> for more details
/// See <https://github.com/apache/datafusion/issues/5065> for more details
fn add_missing_columns(
curr_plan: LogicalPlan,
missing_cols: &[Column],
@@ -495,7 +495,7 @@ impl LogicalPlanBuilder {
// This handles the special case for
// SELECT col as <alias> ORDER BY <alias>
//
// As described in https://github.com/apache/arrow-datafusion/issues/5293
// As described in https://github.com/apache/datafusion/issues/5293
let all_aliases = missing_exprs.iter().all(|e| {
projection_exprs.iter().any(|proj_expr| {
if let Expr::Alias(Alias { expr, .. }) = proj_expr {
@@ -1475,7 +1475,7 @@ pub fn wrap_projection_for_join_if_necessary(
// join keys: [cast(a as int)]
//
// then a and cast(a as int) will use the same field name - `a` in projection schema.
// https://github.com/apache/arrow-datafusion/issues/4478
// https://github.com/apache/datafusion/issues/4478
if matches!(key, Expr::Cast(_)) || matches!(key, Expr::TryCast(_)) {
let alias = format!("{key}");
key.clone().alias(alias)
+1 -1
View File
@@ -48,7 +48,7 @@ pub trait SimplifyInfo {
/// # Example
/// See the `simplify_demo` in the [`expr_api` example]
///
/// [`expr_api` example]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/expr_api.rs
/// [`expr_api` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs
#[derive(Debug, Clone)]
pub struct SimplifyContext<'a> {
schema: Option<DFSchemaRef>,
+4 -4
View File
@@ -54,12 +54,12 @@ use std::vec;
/// This is a separate struct from `AggregateUDFImpl` to maintain backwards
/// compatibility with the older API.
///
/// [the examples]: https://github.com/apache/arrow-datafusion/tree/main/datafusion-examples#single-process
/// [the examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples#single-process
/// [aggregate function]: https://en.wikipedia.org/wiki/Aggregate_function
/// [`Accumulator`]: crate::Accumulator
/// [`create_udaf`]: crate::expr_fn::create_udaf
/// [`simple_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs
/// [`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs
/// [`simple_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs
/// [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs
#[derive(Debug, Clone)]
pub struct AggregateUDF {
inner: Arc<dyn AggregateUDFImpl>,
@@ -214,7 +214,7 @@ where
/// See [`advanced_udaf.rs`] for a full example with complete implementation and
/// [`AggregateUDF`] for other available options.
///
/// [`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs
/// [`advanced_udaf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs
///
/// # Basic Example
/// ```
+4 -4
View File
@@ -48,8 +48,8 @@ use std::sync::Arc;
/// compatibility with the older API.
///
/// [`create_udf`]: crate::expr_fn::create_udf
/// [`simple_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
/// [`simple_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
#[derive(Debug, Clone)]
pub struct ScalarUDF {
inner: Arc<dyn ScalarUDFImpl>,
@@ -213,7 +213,7 @@ where
/// [`ScalarUDF`] for other available options.
///
///
/// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
/// # Basic Example
/// ```
/// # use std::any::Any;
@@ -424,7 +424,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
}
/// Implementation of [`ScalarUDFImpl`] that wraps the function style pointers
/// of the older API (see <https://github.com/apache/arrow-datafusion/pull/8578>
/// of the older API (see <https://github.com/apache/datafusion/pull/8578>
/// for more details)
struct ScalarUdfLegacyWrapper {
/// The name of the function
+4 -4
View File
@@ -46,8 +46,8 @@ use std::{
///
/// [`PartitionEvaluator`]: crate::PartitionEvaluator
/// [`create_udwf`]: crate::expr_fn::create_udwf
/// [`simple_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs
/// [`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs
/// [`simple_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs
/// [`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs
#[derive(Debug, Clone)]
pub struct WindowUDF {
inner: Arc<dyn WindowUDFImpl>,
@@ -194,7 +194,7 @@ where
/// [`WindowUDF`] for other available options.
///
///
/// [`advanced_udwf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs
/// [`advanced_udwf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udwf.rs
/// # Basic Example
/// ```
/// # use std::any::Any;
@@ -315,7 +315,7 @@ impl WindowUDFImpl for AliasedWindowUDFImpl {
}
/// Implementation of [`WindowUDFImpl`] that wraps the function style pointers
/// of the older API (see <https://github.com/apache/arrow-datafusion/pull/8719>
/// of the older API (see <https://github.com/apache/datafusion/pull/8719>
/// for more details)
pub struct WindowUDFLegacyWrapper {
/// name
+2 -2
View File
@@ -158,8 +158,8 @@ Looking at the `EXPLAIN` output we can see that the optimizer has effectively re
+---------------+-------------------------------------------------+
```
If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/arrow-datafusion/issues/3704)
and [#3555](https://github.com/apache/arrow-datafusion/issues/3555) occur where the expected columns can not be found.
If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/datafusion/issues/3704)
and [#3555](https://github.com/apache/datafusion/issues/3555) occur where the expected columns can not be found.
### Building Expression Names
@@ -29,7 +29,7 @@ use datafusion_expr::{lit, Expr, LogicalPlan, WindowFunctionDefinition};
/// Rewrite `Count(Expr:Wildcard)` to `Count(Expr:Literal)`.
///
/// Resolves issue: <https://github.com/apache/arrow-datafusion/issues/5473>
/// Resolves issue: <https://github.com/apache/datafusion/issues/5473>
#[derive(Default)]
pub struct CountWildcardRule {}
@@ -108,7 +108,7 @@ fn analyze_internal(
.into_iter()
.map(|expr| {
// ensure aggregate names don't change:
// https://github.com/apache/arrow-datafusion/issues/3555
// https://github.com/apache/datafusion/issues/3555
rewrite_preserving_name(expr, &mut expr_rewrite)
})
.collect::<Result<Vec<_>>>()?;
@@ -662,7 +662,7 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
type Node = Expr;
fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
// related to https://github.com/apache/arrow-datafusion/issues/8814
// related to https://github.com/apache/datafusion/issues/8814
// If the expr contain volatile expression or is a short-circuit expression, skip it.
if expr.short_circuits() || is_volatile_expression(expr)? {
self.visit_stack
@@ -155,7 +155,7 @@ fn try_flatten_join_inputs(
LogicalPlan::Join(join) if join.join_type == JoinType::Inner => {
if join.filter.is_some() {
// The filter of the inner join would be lost, so skip this rule.
// issue: https://github.com/apache/arrow-datafusion/issues/4844
// issue: https://github.com/apache/datafusion/issues/4844
return Ok(false);
}
possible_join_keys.extend(join.on.clone());
@@ -541,7 +541,7 @@ mod tests {
}
#[test]
/// See https://github.com/apache/arrow-datafusion/issues/7530
/// See https://github.com/apache/datafusion/issues/7530
fn eliminate_cross_not_possible_nested_inner_join_with_filter() -> Result<()> {
let t1 = test_table_scan_with_name("t1")?;
let t2 = test_table_scan_with_name("t2")?;
@@ -463,7 +463,7 @@ fn merge_consecutive_projections(proj: &Projection) -> Result<Option<Projection>
// If an expression is non-trivial and appears more than once, consecutive
// projections will benefit from a compute-once approach. For details, see:
// https://github.com/apache/arrow-datafusion/issues/8296
// https://github.com/apache/datafusion/issues/8296
if column_referral_map.into_iter().any(|(col, usage)| {
usage > 1
&& !is_expr_trivial(
@@ -1351,7 +1351,7 @@ mod tests {
}
// Test outer projection isn't discarded despite the same schema as inner
// https://github.com/apache/arrow-datafusion/issues/8942
// https://github.com/apache/datafusion/issues/8942
#[test]
fn test_derived_column() -> Result<()> {
let table_scan = test_table_scan()?;
+1 -1
View File
@@ -411,7 +411,7 @@ impl Optimizer {
(Err(e), Some(orig_plan)) => {
// Note to future readers: if you see this warning it signals a
// bug in the DataFusion optimizer. Please consider filing a ticket
// https://github.com/apache/arrow-datafusion
// https://github.com/apache/datafusion
warn!(
"Skipping optimizer rule '{}' due to unexpected error: {}",
rule.name(),
@@ -186,7 +186,7 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
// TODO iterate until no changes are made during rewrite
// (evaluating constants can enable new simplifications and
// simplifications can enable new constant evaluation)
// https://github.com/apache/arrow-datafusion/issues/1160
// https://github.com/apache/datafusion/issues/1160
expr.rewrite(&mut const_evaluator)
.data()?
.rewrite(&mut simplifier)
@@ -212,7 +212,7 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
// Would be nice if this API could use the SimplifyInfo
// rather than creating an DFSchemaRef coerces rather than doing
// it manually.
// https://github.com/apache/arrow-datafusion/issues/3793
// https://github.com/apache/datafusion/issues/3793
pub fn coerce(&self, expr: Expr, schema: DFSchemaRef) -> Result<Expr> {
let mut expr_rewrite = TypeCoercionRewriter { schema };
@@ -1728,7 +1728,7 @@ mod tests {
// Would be nice if this API could use the SimplifyInfo
// rather than creating an DFSchemaRef coerces rather than doing
// it manually.
// https://github.com/apache/arrow-datafusion/issues/3793
// https://github.com/apache/datafusion/issues/3793
let expr = simplifier.coerce(expr, schema).unwrap();
assert_eq!(expected, simplifier.simplify(expr).unwrap());
@@ -3085,7 +3085,7 @@ mod tests {
// c2
//
// Need to call simplify 2x due to
// https://github.com/apache/arrow-datafusion/issues/1160
// https://github.com/apache/datafusion/issues/1160
assert_eq!(
simplify(simplify(Expr::Case(Case::new(
None,
@@ -3103,7 +3103,7 @@ mod tests {
// ISNULL(c2) OR c2
//
// Need to call simplify 2x due to
// https://github.com/apache/arrow-datafusion/issues/1160
// https://github.com/apache/datafusion/issues/1160
assert_eq!(
simplify(simplify(Expr::Case(Case::new(
None,
@@ -3121,7 +3121,7 @@ mod tests {
// --> c1 OR NOT(c2)
//
// Need to call simplify 2x due to
// https://github.com/apache/arrow-datafusion/issues/1160
// https://github.com/apache/datafusion/issues/1160
assert_eq!(
simplify(simplify(Expr::Case(Case::new(
None,
@@ -3140,7 +3140,7 @@ mod tests {
// --> c1 OR c2
//
// Need to call simplify 2x due to
// https://github.com/apache/arrow-datafusion/issues/1160
// https://github.com/apache/datafusion/issues/1160
assert_eq!(
simplify(simplify(Expr::Case(Case::new(
None,
@@ -3392,7 +3392,7 @@ mod tests {
true,
)));
// TODO: Further simplify this expression
// https://github.com/apache/arrow-datafusion/issues/8970
// https://github.com/apache/datafusion/issues/8970
// assert_eq!(simplify(expr.clone()), lit(true));
assert_eq!(simplify(expr.clone()), expr);
}
@@ -120,7 +120,7 @@ impl SimplifyExpressions {
//
// This is likely related to the fact that order of the columns must
// match the order of the children. see
// https://github.com/apache/arrow-datafusion/pull/8780 for more details
// https://github.com/apache/datafusion/pull/8780 for more details
let simplifier = if let LogicalPlan::Join(_) = plan {
simplifier.with_canonicalize(false)
} else {
+1 -1
View File
@@ -294,7 +294,7 @@ pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema {
/// For example, if an expression `1 + 2` is rewritten to `3`, the name of the
/// expression should be preserved: `3 as "1 + 2"`
///
/// See <https://github.com/apache/arrow-datafusion/issues/3555> for details
/// See <https://github.com/apache/datafusion/issues/3555> for details
pub struct NamePreserver {
use_alias: bool,
}
@@ -58,7 +58,7 @@ fn case_when() -> Result<()> {
#[test]
fn subquery_filter_with_cast() -> Result<()> {
// regression test for https://github.com/apache/arrow-datafusion/issues/3760
// regression test for https://github.com/apache/datafusion/issues/3760
let sql = "SELECT col_int32 FROM test \
WHERE col_int32 > (\
SELECT AVG(col_int32) FROM test \
@@ -102,7 +102,7 @@ fn unsigned_target_type() -> Result<()> {
#[test]
fn distribute_by() -> Result<()> {
// regression test for https://github.com/apache/arrow-datafusion/issues/3234
// regression test for https://github.com/apache/datafusion/issues/3234
let sql = "SELECT col_int32, col_utf8 FROM test DISTRIBUTE BY (col_utf8)";
let plan = test_sql(sql)?;
let expected = "Repartition: DistributeBy(col_utf8)\
@@ -113,7 +113,7 @@ fn distribute_by() -> Result<()> {
#[test]
fn semi_join_with_join_filter() -> Result<()> {
// regression test for https://github.com/apache/arrow-datafusion/issues/2888
// regression test for https://github.com/apache/datafusion/issues/2888
let sql = "SELECT col_utf8 FROM test WHERE EXISTS (\
SELECT col_utf8 FROM test t2 WHERE test.col_int32 = t2.col_int32 \
AND test.col_uint32 != t2.col_uint32)";
@@ -130,7 +130,7 @@ fn semi_join_with_join_filter() -> Result<()> {
#[test]
fn anti_join_with_join_filter() -> Result<()> {
// regression test for https://github.com/apache/arrow-datafusion/issues/2888
// regression test for https://github.com/apache/datafusion/issues/2888
let sql = "SELECT col_utf8 FROM test WHERE NOT EXISTS (\
SELECT col_utf8 FROM test t2 WHERE test.col_int32 = t2.col_int32 \
AND test.col_uint32 != t2.col_uint32)";
@@ -262,7 +262,7 @@ fn push_down_filter_groupby_expr_contains_alias() {
}
#[test]
// issue: https://github.com/apache/arrow-datafusion/issues/5334
// issue: https://github.com/apache/datafusion/issues/5334
fn test_same_name_but_not_ambiguous() {
let sql = "SELECT t1.col_int32 AS col_int32 FROM test t1 intersect SELECT col_int32 FROM test t2";
let plan = test_sql(sql).unwrap();
@@ -89,7 +89,7 @@ impl AggregateExpr for ApproxDistinct {
let accumulator: Box<dyn Accumulator> = match &self.input_data_type {
// TODO u8, i8, u16, i16 shall really be done using bitmap, not HLL
// TODO support for boolean (trivial case)
// https://github.com/apache/arrow-datafusion/issues/1109
// https://github.com/apache/datafusion/issues/1109
DataType::UInt8 => Box::new(NumericHLLAccumulator::<UInt8Type>::new()),
DataType::UInt16 => Box::new(NumericHLLAccumulator::<UInt16Type>::new()),
DataType::UInt32 => Box::new(NumericHLLAccumulator::<UInt32Type>::new()),
@@ -248,7 +248,7 @@ impl AggregateExpr for Max {
}
// It would be nice to have a fast implementation for Strings as well
// https://github.com/apache/arrow-datafusion/issues/6906
// https://github.com/apache/datafusion/issues/6906
// This is only reached if groups_accumulator_supported is out of sync
_ => internal_err!(
@@ -390,7 +390,7 @@ impl EquivalenceGroup {
});
// TODO: Convert the algorithm below to a version that uses `HashMap`.
// once `Arc<dyn PhysicalExpr>` can be stored in `HashMap`.
// See issue: https://github.com/apache/arrow-datafusion/issues/8027
// See issue: https://github.com/apache/datafusion/issues/8027
let mut new_classes = vec![];
for (source, target) in mapping.iter() {
if new_classes.is_empty() {
@@ -490,7 +490,7 @@ impl EquivalenceProperties {
/// with A and B, we could surely use the original ordering. However, if A has been changed,
/// for example, A -> Cast(A, Int64) or any other form, it is invalid to continue using the original ordering.
/// Since that would cause bugs in dependency construction, we should substitute the input order in order to get the correct
/// dependency map, happen in issue 8838: <https://github.com/apache/arrow-datafusion/issues/8838>
/// dependency map, happen in issue 8838: <https://github.com/apache/datafusion/issues/8838>
pub fn substitute_oeq_class(&mut self, mapping: &ProjectionMapping) -> Result<()> {
let orderings = &self.oeq_class.orderings;
let new_order = orderings
@@ -2940,7 +2940,7 @@ mod tests {
#[test]
fn relatively_deeply_nested() {
// Reproducer for https://github.com/apache/arrow-datafusion/issues/419
// Reproducer for https://github.com/apache/datafusion/issues/419
// where even relatively shallow binary expressions overflowed
// the stack in debug builds
@@ -3408,7 +3408,7 @@ mod tests {
.unwrap();
// is distinct: float64array is distinct decimal array
// TODO: now we do not refactor the `is distinct or is not distinct` rule of coercion.
// traced by https://github.com/apache/arrow-datafusion/issues/1590
// traced by https://github.com/apache/datafusion/issues/1590
// the decimal array will be cast to float64array
apply_logic_op(
&schema,
@@ -704,7 +704,7 @@ mod tests {
}
#[test]
#[ignore] // TODO: https://github.com/apache/arrow-datafusion/issues/5396
#[ignore] // TODO: https://github.com/apache/datafusion/issues/5396
fn test_cast_decimal() -> Result<()> {
let schema = Schema::new(vec![Field::new("a", DataType::Int64, false)]);
let a = Int64Array::from(vec![100]);
@@ -72,7 +72,7 @@ pub fn physical_exprs_bag_equal(
pub fn deduplicate_physical_exprs(exprs: &mut Vec<Arc<dyn PhysicalExpr>>) {
// TODO: Once we can use `HashSet`s with `Arc<dyn PhysicalExpr>`, this
// function should use a `HashSet` to reduce computational complexity.
// See issue: https://github.com/apache/arrow-datafusion/issues/8027
// See issue: https://github.com/apache/datafusion/issues/8027
let mut idx = 0;
while idx < exprs.len() {
let mut rest_idx = idx + 1;
@@ -101,7 +101,7 @@ impl<O: OffsetSizeTrait> GroupValues for GroupValuesByes<O> {
// if we only wanted to take the first n, insert the rest back
// into the map we could potentially avoid this reallocation, at
// the expense of much more complex code.
// see https://github.com/apache/arrow-datafusion/issues/9195
// see https://github.com/apache/datafusion/issues/9195
let emit_group_values = map_contents.slice(0, n);
let remaining_group_values =
map_contents.slice(n, map_contents.len() - n);
+1 -1
View File
@@ -244,7 +244,7 @@ impl ExecutionPlan for DataSinkExec {
// Maintains ordering in the sense that the written file will reflect
// the ordering of the input. For more context, see:
//
// https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178
// https://github.com/apache/datafusion/pull/6354#discussion_r1195284178
vec![true]
}
@@ -805,11 +805,11 @@ impl RepartitionExec {
// If the input stream is endless, we may spin forever and
// never yield back to tokio. See
// https://github.com/apache/arrow-datafusion/issues/5278.
// https://github.com/apache/datafusion/issues/5278.
//
// However, yielding on every batch causes a bottleneck
// when running with multiple cores. See
// https://github.com/apache/arrow-datafusion/issues/6290
// https://github.com/apache/datafusion/issues/6290
//
// Thus, heuristically yield after producing num_partition
// batches
+1 -1
View File
@@ -614,7 +614,7 @@ mod tests {
valid.append(true);
// NULL with non-zero value length
// Issue https://github.com/apache/arrow-datafusion/issues/9932
// Issue https://github.com/apache/datafusion/issues/9932
values.push(Some("?"));
offsets.push(OffsetSize::from_usize(values.len()).unwrap());
valid.append(false);
+1 -1
View File
@@ -252,7 +252,7 @@ mod tests {
let _ = ValuesExec::try_new_from_batches(invalid_schema, batches).unwrap_err();
}
// Test issue: https://github.com/apache/arrow-datafusion/issues/8763
// Test issue: https://github.com/apache/datafusion/issues/8763
#[test]
fn new_exec_with_non_nullable_schema() {
let schema = Arc::new(Schema::new(vec![Field::new(
@@ -393,7 +393,7 @@ trait PartitionSearcher: Send {
// Use input_schema for the buffer schema, not `record_batch.schema()`
// as it may not have the "correct" schema in terms of output
// nullability constraints. For details, see the following issue:
// https://github.com/apache/arrow-datafusion/issues/9320
// https://github.com/apache/datafusion/issues/9320
.or_insert_with(|| PartitionBatchState::new(self.input_schema().clone()));
partition_batch_state.extend(&partition_batch)?;
}
+1 -1
View File
@@ -99,7 +99,7 @@ impl Serializeable for Expr {
let bytes: Bytes = buffer.into();
// the produced byte stream may lead to "recursion limit" errors, see
// https://github.com/apache/arrow-datafusion/issues/3968
// https://github.com/apache/datafusion/issues/3968
// Until the underlying prost issue ( https://github.com/tokio-rs/prost/issues/736 ) is fixed, we try to
// deserialize the data here and check for errors.
//
@@ -980,7 +980,7 @@ pub fn serialize_expr(
| Expr::Exists { .. }
| Expr::OuterReferenceColumn { .. } => {
// we would need to add logical plan operators to datafusion.proto to support this
// see discussion in https://github.com/apache/arrow-datafusion/issues/2565
// see discussion in https://github.com/apache/datafusion/issues/2565
return Err(Error::General("Proto serialization error: Expr::ScalarSubquery(_) | Expr::InSubquery(_) | Expr::Exists { .. } | Exp:OuterReferenceColumn not supported".to_string()));
}
Expr::GetIndexedField(GetIndexedField { expr, field }) => {
+1 -1
View File
@@ -60,7 +60,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// Virtual stack machine to convert SQLExpr to Expr
// This allows visiting the expr tree in a depth-first manner which
// produces expressions in postfix notations, i.e. `a + b` => `a b +`.
// See https://github.com/apache/arrow-datafusion/issues/1444
// See https://github.com/apache/datafusion/issues/1444
let mut stack = vec![StackEntry::SQLExpr(Box::new(sql))];
let mut eval_stack = vec![];
+1 -1
View File
@@ -440,7 +440,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::Interval => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
// Explicitly list all other types so that if sqlparser
// adds/changes the `SQLDataType` the compiler will tell us on upgrade
// and avoid bugs like https://github.com/apache/arrow-datafusion/issues/3059
// and avoid bugs like https://github.com/apache/datafusion/issues/3059
SQLDataType::Nvarchar(_)
| SQLDataType::JSON
| SQLDataType::Uuid
+1 -1
View File
@@ -154,7 +154,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// * `Result<i64>` - An `Ok` variant containing the constant result if evaluation is successful,
/// or an `Err` variant containing an error message if evaluation fails.
///
/// <https://github.com/apache/arrow-datafusion/issues/9821> tracks a more general solution
/// <https://github.com/apache/datafusion/issues/9821> tracks a more general solution
fn get_constant_result(expr: &Expr, arg_name: &str) -> Result<i64> {
match expr {
Expr::Literal(ScalarValue::Int64(Some(s))) => Ok(*s),
+1 -1
View File
@@ -90,7 +90,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let projected_plan = self.project(base_plan.clone(), select_exprs.clone())?;
// Place the fields of the base plan at the front so that when there are references
// with the same name, the fields of the base plan will be searched first.
// See https://github.com/apache/arrow-datafusion/issues/9162
// See https://github.com/apache/datafusion/issues/9162
let mut combined_schema = base_plan.schema().as_ref().clone();
combined_schema.merge(projected_plan.schema());
+1 -1
View File
@@ -799,7 +799,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
) -> Result<LogicalPlan> {
if self.has_table("information_schema", "tables") {
// we only support the basic "SHOW TABLES"
// https://github.com/apache/arrow-datafusion/issues/3188
// https://github.com/apache/datafusion/issues/3188
if db_name.is_some() || filter.is_some() || full || extended {
plan_err!("Unsupported parameters to SHOW TABLES")
} else {
+1 -1
View File
@@ -19,7 +19,7 @@
//! not exported as they will eventually be moved to the SQLparser package.
//!
//!
//! See <https://github.com/apache/arrow-datafusion/issues/8661>
//! See <https://github.com/apache/datafusion/issues/8661>
use core::fmt;
+1 -1
View File
@@ -156,7 +156,7 @@ pub(crate) fn resolve_positions_to_exprs(
) -> Option<Expr> {
match expr {
// sql_expr_to_logical_expr maps number to i64
// https://github.com/apache/arrow-datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887
// https://github.com/apache/datafusion/blob/8d175c759e17190980f270b5894348dc4cff9bbf/datafusion/src/sql/planner.rs#L882-L887
Expr::Literal(ScalarValue::Int64(Some(position)))
if position > &0_i64 && position <= &(select_exprs.len() as i64) =>
{
+3 -3
View File
@@ -2615,7 +2615,7 @@ fn select_multibyte_column() {
#[test]
fn select_groupby_orderby() {
// ensure that references are correctly resolved in the order by clause
// see https://github.com/apache/arrow-datafusion/issues/4854
// see https://github.com/apache/datafusion/issues/4854
let sql = r#"SELECT
avg(age) AS "value",
date_trunc('month', birth_date) AS "birth_date"
@@ -3360,7 +3360,7 @@ fn hive_aggregate_with_filter() -> Result<()> {
#[test]
fn order_by_unaliased_name() {
// https://github.com/apache/arrow-datafusion/issues/3160
// https://github.com/apache/datafusion/issues/3160
// This query was failing with:
// SchemaError(FieldNotFound { qualifier: Some("p"), name: "state", valid_fields: ["z", "q"] })
let sql =
@@ -3592,7 +3592,7 @@ fn test_noneq_with_filter_join() {
#[test]
fn test_one_side_constant_full_join() {
// TODO: this sql should be parsed as join after
// https://github.com/apache/arrow-datafusion/issues/2877 is resolved.
// https://github.com/apache/datafusion/issues/2877 is resolved.
let sql = "SELECT id, order_id \
FROM person \
FULL OUTER JOIN orders \
+1 -1
View File
@@ -297,7 +297,7 @@ fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
/// This structure attempts to mimic the command line options
/// accepted by IDEs such as CLion that pass arguments
///
/// See <https://github.com/apache/arrow-datafusion/issues/8287> for more details
/// See <https://github.com/apache/datafusion/issues/8287> for more details
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about= None)]
struct Options {
@@ -101,7 +101,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
// replace any leading spaces with '-' as
// `sqllogictest` ignores whitespace differences
//
// See https://github.com/apache/arrow-datafusion/issues/6328
// See https://github.com/apache/datafusion/issues/6328
let content = l.trim_start();
let new_prefix = "-".repeat(l.len() - content.len());
// maintain for each line a number, so
@@ -71,7 +71,7 @@ CREATE TABLE test (c1 BIGINT,c2 BIGINT) as values
# Error tests
#######
# https://github.com/apache/arrow-datafusion/issues/3353
# https://github.com/apache/datafusion/issues/3353
statement error DataFusion error: Schema error: Schema contains duplicate unqualified field name "APPROX_DISTINCT\(aggregate_test_100\.c9\)"
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100
@@ -551,7 +551,7 @@ SELECT approx_median(col_f64_nan) FROM median_table
NaN
# median_multi
# test case for https://github.com/apache/arrow-datafusion/issues/3105
# test case for https://github.com/apache/datafusion/issues/3105
# has an intermediate grouping
statement ok
create table cpu (host string, usage float) as select * from (values
@@ -674,7 +674,7 @@ SELECT COUNT(2) FROM aggregate_test_100
100
# csv_query_approx_count
# FIX: https://github.com/apache/arrow-datafusion/issues/3353
# FIX: https://github.com/apache/datafusion/issues/3353
# query II
# SELECT approx_distinct(c9) AS count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100
# ----
@@ -3383,7 +3383,7 @@ query I
SELECT 0 AS "t.a" FROM t HAVING MAX(t.a) = 0;
----
# Test issue: https://github.com/apache/arrow-datafusion/issues/9161
# Test issue: https://github.com/apache/datafusion/issues/9161
query I rowsort
SELECT CAST(a AS INT) FROM t GROUP BY t.a;
----
+11 -11
View File
@@ -1378,7 +1378,7 @@ NULL 43
## array_pop_back (aliases: `list_pop_back`)
# array_pop_back scalar function with null
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# follow clickhouse and duckdb
#query ?
#select array_pop_back(null);
@@ -1556,7 +1556,7 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrays
## array_pop_front (aliases: `list_pop_front`)
#TODO:https://github.com/apache/arrow-datafusion/issues/7142
#TODO:https://github.com/apache/datafusion/issues/7142
# array_pop_front scalar function with null
# follow clickhouse and duckdb
#query ?
@@ -3020,7 +3020,7 @@ select array_positions([1, 2, 3, 4, 5], null);
----
[]
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# array_positions with NULL (follow PostgreSQL)
#query ?
#select array_positions(null, 1);
@@ -4079,7 +4079,7 @@ select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arr
NULL 0
#TODO
#https://github.com/apache/arrow-datafusion/issues/9158
#https://github.com/apache/datafusion/issues/9158
#query II
#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))'))
#----
@@ -4165,7 +4165,7 @@ select
----
[1, , 3] [, 2.2, 3.3] [, bc]
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# follow PostgreSQL behavior
#query ?
#select
@@ -4385,7 +4385,7 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12],
## array_remove_all (aliases: `list_removes`)
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# array_remove_all with NULL elements
#query ?
#select array_remove_all(NULL, 1);
@@ -5257,7 +5257,7 @@ true false true false false false true true false false true false true
## array_distinct
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
#query ?
#select array_distinct(null);
#----
@@ -6056,7 +6056,7 @@ select array_concat(column1, [7]) from arrays_values_v2;
# flatten
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# follow DuckDB
#query ?
#select flatten(NULL);
@@ -6144,7 +6144,7 @@ select empty(arrow_cast(make_array(), 'LargeList(Null)'));
----
true
#TODO: https://github.com/apache/arrow-datafusion/issues/9158
#TODO: https://github.com/apache/datafusion/issues/9158
#query B
#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)'));
#----
@@ -6166,7 +6166,7 @@ select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)'));
----
false
#TODO: https://github.com/apache/arrow-datafusion/issues/7142
#TODO: https://github.com/apache/datafusion/issues/7142
# empty scalar function #4
#query B
#select empty(NULL);
@@ -6473,7 +6473,7 @@ select arrow_typeof(c) from test_create_array_table;
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
# Test casting to array types
# issue: https://github.com/apache/arrow-datafusion/issues/9440
# issue: https://github.com/apache/datafusion/issues/9440
query ??T
select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]);
----
+2 -2
View File
@@ -111,7 +111,7 @@ a
statement ok
create table test ("'test'" varchar, "'test2'" varchar, "'test3'" varchar);
# https://github.com/apache/arrow-datafusion/issues/9714
# https://github.com/apache/datafusion/issues/9714
## Until the partition by parsing uses ColumnDef, this test is meaningless since it becomes an overfit. Even in
## CREATE EXTERNAL TABLE, there is a schema mismatch, this should be an issue.
#
@@ -138,7 +138,7 @@ create table test ("'test'" varchar, "'test2'" varchar, "'test3'" varchar);
#LOCATION 'test_files/scratch/copy/escape_quote/' PARTITIONED BY ("'test2'", "'test3'");
#
# This triggers a panic (index out of bounds)
# https://github.com/apache/arrow-datafusion/issues/9269
# https://github.com/apache/datafusion/issues/9269
#query
#select * from validate_partitioned_escape_quote;
+6 -6
View File
@@ -679,7 +679,7 @@ WITH RECURSIVE my_cte AS (
SELECT a FROM my_cte;
# Test issue: https://github.com/apache/arrow-datafusion/issues/9680
# Test issue: https://github.com/apache/datafusion/issues/9680
query I
WITH RECURSIVE recursive_cte AS (
SELECT 1 as val
@@ -700,7 +700,7 @@ SELECT * FROM recursive_cte;
1
2
# Test issue: https://github.com/apache/arrow-datafusion/issues/9680
# Test issue: https://github.com/apache/datafusion/issues/9680
# 'recursive_cte' should be on the left of the cross join, as this is the test purpose of the above query.
query TT
explain WITH RECURSIVE recursive_cte AS (
@@ -746,7 +746,7 @@ physical_plan
11)------ProjectionExec: expr=[2 as val]
12)--------PlaceholderRowExec
# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
# Test issue: https://github.com/apache/datafusion/issues/9794
# Non-recursive term and recursive term have different types
query IT rowsort
WITH RECURSIVE my_cte AS(
@@ -758,7 +758,7 @@ WITH RECURSIVE my_cte AS(
1 Int32
3 Int32
# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
# Test issue: https://github.com/apache/datafusion/issues/9794
# Non-recursive term and recursive term have different number of columns
query error DataFusion error: Error during planning: Non\-recursive term and recursive term must have the same number of columns \(1 != 3\)
WITH RECURSIVE my_cte AS (
@@ -767,7 +767,7 @@ WITH RECURSIVE my_cte AS (
SELECT a+2, 'a','c' FROM my_cte WHERE a<3
) SELECT * FROM my_cte;
# Test issue: https://github.com/apache/arrow-datafusion/issues/9794
# Test issue: https://github.com/apache/datafusion/issues/9794
# Non-recursive term and recursive term have different types, and cannot be cast
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
WITH RECURSIVE my_cte AS (
@@ -777,7 +777,7 @@ WITH RECURSIVE my_cte AS (
) SELECT * FROM my_cte;
# Define a non-recursive CTE in the recursive WITH clause.
# Test issue: https://github.com/apache/arrow-datafusion/issues/9804
# Test issue: https://github.com/apache/datafusion/issues/9804
query I
WITH RECURSIVE cte AS (
SELECT a FROM (VALUES(1)) AS t(a) WHERE a > 2
+1 -1
View File
@@ -19,7 +19,7 @@
## Date/Time Handling Tests
##########
# Reproducer for https://github.com/apache/arrow-datafusion/issues/3944
# Reproducer for https://github.com/apache/datafusion/issues/3944
statement ok
CREATE TABLE test(
i_item_desc VARCHAR,
@@ -206,7 +206,7 @@ true false NULL true true false true NULL
true false NULL true true false true NULL
# Reproducer for https://github.com/apache/arrow-datafusion/issues/8738
# Reproducer for https://github.com/apache/datafusion/issues/8738
# This query should work correctly
query P?TT rowsort
SELECT
+1 -1
View File
@@ -775,7 +775,7 @@ SELECT upper(NULL)
----
NULL
# TODO issue: https://github.com/apache/arrow-datafusion/issues/6596
# TODO issue: https://github.com/apache/datafusion/issues/6596
# query ??
#SELECT
# CAST([1,2,3,4] AS INT[]) as a,
@@ -1050,7 +1050,7 @@ SELECT
----
arrow.apache.org arrow.apache.org
# Test substring_index issue https://github.com/apache/arrow-datafusion/issues/9472
# Test substring_index issue https://github.com/apache/datafusion/issues/9472
query TTT
SELECT
url,
@@ -4340,7 +4340,7 @@ physical_plan
statement ok
drop table t1
# Reproducer for https://github.com/apache/arrow-datafusion/issues/8175
# Reproducer for https://github.com/apache/datafusion/issues/8175
statement ok
create table t1(state string, city string, min_temp float, area int, time timestamp) as values
@@ -320,14 +320,14 @@ SHOW datafusion.execution.batch_size VERBOSE
datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption
# show_time_zone_default_utc
# https://github.com/apache/arrow-datafusion/issues/3255
# https://github.com/apache/datafusion/issues/3255
query TT
SHOW TIME ZONE
----
datafusion.execution.time_zone +00:00
# show_timezone_default_utc
# https://github.com/apache/arrow-datafusion/issues/3255
# https://github.com/apache/datafusion/issues/3255
query TT
SHOW TIMEZONE
----
@@ -335,14 +335,14 @@ datafusion.execution.time_zone +00:00
# show_time_zone_default_utc_verbose
# https://github.com/apache/arrow-datafusion/issues/3255
# https://github.com/apache/datafusion/issues/3255
query TTT
SHOW TIME ZONE VERBOSE
----
datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour
# show_timezone_default_utc
# https://github.com/apache/arrow-datafusion/issues/3255
# https://github.com/apache/datafusion/issues/3255
query TTT
SHOW TIMEZONE VERBOSE
----
@@ -207,7 +207,7 @@ create table table_without_values(c1 varchar not null);
# verify that the sort order of the insert query is maintained into the
# insert (there should be a SortExec in the following plan)
# See https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178 for more background
# See https://github.com/apache/datafusion/pull/6354#discussion_r1195284178 for more background
query TT
explain insert into table_without_values select c1 from aggregate_test_100 order by c1;
----
@@ -448,7 +448,7 @@ LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3/';
# verify that the sort order of the insert query is maintained into the
# insert (there should be a SortExec in the following plan)
# See https://github.com/apache/arrow-datafusion/pull/6354#discussion_r1195284178 for more background
# See https://github.com/apache/datafusion/pull/6354#discussion_r1195284178 for more background
query TT
explain insert into table_without_values select c1 from aggregate_test_100 order by c1;
----
@@ -17,7 +17,7 @@
# Use `interval` SQL literal syntax
# the types should be the same: https://github.com/apache/arrow-datafusion/issues/5801
# the types should be the same: https://github.com/apache/datafusion/issues/5801
query TT
select
arrow_typeof(interval '5 months'),
+3 -3
View File
@@ -19,7 +19,7 @@
## Join Tests
##########
# Regression test: https://github.com/apache/arrow-datafusion/issues/4844
# Regression test: https://github.com/apache/datafusion/issues/4844
statement ok
CREATE TABLE IF NOT EXISTS students(name TEXT, mark INT) AS VALUES
('Stuart', 28),
@@ -49,7 +49,7 @@ drop table IF EXISTS students;
statement ok
drop table IF EXISTS grades;
# issue: https://github.com/apache/arrow-datafusion/issues/5382
# issue: https://github.com/apache/datafusion/issues/5382
statement ok
CREATE TABLE IF NOT EXISTS test1(a int, b int) as select 1 as a, 2 as b;
@@ -702,7 +702,7 @@ drop table IF EXISTS full_join_test;
statement ok
set datafusion.execution.batch_size = 8192;
# related to: https://github.com/apache/arrow-datafusion/issues/8374
# related to: https://github.com/apache/datafusion/issues/8374
statement ok
CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100);
+2 -2
View File
@@ -265,7 +265,7 @@ CREATE TABLE b(a INT, b INT, c INT) AS VALUES
(4, 400, 800)
# issue_3002
# // repro case for https://github.com/apache/arrow-datafusion/issues/3002
# // repro case for https://github.com/apache/datafusion/issues/3002
query II
select a.a, b.b from a join b on a.a = b.b
@@ -1272,7 +1272,7 @@ ORDER BY t1_id
NULL e
# Error left anti join
# https://github.com/apache/arrow-datafusion/issues/4366
# https://github.com/apache/datafusion/issues/4366
statement ok
set datafusion.optimizer.repartition_joins = false;
@@ -69,7 +69,7 @@ physical_plan
# doesn't invalidate lexicographical ordering.
# Hence '[CAST(a AS BIGINT) AS a_big ASC, b ASC]'
# is valid for the given ordering: '[a ASC, b ASC]'.
# See discussion for rationale: https://github.com/apache/arrow-datafusion/issues/8838#issue-2077714891
# See discussion for rationale: https://github.com/apache/datafusion/issues/8838#issue-2077714891
query TT
EXPLAIN
SELECT a, CAST(a AS BIGINT) AS a_big, b
@@ -118,7 +118,7 @@ physical_plan
# test for cast Utf8
# (must actually sort as the sort order for a number cast to utf8 is different than for int)
# See discussion: https://github.com/apache/arrow-datafusion/pull/9127#discussion_r1492336709
# See discussion: https://github.com/apache/datafusion/pull/9127#discussion_r1492336709
query TT
EXPLAIN
SELECT
+2 -2
View File
@@ -333,7 +333,7 @@ drop table foo;
#####
# Tests for https://github.com/apache/arrow-datafusion/issues/4854
# Tests for https://github.com/apache/datafusion/issues/4854
# Ordering / grouping by the same column
#####
statement ok
@@ -704,7 +704,7 @@ physical_plan
# Minimal reproduction of issue 5970
# https://github.com/apache/arrow-datafusion/issues/5970
# https://github.com/apache/datafusion/issues/5970
statement ok
set datafusion.execution.target_partitions = 2;
@@ -468,7 +468,7 @@ DROP TABLE test_float;
#########
# Predicates on memory tables / statistics generation
# Reproducer for https://github.com/apache/arrow-datafusion/issues/7125
# Reproducer for https://github.com/apache/datafusion/issues/7125
#########
statement ok
@@ -508,7 +508,7 @@ DROP TABLE t;
########
# Test query with bloom filter
# Refer to https://github.com/apache/arrow-datafusion/pull/7821#pullrequestreview-1688062599
# Refer to https://github.com/apache/datafusion/pull/7821#pullrequestreview-1688062599
########
statement ok
@@ -75,7 +75,7 @@ DROP TABLE parquet_table;
# Unbounded repartition
# See https://github.com/apache/arrow-datafusion/issues/5278
# See https://github.com/apache/datafusion/issues/5278
# Set up unbounded table and run a query - the query plan should display a `RepartitionExec`
# and a `CoalescePartitionsExec`
statement ok
@@ -126,7 +126,7 @@ SELECT column1 FROM parquet_table_with_order WHERE column1 <> 42 ORDER BY column
200
# explain should not have any groups with more than one file
# https://github.com/apache/arrow-datafusion/issues/8451
# https://github.com/apache/datafusion/issues/8451
query TT
EXPLAIN SELECT column1 FROM parquet_table_with_order WHERE column1 <> 42 ORDER BY column1;
----
@@ -240,7 +240,7 @@ DROP TABLE json_table;
###################
## Use pre-existing files we don't have a way to create arrow files yet
## (https://github.com/apache/arrow-datafusion/issues/8504)
## (https://github.com/apache/datafusion/issues/8504)
statement ok
CREATE EXTERNAL TABLE arrow_table
STORED AS ARROW
@@ -248,7 +248,7 @@ LOCATION '../core/tests/data/example.arrow';
# It would be great to see the file read as "4" groups with even sizes (offsets) eventually
# https://github.com/apache/arrow-datafusion/issues/8503
# https://github.com/apache/datafusion/issues/8503
query TT
EXPLAIN SELECT * FROM arrow_table
----
@@ -533,7 +533,7 @@ select ln(null);
NULL
# ln scalar ops with zero edgecases
# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382
# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382
query R rowsort
select ln(0);
----
@@ -582,7 +582,7 @@ select log(2, 2.0/3) a, log(10, 2.0/3) b;
-0.584962500721 -0.176091259056
# log scalar ops with zero edgecases
# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382
# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382
query RR rowsort
select log(0) a, log(1, 64) b;
----
@@ -627,7 +627,7 @@ select log10(2.0/3);
-0.176091259056
# log10 scalar ops with zero edgecases
# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382
# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382
query R rowsort
select log10(0);
----
@@ -663,7 +663,7 @@ select log2(2.0/3);
-0.584962500721
# log2 scalar ops with zero edgecases
# please see https://github.com/apache/arrow-datafusion/pull/5245#issuecomment-1426828382
# please see https://github.com/apache/datafusion/pull/5245#issuecomment-1426828382
query R rowsort
select log2(0);
----
@@ -1276,7 +1276,7 @@ FROM t1
999
999
# issue: https://github.com/apache/arrow-datafusion/issues/7004
# issue: https://github.com/apache/datafusion/issues/7004
query B
select case c1
when 'foo' then TRUE
@@ -1325,7 +1325,7 @@ NULL
NULL
4
# issue: https://github.com/apache/arrow-datafusion/issues/6376
# issue: https://github.com/apache/datafusion/issues/6376
query I
select case when a = 0 then 123 end from (values(1), (0), (null)) as t(a);
----

Some files were not shown because too many files have changed in this diff Show More