mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
fix: cardinality() of an empty array should be zero (#20533)
## Which issue does this PR close?

- Closes #20526.

## Rationale for this change

Per Postgres and the SQL spec, `cardinality()` of an empty array should be zero; we previously returned `NULL`.

Along the way, fix another bug: we previously returned `0` for the cardinality of an untyped `NULL` and `NULL` for the cardinality of a typed null (e.g., `NULL::int[]`). We should return `NULL` in both cases.

## What changes are included in this PR?

Bug fixes, update SLT.

## Are these changes tested?

Yes.

## Are there any user-facing changes?

Yes: the behavior of `cardinality` has changed, albeit the previous behavior was incorrect.
This commit is contained in:
@@ -120,7 +120,7 @@ impl ScalarUDFImpl for Cardinality {
|
||||
fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
|
||||
let [array] = take_function_args("cardinality", args)?;
|
||||
match array.data_type() {
|
||||
Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))),
|
||||
Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
|
||||
List(_) => {
|
||||
let list_array = as_list_array(array)?;
|
||||
generic_list_cardinality::<i32>(list_array)
|
||||
@@ -152,9 +152,14 @@ fn generic_list_cardinality<O: OffsetSizeTrait>(
|
||||
) -> Result<ArrayRef> {
|
||||
let result = array
|
||||
.iter()
|
||||
.map(|arr| match crate::utils::compute_array_dims(arr)? {
|
||||
Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>())),
|
||||
None => Ok(None),
|
||||
.map(|arr| match arr {
|
||||
Some(arr) if arr.is_empty() => Ok(Some(0u64)),
|
||||
arr => match crate::utils::compute_array_dims(arr)? {
|
||||
Some(vector) => {
|
||||
Ok(Some(vector.iter().map(|x| x.unwrap()).product::<u64>()))
|
||||
}
|
||||
None => Ok(None),
|
||||
},
|
||||
})
|
||||
.collect::<Result<UInt64Array>>()?;
|
||||
Ok(Arc::new(result) as ArrayRef)
|
||||
|
||||
@@ -5181,12 +5181,17 @@ select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(I
|
||||
query II
|
||||
select cardinality(make_array()), cardinality(make_array(make_array()))
|
||||
----
|
||||
NULL 0
|
||||
0 0
|
||||
|
||||
query II
|
||||
select cardinality([]), cardinality([]::int[]) as with_cast
|
||||
----
|
||||
0 0
|
||||
|
||||
query II
|
||||
select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
|
||||
----
|
||||
NULL 0
|
||||
0 0
|
||||
|
||||
#TODO
|
||||
#https://github.com/apache/datafusion/issues/9158
|
||||
@@ -5195,6 +5200,12 @@ NULL 0
|
||||
#----
|
||||
#NULL 0
|
||||
|
||||
# cardinality of NULL arrays should return NULL
|
||||
query II
|
||||
select cardinality(NULL), cardinality(arrow_cast(NULL, 'LargeList(Int64)'))
|
||||
----
|
||||
NULL NULL
|
||||
|
||||
# cardinality with columns
|
||||
query III
|
||||
select cardinality(column1), cardinality(column2), cardinality(column3) from arrays;
|
||||
|
||||
Reference in New Issue
Block a user