chore: Cleanup returning null arrays (#20423)

Clean up a few places where the code returned a null array, even though
it is a bit cleaner and faster to return a typed scalar null instead.

## Which issue does this PR close?

Does not close an issue; this cleanup was mentioned in the code review
for #20361.

## Rationale for this change

Returning a typed scalar null should be preferred to returning a null
array: it still has type information, and avoids materializing an
all-null array. The downstream consumer can always materialize the
equivalent array if they want to.

## What changes are included in this PR?

Clean up five instances of this pattern.

## Are these changes tested?

Yes. No new test cases are possible or warranted.

## Are there any user-facing changes?

No.
This commit is contained in:
Neil Conway
2026-02-19 11:15:28 -05:00
committed by GitHub
parent c3f080774c
commit 0022d8e503
4 changed files with 19 additions and 30 deletions
+2 -5
View File
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray, new_null_array};
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray};
use arrow::compute::try_binary;
use arrow::datatypes::{DataType, Int64Type};
use arrow::error::ArrowError;
@@ -144,10 +144,7 @@ fn compute_gcd_with_scalar(arr: &ArrayRef, scalar: Option<i64>) -> Result<Column
result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
}
None => Ok(ColumnarValue::Array(new_null_array(
&DataType::Int64,
arr.len(),
))),
None => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
}
}
+13 -13
View File
@@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow::array::{
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait,
PrimitiveArray, new_null_array,
PrimitiveArray,
};
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
@@ -138,9 +138,11 @@ impl ScalarUDFImpl for FindInSetFunc {
| ScalarValue::LargeUtf8(str_list_literal),
),
) => {
let result_array = match str_list_literal {
match str_list_literal {
// find_in_set(column_a, null) = null
None => new_null_array(return_field.data_type(), str_array.len()),
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
return_field.data_type(),
)?)),
Some(str_list_literal) => {
let str_list = str_list_literal.split(',').collect::<Vec<&str>>();
let result = match str_array.data_type() {
@@ -171,10 +173,9 @@ impl ScalarUDFImpl for FindInSetFunc {
)
}
};
Arc::new(result?)
Ok(ColumnarValue::Array(Arc::new(result?)))
}
};
Ok(ColumnarValue::Array(result_array))
}
}
// `string` is scalar, `str_list` is an array
@@ -186,11 +187,11 @@ impl ScalarUDFImpl for FindInSetFunc {
),
ColumnarValue::Array(str_list_array),
) => {
let res = match string_literal {
match string_literal {
// find_in_set(null, column_b) = null
None => {
new_null_array(return_field.data_type(), str_list_array.len())
}
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
return_field.data_type(),
)?)),
Some(string) => {
let result = match str_list_array.data_type() {
DataType::Utf8 => {
@@ -217,10 +218,9 @@ impl ScalarUDFImpl for FindInSetFunc {
)
}
};
Arc::new(result?)
Ok(ColumnarValue::Array(Arc::new(result?)))
}
};
Ok(ColumnarValue::Array(res))
}
}
// both inputs are arrays
@@ -18,7 +18,7 @@
use std::any::Any;
use std::sync::Arc;
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType, new_null_array};
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType};
use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
use chrono::{Datelike, Duration, Weekday};
use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
@@ -129,10 +129,7 @@ impl ScalarUDFImpl for SparkNextDay {
} else {
// TODO: if spark.sql.ansi.enabled is false,
// returns NULL instead of an error for a malformed dayOfWeek.
Ok(ColumnarValue::Array(Arc::new(new_null_array(
&DataType::Date32,
date_array.len(),
))))
Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
}
}
_ => exec_err!(
+2 -7
View File
@@ -15,9 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use arrow::array::{
ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray, new_null_array,
};
use arrow::array::{ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray};
use arrow::datatypes::{DataType, Int32Type};
use datafusion_common::types::{
NativeType, logical_binary, logical_int32, logical_string,
@@ -170,10 +168,7 @@ impl ScalarUDFImpl for SparkSha2 {
(
ColumnarValue::Array(_),
ColumnarValue::Scalar(ScalarValue::Int32(None)),
) => Ok(ColumnarValue::Array(new_null_array(
&DataType::Utf8,
args.number_rows,
))),
) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
_ => {
// Fallback to existing behavior for any array/mixed cases
make_scalar_function(sha2_impl, vec![])(&args.args)