mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
chore: Cleanup returning null arrays (#20423)
Clean up a few places where the code returned a null array; it is cleaner and faster to return a typed scalar null instead.

## Which issue does this PR close?

Does not close an issue; this cleanup was suggested in the code review for #20361.

## Rationale for this change

Returning a typed scalar null should be preferred to returning a null array: it still carries type information, and it avoids materializing an all-null array. The downstream consumer can always materialize the equivalent array if needed.

## What changes are included in this PR?

Clean up five instances of this pattern.

## Are these changes tested?

Yes. No new test cases are possible or warranted.

## Are there any user-facing changes?

No.
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray, new_null_array};
|
||||
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray};
|
||||
use arrow::compute::try_binary;
|
||||
use arrow::datatypes::{DataType, Int64Type};
|
||||
use arrow::error::ArrowError;
|
||||
@@ -144,10 +144,7 @@ fn compute_gcd_with_scalar(arr: &ArrayRef, scalar: Option<i64>) -> Result<Column
|
||||
|
||||
result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
|
||||
}
|
||||
None => Ok(ColumnarValue::Array(new_null_array(
|
||||
&DataType::Int64,
|
||||
arr.len(),
|
||||
))),
|
||||
None => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait,
|
||||
PrimitiveArray, new_null_array,
|
||||
PrimitiveArray,
|
||||
};
|
||||
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
|
||||
|
||||
@@ -138,9 +138,11 @@ impl ScalarUDFImpl for FindInSetFunc {
|
||||
| ScalarValue::LargeUtf8(str_list_literal),
|
||||
),
|
||||
) => {
|
||||
let result_array = match str_list_literal {
|
||||
match str_list_literal {
|
||||
// find_in_set(column_a, null) = null
|
||||
None => new_null_array(return_field.data_type(), str_array.len()),
|
||||
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
|
||||
return_field.data_type(),
|
||||
)?)),
|
||||
Some(str_list_literal) => {
|
||||
let str_list = str_list_literal.split(',').collect::<Vec<&str>>();
|
||||
let result = match str_array.data_type() {
|
||||
@@ -171,10 +173,9 @@ impl ScalarUDFImpl for FindInSetFunc {
|
||||
)
|
||||
}
|
||||
};
|
||||
Arc::new(result?)
|
||||
Ok(ColumnarValue::Array(Arc::new(result?)))
|
||||
}
|
||||
};
|
||||
Ok(ColumnarValue::Array(result_array))
|
||||
}
|
||||
}
|
||||
|
||||
// `string` is scalar, `str_list` is an array
|
||||
@@ -186,11 +187,11 @@ impl ScalarUDFImpl for FindInSetFunc {
|
||||
),
|
||||
ColumnarValue::Array(str_list_array),
|
||||
) => {
|
||||
let res = match string_literal {
|
||||
match string_literal {
|
||||
// find_in_set(null, column_b) = null
|
||||
None => {
|
||||
new_null_array(return_field.data_type(), str_list_array.len())
|
||||
}
|
||||
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
|
||||
return_field.data_type(),
|
||||
)?)),
|
||||
Some(string) => {
|
||||
let result = match str_list_array.data_type() {
|
||||
DataType::Utf8 => {
|
||||
@@ -217,10 +218,9 @@ impl ScalarUDFImpl for FindInSetFunc {
|
||||
)
|
||||
}
|
||||
};
|
||||
Arc::new(result?)
|
||||
Ok(ColumnarValue::Array(Arc::new(result?)))
|
||||
}
|
||||
};
|
||||
Ok(ColumnarValue::Array(res))
|
||||
}
|
||||
}
|
||||
|
||||
// both inputs are arrays
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType, new_null_array};
|
||||
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType};
|
||||
use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
|
||||
use chrono::{Datelike, Duration, Weekday};
|
||||
use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
|
||||
@@ -129,10 +129,7 @@ impl ScalarUDFImpl for SparkNextDay {
|
||||
} else {
|
||||
// TODO: if spark.sql.ansi.enabled is false,
|
||||
// returns NULL instead of an error for a malformed dayOfWeek.
|
||||
Ok(ColumnarValue::Array(Arc::new(new_null_array(
|
||||
&DataType::Date32,
|
||||
date_array.len(),
|
||||
))))
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
|
||||
}
|
||||
}
|
||||
_ => exec_err!(
|
||||
|
||||
@@ -15,9 +15,7 @@
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
use arrow::array::{
|
||||
ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray, new_null_array,
|
||||
};
|
||||
use arrow::array::{ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray};
|
||||
use arrow::datatypes::{DataType, Int32Type};
|
||||
use datafusion_common::types::{
|
||||
NativeType, logical_binary, logical_int32, logical_string,
|
||||
@@ -170,10 +168,7 @@ impl ScalarUDFImpl for SparkSha2 {
|
||||
(
|
||||
ColumnarValue::Array(_),
|
||||
ColumnarValue::Scalar(ScalarValue::Int32(None)),
|
||||
) => Ok(ColumnarValue::Array(new_null_array(
|
||||
&DataType::Utf8,
|
||||
args.number_rows,
|
||||
))),
|
||||
) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
|
||||
_ => {
|
||||
// Fallback to existing behavior for any array/mixed cases
|
||||
make_scalar_function(sha2_impl, vec![])(&args.args)
|
||||
|
||||
Reference in New Issue
Block a user