perf: Fix quadratic behavior of to_array_of_size (#20459)

## Which issue does this PR close?

- Closes #20458.
- Closes #18159.

## Rationale for this change

When `to_array_of_size(n)` was called on a `List`-like object containing a
`StringViewArray` with `b` data buffers, the previous implementation
returned a list containing a `StringViewArray` with `n*b` buffers, which
results in catastrophically bad performance if `b` grows even somewhat
large.

This issue was previously noticed causing poor nested loop join
performance. #18161 adjusted the NLJ code to avoid calling
`to_array_of_size` for this reason, but didn't attempt to fix the
underlying issue in `to_array_of_size`. This PR doesn't attempt to
revert the change to the NLJ code: the special-case code added in #18161
is still slightly faster than `to_array_of_size` after this
optimization. It might be possible to address that in a future PR.

## What changes are included in this PR?
* Instead of using `repeat_n` + `concat` to merge together `n` copies of
the `StringViewArray`, we instead use `take`, which preserves the same
number of buffers as the input `StringViewArray`.
* Add a new benchmark for this situation
* Add more unit tests for `to_array_of_size`

## Are these changes tested?

Yes, and benchmarked.

## Are there any user-facing changes?

No.

## AI usage

Iterated on the problem with Claude Code; I understand the problem and
the solution.
This commit is contained in:
Neil Conway
2026-02-24 08:53:10 -05:00
committed by GitHub
parent a9c090141d
commit 9c85ac608f
4 changed files with 204 additions and 10 deletions
+4
View File
@@ -57,6 +57,10 @@ sql = ["sqlparser"]
harness = false
name = "with_hashes"
[[bench]]
harness = false
name = "scalar_to_array"
[dependencies]
ahash = { workspace = true }
apache-avro = { workspace = true, features = [
@@ -0,0 +1,107 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Benchmarks for `ScalarValue::to_array_of_size`, focusing on List
//! scalars.
use arrow::array::{Array, ArrayRef, AsArray, StringViewBuilder};
use arrow::datatypes::{DataType, Field};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use datafusion_common::ScalarValue;
use datafusion_common::utils::SingleRowListArrayBuilder;
use std::sync::Arc;
/// Creates a single-row `ScalarValue::List` of `num_elements` Utf8View
/// strings.
///
/// The strings are produced in up to `num_buffers` separately built
/// `StringViewArray` chunks which are then concatenated, with the goal that
/// the inner StringViewArray has `num_buffers` data buffers. Element `i` is
/// the value `i` rendered as 24 zero-padded hex digits.
///
/// Panics if `num_buffers` is 0 or if no chunk is produced (the
/// concatenation result is unwrapped).
fn make_list_scalar(num_elements: usize, num_buffers: usize) -> ScalarValue {
    let chunk_len = num_elements.div_ceil(num_buffers);

    // One array per chunk; stop as soon as every element has been emitted so
    // that no empty trailing chunks are created.
    let chunks: Vec<ArrayRef> = (0..num_buffers)
        .map(|chunk_idx| chunk_idx * chunk_len)
        .take_while(|&first| first < num_elements)
        .map(|first| {
            // The final chunk may be shorter than `chunk_len`.
            let last = (first + chunk_len).min(num_elements);
            let mut builder = StringViewBuilder::with_capacity(last - first);
            for i in first..last {
                builder.append_value(format!("{i:024x}"));
            }
            Arc::new(builder.finish()) as ArrayRef
        })
        .collect();

    let chunk_refs: Vec<&dyn Array> = chunks.iter().map(|chunk| chunk.as_ref()).collect();
    let values = arrow::compute::concat(&chunk_refs).unwrap();

    let item_field = Field::new_list_field(DataType::Utf8View, true);
    let list = SingleRowListArrayBuilder::new(values)
        .with_field(&item_field)
        .build_list_array();
    ScalarValue::List(Arc::new(list))
}
/// Touches every string in every row of a `List<Utf8View>` array.
///
/// The benchmark should cover not only the conversion itself but also reading
/// the converted data back, to model what would happen during query
/// evaluation. The summed string length is passed through `black_box` so the
/// reads cannot be optimized away.
fn consume_list_array(arr: &ArrayRef) {
    let lists = arr.as_list::<i32>();
    let total_len: usize = (0..lists.len())
        .map(|row| {
            let values = lists.value(row);
            let strings = values.as_string_view();
            (0..strings.len())
                .map(|idx| strings.value(idx).len())
                .sum::<usize>()
        })
        .sum();
    std::hint::black_box(total_len);
}
/// Benchmarks `ScalarValue::to_array_of_size` on list scalars of 1245
/// strings, comparing an inner array built from 1 chunk against one built
/// from 50 chunks, at batch sizes 256 and 1024.
fn bench_list_to_array_of_size(c: &mut Criterion) {
    let mut group = c.benchmark_group("list_to_array_of_size");

    let num_elements = 1245;
    // (benchmark label, scalar under test); built once, outside the timed loop.
    let cases = [
        ("1_buffer", make_list_scalar(num_elements, 1)),
        ("50_buffers", make_list_scalar(num_elements, 50)),
    ];

    for batch_size in [256, 1024] {
        for (label, scalar) in &cases {
            group.bench_with_input(
                BenchmarkId::new(*label, batch_size),
                &batch_size,
                |b, &sz| {
                    b.iter(|| {
                        let arr = scalar.to_array_of_size(sz).unwrap();
                        consume_list_array(&arr);
                    });
                },
            );
        }
    }

    group.finish();
}
criterion_group!(benches, bench_list_to_array_of_size);
criterion_main!(benches);
+89 -7
View File
@@ -3008,7 +3008,7 @@ impl ScalarValue {
///
/// Errors if `self` is
/// - a decimal that fails to be converted to a decimal array of size
/// - a `FixedsizeList` that fails to be concatenated into an array of size
/// - a `FixedSizeList` that fails to be concatenated into an array of size
/// - a `List` that fails to be concatenated into an array of size
/// - a `Dictionary` that fails to be converted to a dictionary array of size
pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
@@ -3434,13 +3434,22 @@ impl ScalarValue {
}
}
/// Repeats the rows of `arr` `size` times, producing an array with
/// `arr.len() * size` total rows.
///
/// When `size` is 0 the result is an empty slice of `arr`. Errors returned by
/// the underlying arrow kernel are propagated to the caller.
fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result<ArrayRef> {
// NOTE(review): this listing interleaves two bodies. The `repeat_n` +
// `concat` statements down to `Ok(ret)` look like the previous implementation
// (the one the change description says is being replaced); the `take`-based
// code after them is the replacement. Confirm that only the latter is present
// in the real file.
let arrays = repeat_n(arr, size).collect::<Vec<_>>();
let ret = match !arrays.is_empty() {
true => arrow::compute::concat(arrays.as_slice())?,
false => arr.slice(0, 0),
};
Ok(ret)
// No repetitions requested: return a zero-length slice of the input.
if size == 0 {
return Ok(arr.slice(0, 0));
}
// Examples: given `arr = [[A, B, C]]` and `size = 3`, `indices = [0, 0, 0]` and
// the result is `[[A, B, C], [A, B, C], [A, B, C]]`.
//
// Given `arr = [[A, B], [C]]` and `size = 2`, `indices = [0, 1, 0, 1]` and the
// result is `[[A, B], [C], [A, B], [C]]`. (But in practice, we are always called
// with `arr.len() == 1`.)
// NOTE(review): `as u32` silently truncates if `arr.len()` exceeds
// `u32::MAX`. Harmless while callers pass single-row arrays, but a checked
// conversion would make that assumption explicit.
let n = arr.len() as u32;
// The index sequence is `0..n` repeated `size` times, so `take` emits the
// rows of `arr` in order, `size` times over.
let indices = UInt32Array::from_iter_values((0..size).flat_map(|_| 0..n));
Ok(arrow::compute::take(arr, &indices, None)?)
}
/// Retrieve ScalarValue for each row in `array`
@@ -5532,6 +5541,79 @@ mod tests {
assert_eq!(empty_array.len(), 0);
}
#[test]
fn test_to_array_of_size_list_size_one() {
    // A single-row request (size=1 takes the fast path (Arc::clone)) must
    // return exactly the list stored in the scalar.
    let input =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![Some(10), Some(20)])]);
    let scalar = ScalarValue::List(Arc::new(input.clone()));

    let output = scalar.to_array_of_size(1).unwrap();

    assert_eq!(output.as_list::<i32>(), &input);
}
#[test]
fn test_to_array_of_size_list_empty_inner() {
    // The scalar is `[[]]`: one row whose list has no elements.
    let scalar = ScalarValue::List(Arc::new(
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![])]),
    ));

    let expanded = scalar.to_array_of_size(3).unwrap();
    let lists = expanded.as_list::<i32>();

    assert_eq!(lists.len(), 3);
    // Each of the repeated rows must still be an empty list.
    let row_lens: Vec<usize> = (0..3).map(|row| lists.value(row).len()).collect();
    assert_eq!(row_lens, vec![0; 3]);
}
#[test]
fn test_to_array_of_size_large_list() {
    // The same expansion must work for the 64-bit-offset list variant.
    let row = vec![Some(100), Some(200)];
    let scalar = ScalarValue::LargeList(Arc::new(
        LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(row.clone())]),
    ));

    let actual = scalar.to_array_of_size(3).unwrap();

    // Three identical copies of the single input row.
    let expected = LargeListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(row); 3]);
    assert_eq!(actual.as_list::<i64>(), &expected);
}
#[test]
fn test_list_to_array_of_size_multi_row() {
    // Exercise the helper directly on an input with arr.len() > 1.
    let rows: [Option<i32>; 3] = [Some(10), None, Some(30)];
    let input = Int32Array::from(rows.to_vec());

    let repeated = ScalarValue::list_to_array_of_size(&input, 3).unwrap();
    let actual: Vec<Option<i32>> = repeated.as_primitive::<Int32Type>().iter().collect();

    // The whole row sequence, nulls included, appears three times in order.
    assert_eq!(actual, rows.repeat(3));
}
#[test]
fn test_to_array_of_size_null_list() {
    // A scalar built from just the list data type should expand to rows that
    // are all null.
    let item_field = Arc::new(Field::new_list_field(DataType::Int32, true));
    let scalar = ScalarValue::try_from(&DataType::List(item_field)).unwrap();

    let expanded = scalar.to_array_of_size(3).unwrap();

    assert_eq!((expanded.len(), expanded.null_count()), (3, 3));
}
/// See https://github.com/apache/datafusion/issues/18870
#[test]
fn test_to_array_of_size_for_none_fsb() {
@@ -2011,9 +2011,10 @@ fn build_row_join_batch(
// Broadcast the single build-side row to match the filtered
// probe-side batch length
let original_left_array = build_side_batch.column(column_index.index);
// Avoid using `ScalarValue::to_array_of_size()` for `List(Utf8View)` to avoid
// deep copies for buffers inside `Utf8View` array. See below for details.
// https://github.com/apache/datafusion/issues/18159
// Use `arrow::compute::take` directly for `List(Utf8View)` rather
// than going through `ScalarValue::to_array_of_size()`, which
// avoids some intermediate allocations.
//
// In other cases, `to_array_of_size()` is faster.
match original_left_array.data_type() {