Use rustfmt default line width (#4960)

* Use rustfmt default line width

* Further format
This commit is contained in:
Raphael Taylor-Davies
2023-10-19 17:19:40 +01:00
committed by GitHub
parent f597d3a687
commit 7e134f4d27
289 changed files with 4999 additions and 8788 deletions
+18 -43
View File
@@ -207,15 +207,15 @@ where
}
let iter = ArrayIter::new(array);
let sum =
iter.into_iter()
.try_fold(T::default_value(), |accumulator, value| {
if let Some(value) = value {
accumulator.add_checked(value)
} else {
Ok(accumulator)
}
})?;
let sum = iter
.into_iter()
.try_fold(T::default_value(), |accumulator, value| {
if let Some(value) = value {
accumulator.add_checked(value)
} else {
Ok(accumulator)
}
})?;
Ok(Some(sum))
}
@@ -230,11 +230,7 @@ where
T: ArrowNumericType,
T::Native: ArrowNativeType,
{
min_max_array_helper::<T, A, _, _>(
array,
|a, b| (is_nan(*a) & !is_nan(*b)) || a > b,
min,
)
min_max_array_helper::<T, A, _, _>(array, |a, b| (is_nan(*a) & !is_nan(*b)) || a > b, min)
}
/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
@@ -244,11 +240,7 @@ where
T: ArrowNumericType,
T::Native: ArrowNativeType,
{
min_max_array_helper::<T, A, _, _>(
array,
|a, b| (!is_nan(*a) & is_nan(*b)) || a < b,
max,
)
min_max_array_helper::<T, A, _, _>(array, |a, b| (!is_nan(*a) & is_nan(*b)) || a < b, max)
}
fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
@@ -501,10 +493,7 @@ mod simd {
fn init_accumulator_chunk() -> Self::SimdAccumulator;
/// Updates the accumulator with the values of one chunk
fn accumulate_chunk_non_null(
accumulator: &mut Self::SimdAccumulator,
chunk: T::Simd,
);
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd);
/// Updates the accumulator with the values of one chunk according to the given vector mask
fn accumulate_chunk_nullable(
@@ -602,10 +591,7 @@ mod simd {
(T::init(T::default_value()), T::mask_init(false))
}
fn accumulate_chunk_non_null(
accumulator: &mut Self::SimdAccumulator,
chunk: T::Simd,
) {
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) {
let acc_is_nan = !T::eq(accumulator.0, accumulator.0);
let is_lt = acc_is_nan | T::lt(chunk, accumulator.0);
let first_or_lt = !accumulator.1 | is_lt;
@@ -627,10 +613,7 @@ mod simd {
accumulator.1 |= vecmask;
}
fn accumulate_scalar(
accumulator: &mut Self::ScalarAccumulator,
value: T::Native,
) {
fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) {
if !accumulator.1 {
accumulator.0 = value;
} else {
@@ -690,10 +673,7 @@ mod simd {
(T::init(T::default_value()), T::mask_init(false))
}
fn accumulate_chunk_non_null(
accumulator: &mut Self::SimdAccumulator,
chunk: T::Simd,
) {
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) {
let chunk_is_nan = !T::eq(chunk, chunk);
let is_gt = chunk_is_nan | T::gt(chunk, accumulator.0);
let first_or_gt = !accumulator.1 | is_gt;
@@ -715,10 +695,7 @@ mod simd {
accumulator.1 |= vecmask;
}
fn accumulate_scalar(
accumulator: &mut Self::ScalarAccumulator,
value: T::Native,
) {
fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) {
if !accumulator.1 {
accumulator.0 = value;
} else {
@@ -1009,8 +986,7 @@ mod tests {
#[test]
fn test_primitive_array_bool_or_with_nulls() {
let a =
BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
assert!(!bool_or(&a).unwrap());
}
@@ -1297,8 +1273,7 @@ mod tests {
assert_eq!(Some(false), min_boolean(&a));
assert_eq!(Some(true), max_boolean(&a));
let a =
BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
assert_eq!(Some(false), min_boolean(&a));
assert_eq!(Some(true), max_boolean(&a));
}
+19 -30
View File
@@ -48,8 +48,7 @@ fn get_fixed_point_info(
)));
}
let divisor =
i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32);
let divisor = i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32);
Ok((precision, product_scale, divisor))
}
@@ -78,8 +77,7 @@ pub fn multiply_fixed_point_dyn(
let left = left.as_any().downcast_ref::<Decimal128Array>().unwrap();
let right = right.as_any().downcast_ref::<Decimal128Array>().unwrap();
multiply_fixed_point(left, right, required_scale)
.map(|a| Arc::new(a) as ArrayRef)
multiply_fixed_point(left, right, required_scale).map(|a| Arc::new(a) as ArrayRef)
}
(_, _) => Err(ArrowError::CastError(format!(
"Unsupported data type {}, {}",
@@ -113,10 +111,8 @@ pub fn multiply_fixed_point_checked(
)?;
if required_scale == product_scale {
return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| {
a.mul_checked(b)
})?
.with_precision_and_scale(precision, required_scale);
return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| a.mul_checked(b))?
.with_precision_and_scale(precision, required_scale);
}
try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| {
@@ -213,17 +209,16 @@ mod tests {
.unwrap();
let err = mul(&a, &b).unwrap_err();
assert!(err.to_string().contains(
"Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
));
assert!(err
.to_string()
.contains("Overflow happened on: 123456789000000000000000000 * 10000000000000000000"));
// Allow precision loss.
let result = multiply_fixed_point_checked(&a, &b, 28).unwrap();
// [1234567890]
let expected =
Decimal128Array::from(vec![12345678900000000000000000000000000000])
.with_precision_and_scale(38, 28)
.unwrap();
let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000])
.with_precision_and_scale(38, 28)
.unwrap();
assert_eq!(&expected, &result);
assert_eq!(
@@ -233,13 +228,9 @@ mod tests {
// Rounding case
// [0.000000000000000001, 123456789.555555555555555555, 1.555555555555555555]
let a = Decimal128Array::from(vec![
1,
123456789555555555555555555,
1555555555555555555,
])
.with_precision_and_scale(38, 18)
.unwrap();
let a = Decimal128Array::from(vec![1, 123456789555555555555555555, 1555555555555555555])
.with_precision_and_scale(38, 18)
.unwrap();
// [1.555555555555555555, 11.222222222222222222, 0.000000000000000001]
let b = Decimal128Array::from(vec![1555555555555555555, 11222222222222222222, 1])
@@ -311,10 +302,9 @@ mod tests {
));
let result = multiply_fixed_point(&a, &b, 28).unwrap();
let expected =
Decimal128Array::from(vec![62946009661555981610246871926660136960])
.with_precision_and_scale(38, 28)
.unwrap();
let expected = Decimal128Array::from(vec![62946009661555981610246871926660136960])
.with_precision_and_scale(38, 28)
.unwrap();
assert_eq!(&expected, &result);
}
@@ -338,10 +328,9 @@ mod tests {
// Avoid overflow by reducing the scale.
let result = multiply_fixed_point(&a, &b, 28).unwrap();
// [1234567890]
let expected =
Decimal128Array::from(vec![12345678900000000000000000000000000000])
.with_precision_and_scale(38, 28)
.unwrap();
let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000])
.with_precision_and_scale(38, 28)
.unwrap();
assert_eq!(&expected, &result);
assert_eq!(
+6 -16
View File
@@ -49,10 +49,7 @@ where
}
/// See [`PrimitiveArray::try_unary`]
pub fn try_unary<I, F, O>(
array: &PrimitiveArray<I>,
op: F,
) -> Result<PrimitiveArray<O>, ArrowError>
pub fn try_unary<I, F, O>(array: &PrimitiveArray<I>, op: F) -> Result<PrimitiveArray<O>, ArrowError>
where
I: ArrowPrimitiveType,
O: ArrowPrimitiveType,
@@ -86,10 +83,7 @@ where
}
/// A helper function that applies a fallible unary function to a dictionary array with primitive value type.
fn try_unary_dict<K, F, T>(
array: &DictionaryArray<K>,
op: F,
) -> Result<ArrayRef, ArrowError>
fn try_unary_dict<K, F, T>(array: &DictionaryArray<K>, op: F) -> Result<ArrayRef, ArrowError>
where
K: ArrowDictionaryKeyType + ArrowNumericType,
T: ArrowPrimitiveType,
@@ -299,8 +293,7 @@ where
try_binary_no_nulls(len, a, b, op)
} else {
let nulls =
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref())
.unwrap();
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap();
let mut buffer = BufferBuilder::<O::Native>::new(len);
buffer.append_n_zeroed(len);
@@ -308,8 +301,7 @@ where
nulls.try_for_each_valid_idx(|idx| {
unsafe {
*slice.get_unchecked_mut(idx) =
op(a.value_unchecked(idx), b.value_unchecked(idx))?
*slice.get_unchecked_mut(idx) = op(a.value_unchecked(idx), b.value_unchecked(idx))?
};
Ok::<_, ArrowError>(())
})?;
@@ -360,8 +352,7 @@ where
try_binary_no_nulls_mut(len, a, b, op)
} else {
let nulls =
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref())
.unwrap();
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap();
let mut builder = a.into_builder()?;
@@ -440,8 +431,7 @@ mod tests {
#[test]
#[allow(deprecated)]
fn test_unary_f64_slice() {
let input =
Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]);
let input = Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]);
let input_slice = input.slice(1, 4);
let result = unary(&input_slice, |n| n.round());
assert_eq!(
+7 -14
View File
@@ -212,10 +212,8 @@ mod tests {
#[test]
fn test_bitwise_shift_left() {
let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]);
let right =
UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]);
let expected =
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]);
let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]);
let expected = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]);
let result = bitwise_shift_left(&left, &right).unwrap();
assert_eq!(expected, result);
}
@@ -224,18 +222,15 @@ mod tests {
fn test_bitwise_shift_left_scalar() {
let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]);
let scalar = 2;
let expected =
UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]);
let expected = UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]);
let result = bitwise_shift_left_scalar(&left, scalar).unwrap();
assert_eq!(expected, result);
}
#[test]
fn test_bitwise_shift_right() {
let left =
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
let right =
UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]);
let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]);
let expected = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(1)]);
let result = bitwise_shift_right(&left, &right).unwrap();
assert_eq!(expected, result);
@@ -243,11 +238,9 @@ mod tests {
#[test]
fn test_bitwise_shift_right_scalar() {
let left =
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
let scalar = 2;
let expected =
UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]);
let expected = UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]);
let result = bitwise_shift_right_scalar(&left, scalar).unwrap();
assert_eq!(expected, result);
}
+11 -28
View File
@@ -57,10 +57,7 @@ use arrow_schema::ArrowError;
/// # Fails
///
/// If the operands have different lengths
pub fn and_kleene(
left: &BooleanArray,
right: &BooleanArray,
) -> Result<BooleanArray, ArrowError> {
pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform bitwise operation on arrays of different length".to_string(),
@@ -155,10 +152,7 @@ pub fn and_kleene(
/// # Fails
///
/// If the operands have different lengths
pub fn or_kleene(
left: &BooleanArray,
right: &BooleanArray,
) -> Result<BooleanArray, ArrowError> {
pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
if left.len() != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform bitwise operation on arrays of different length".to_string(),
@@ -257,10 +251,7 @@ where
/// let and_ab = and(&a, &b).unwrap();
/// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None]));
/// ```
pub fn and(
left: &BooleanArray,
right: &BooleanArray,
) -> Result<BooleanArray, ArrowError> {
pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
binary_boolean_kernel(left, right, |a, b| a & b)
}
@@ -581,8 +572,7 @@ mod tests {
let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
let c = not(a).unwrap();
let expected =
BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
let expected = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
assert_eq!(c, expected);
}
@@ -631,12 +621,10 @@ mod tests {
#[test]
fn test_bool_array_and_sliced_same_offset() {
let a = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, false, true,
true,
false, false, false, false, false, false, false, false, false, false, true, true,
]);
let b = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, true, false,
true,
false, false, false, false, false, false, false, false, false, true, false, true,
]);
let a = a.slice(8, 4);
@@ -654,12 +642,10 @@ mod tests {
#[test]
fn test_bool_array_and_sliced_same_offset_mod8() {
let a = BooleanArray::from(vec![
false, false, true, true, false, false, false, false, false, false, false,
false,
false, false, true, true, false, false, false, false, false, false, false, false,
]);
let b = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, true, false,
true,
false, false, false, false, false, false, false, false, false, true, false, true,
]);
let a = a.slice(0, 4);
@@ -677,8 +663,7 @@ mod tests {
#[test]
fn test_bool_array_and_sliced_offset1() {
let a = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, false, true,
true,
false, false, false, false, false, false, false, false, false, false, true, true,
]);
let b = BooleanArray::from(vec![false, true, false, true]);
@@ -696,8 +681,7 @@ mod tests {
fn test_bool_array_and_sliced_offset2() {
let a = BooleanArray::from(vec![false, false, true, true]);
let b = BooleanArray::from(vec![
false, false, false, false, false, false, false, false, false, true, false,
true,
false, false, false, false, false, false, false, false, false, true, false, true,
]);
let b = b.slice(8, 4);
@@ -730,8 +714,7 @@ mod tests {
let c = and(a, b).unwrap();
let expected =
BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]);
let expected = BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]);
assert_eq!(expected, c);
}
+15 -20
View File
@@ -144,13 +144,13 @@ pub fn neg(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
let a = array
.as_primitive::<IntervalMonthDayNanoType>()
.try_unary::<_, IntervalMonthDayNanoType, ArrowError>(|x| {
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x);
Ok(IntervalMonthDayNanoType::make_value(
months.neg_checked()?,
days.neg_checked()?,
nanos.neg_checked()?,
))
})?;
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x);
Ok(IntervalMonthDayNanoType::make_value(
months.neg_checked()?,
days.neg_checked()?,
nanos.neg_checked()?,
))
})?;
Ok(Arc::new(a))
}
t => Err(ArrowError::InvalidArgumentError(format!(
@@ -201,11 +201,7 @@ impl Op {
}
/// Dispatch the given `op` to the appropriate specialized kernel
fn arithmetic_op(
op: Op,
lhs: &dyn Datum,
rhs: &dyn Datum,
) -> Result<ArrayRef, ArrowError> {
fn arithmetic_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<ArrayRef, ArrowError> {
use DataType::*;
use IntervalUnit::*;
use TimeUnit::*;
@@ -675,8 +671,7 @@ fn date_op<T: DateOp>(
(Date64, Op::Sub | Op::SubWrapping, Date64) => {
let l = l.as_primitive::<Date64Type>();
let r = r.as_primitive::<Date64Type>();
let result =
try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r));
let result = try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r));
return Ok(result);
}
_ => {}
@@ -800,8 +795,7 @@ fn decimal_op<T: DecimalType>(
let mul_pow = result_scale - s1 + s2;
// p1 - s1 + s2 + result_scale
let result_precision =
(mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION);
let result_precision = (mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION);
let (l_mul, r_mul) = match mul_pow.cmp(&0) {
Ordering::Greater => (
@@ -1158,7 +1152,10 @@ mod tests {
.with_precision_and_scale(3, -1)
.unwrap();
let err = add(&a, &b).unwrap_err().to_string();
assert_eq!(err, "Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000");
assert_eq!(
err,
"Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000"
);
let b = Decimal128Array::from(vec![0])
.with_precision_and_scale(1, 1)
@@ -1199,9 +1196,7 @@ mod tests {
"1960-01-30T04:23:20Z",
]
.into_iter()
.map(|x| {
T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap()
})
.map(|x| T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap())
.collect();
let a = PrimitiveArray::<T>::new(values, None);
+33 -75
View File
@@ -23,9 +23,7 @@ use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike};
use arrow_array::builder::*;
use arrow_array::iterator::ArrayIter;
use arrow_array::temporal_conversions::{
as_datetime, as_datetime_with_timezone, as_time,
};
use arrow_array::temporal_conversions::{as_datetime, as_datetime_with_timezone, as_time};
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
@@ -209,12 +207,9 @@ where
}
DataType::Timestamp(_, Some(tz)) => {
let iter = ArrayIter::new(array);
extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>(
iter,
b,
tz,
|t| t.hour() as i32,
)
extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>(iter, b, tz, |t| {
t.hour() as i32
})
}
_ => return_compute_error_with!("hour does not support", array.data_type()),
}
@@ -289,9 +284,7 @@ pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowErro
/// Monday is encoded as `0`, Tuesday as `1`, etc.
///
/// See also [`num_days_from_sunday`] which starts at Sunday.
pub fn num_days_from_monday<T>(
array: &PrimitiveArray<T>,
) -> Result<Int32Array, ArrowError>
pub fn num_days_from_monday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<T::Native>,
@@ -318,9 +311,7 @@ pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowErro
/// Sunday is encoded as `0`, Monday as `1`, etc.
///
/// See also [`num_days_from_monday`] which starts at Monday.
pub fn num_days_from_sunday<T>(
array: &PrimitiveArray<T>,
) -> Result<Int32Array, ArrowError>
pub fn num_days_from_sunday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<T::Native>,
@@ -449,11 +440,7 @@ pub fn millisecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
}
/// Extracts the time fraction of a given temporal array as an array of integers
fn time_fraction_dyn<F>(
array: &dyn Array,
name: &str,
op: F,
) -> Result<ArrayRef, ArrowError>
fn time_fraction_dyn<F>(array: &dyn Array, name: &str, op: F) -> Result<ArrayRef, ArrowError>
where
F: Fn(NaiveDateTime) -> i32,
{
@@ -498,14 +485,9 @@ where
}
DataType::Timestamp(_, Some(tz)) => {
let iter = ArrayIter::new(array);
extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| {
op(t.naive_local())
})
extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| op(t.naive_local()))
}
_ => return_compute_error_with!(
format!("{name} does not support"),
array.data_type()
),
_ => return_compute_error_with!(format!("{name} does not support"), array.data_type()),
}
}
@@ -559,8 +541,7 @@ mod tests {
#[test]
fn test_temporal_array_time64_micro_hour() {
let a: PrimitiveArray<Time64MicrosecondType> =
vec![37800000000, 86339000000].into();
let a: PrimitiveArray<Time64MicrosecondType> = vec![37800000000, 86339000000].into();
let b = hour(&a).unwrap();
assert_eq!(10, b.value(0));
@@ -623,12 +604,10 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_quarter_with_timezone() {
// 24 * 60 * 60 = 86400
let a = TimestampSecondArray::from(vec![86400 * 90])
.with_timezone("+00:00".to_string());
let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("+00:00".to_string());
let b = quarter(&a).unwrap();
assert_eq!(2, b.value(0));
let a = TimestampSecondArray::from(vec![86400 * 90])
.with_timezone("-10:00".to_string());
let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("-10:00".to_string());
let b = quarter(&a).unwrap();
assert_eq!(1, b.value(0));
}
@@ -659,12 +638,10 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_month_with_timezone() {
// 24 * 60 * 60 = 86400
let a = TimestampSecondArray::from(vec![86400 * 31])
.with_timezone("+00:00".to_string());
let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("+00:00".to_string());
let b = month(&a).unwrap();
assert_eq!(2, b.value(0));
let a = TimestampSecondArray::from(vec![86400 * 31])
.with_timezone("-10:00".to_string());
let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("-10:00".to_string());
let b = month(&a).unwrap();
assert_eq!(1, b.value(0));
}
@@ -672,12 +649,10 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_day_with_timezone() {
// 24 * 60 * 60 = 86400
let a =
TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string());
let a = TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string());
let b = day(&a).unwrap();
assert_eq!(2, b.value(0));
let a =
TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string());
let a = TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string());
let b = day(&a).unwrap();
assert_eq!(1, b.value(0));
}
@@ -857,8 +832,7 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_second_with_timezone() {
let a =
TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string());
let a = TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string());
let b = second(&a).unwrap();
assert_eq!(10, b.value(0));
assert_eq!(20, b.value(1));
@@ -866,8 +840,7 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_minute_with_timezone() {
let a =
TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string());
let a = TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string());
let b = minute(&a).unwrap();
assert_eq!(50, b.value(0));
assert_eq!(51, b.value(1));
@@ -875,48 +848,42 @@ mod tests {
#[test]
fn test_temporal_array_timestamp_minute_with_negative_timezone() {
let a =
TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string());
let a = TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string());
let b = minute(&a).unwrap();
assert_eq!(5, b.value(0));
}
#[test]
fn test_temporal_array_timestamp_hour_with_timezone() {
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
.with_timezone("+01:00".to_string());
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01:00".to_string());
let b = hour(&a).unwrap();
assert_eq!(11, b.value(0));
}
#[test]
fn test_temporal_array_timestamp_hour_with_timezone_without_colon() {
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
.with_timezone("+0100".to_string());
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+0100".to_string());
let b = hour(&a).unwrap();
assert_eq!(11, b.value(0));
}
#[test]
fn test_temporal_array_timestamp_hour_with_timezone_without_minutes() {
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
.with_timezone("+01".to_string());
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01".to_string());
let b = hour(&a).unwrap();
assert_eq!(11, b.value(0));
}
#[test]
fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign() {
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
.with_timezone("0100".to_string());
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("0100".to_string());
let err = hour(&a).unwrap_err().to_string();
assert!(err.contains("Invalid timezone"), "{}", err);
}
#[test]
fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() {
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
.with_timezone("01:00".to_string());
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("01:00".to_string());
let err = hour(&a).unwrap_err().to_string();
assert!(err.contains("Invalid timezone"), "{}", err);
}
@@ -960,10 +927,8 @@ mod tests {
let b = hour_dyn(&dict).unwrap();
let expected_dict = DictionaryArray::new(
keys.clone(),
Arc::new(Int32Array::from(vec![11, 21, 7])),
);
let expected_dict =
DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![11, 21, 7])));
let expected = Arc::new(expected_dict) as ArrayRef;
assert_eq!(&expected, &b);
@@ -987,8 +952,7 @@ mod tests {
assert_eq!(&expected, &b);
assert_eq!(&expected, &b_old);
let b =
time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap();
let b = time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap();
let expected_dict =
DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![0, 0, 0, 0, 0])));
@@ -998,8 +962,7 @@ mod tests {
#[test]
fn test_year_dictionary_array() {
let a: PrimitiveArray<Date64Type> =
vec![Some(1514764800000), Some(1550636625000)].into();
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into();
let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
@@ -1018,24 +981,20 @@ mod tests {
fn test_quarter_month_dictionary_array() {
//1514764800000 -> 2018-01-01
//1566275025000 -> 2019-08-20
let a: PrimitiveArray<Date64Type> =
vec![Some(1514764800000), Some(1566275025000)].into();
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1566275025000)].into();
let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
let b = quarter_dyn(&dict).unwrap();
let expected = DictionaryArray::new(
keys.clone(),
Arc::new(Int32Array::from(vec![1, 3, 3, 1])),
);
let expected =
DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 3, 3, 1])));
assert_eq!(b.as_ref(), &expected);
let b = month_dyn(&dict).unwrap();
let expected =
DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1])));
let expected = DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1])));
assert_eq!(b.as_ref(), &expected);
}
@@ -1043,8 +1002,7 @@ mod tests {
fn test_num_days_from_monday_sunday_day_doy_week_dictionary_array() {
//1514764800000 -> 2018-01-01 (Monday)
//1550636625000 -> 2019-02-20 (Wednesday)
let a: PrimitiveArray<Date64Type> =
vec![Some(1514764800000), Some(1550636625000)].into();
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into();
let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), Some(0), None]);
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
+1 -4
View File
@@ -229,10 +229,7 @@ macro_rules! native_type_op {
#[inline]
fn pow_checked(self, exp: u32) -> Result<Self, ArrowError> {
self.checked_pow(exp).ok_or_else(|| {
ArrowError::ComputeError(format!(
"Overflow happened on: {:?} ^ {exp:?}",
self
))
ArrowError::ComputeError(format!("Overflow happened on: {:?} ^ {exp:?}", self))
})
}
+20 -22
View File
@@ -16,9 +16,7 @@
// under the License.
use crate::types::{ByteArrayType, GenericBinaryType};
use crate::{
Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait,
};
use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait};
use arrow_data::ArrayData;
use arrow_schema::DataType;
@@ -102,9 +100,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
}
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>>
for GenericBinaryArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>> for GenericBinaryArray<OffsetSize> {
fn from(v: Vec<Option<&[u8]>>) -> Self {
Self::from_opt_vec(v)
}
@@ -376,9 +372,11 @@ mod tests {
.unwrap();
let binary_array1 = GenericBinaryArray::<O>::from(array_data1);
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt8, false),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt8,
false,
)));
let array_data2 = ArrayData::builder(data_type)
.len(3)
@@ -423,9 +421,11 @@ mod tests {
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
let null_buffer = Buffer::from_slice_ref([0b101]);
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt8, false),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt8,
false,
)));
// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
@@ -456,9 +456,7 @@ mod tests {
_test_generic_binary_array_from_list_array_with_offset::<i64>();
}
fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<
O: OffsetSizeTrait,
>() {
fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
let values = b"HelloArrow";
let child_data = ArrayData::builder(DataType::UInt8)
.len(10)
@@ -468,9 +466,11 @@ mod tests {
.unwrap();
let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt8, true),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt8,
true,
)));
// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
@@ -558,8 +558,7 @@ mod tests {
.unwrap();
let offsets: [i32; 4] = [0, 5, 5, 12];
let data_type =
DataType::List(Arc::new(Field::new("item", DataType::UInt32, false)));
let data_type = DataType::List(Arc::new(Field::new("item", DataType::UInt32, false)));
let array_data = ArrayData::builder(data_type)
.len(3)
.add_buffer(Buffer::from_slice_ref(offsets))
@@ -575,8 +574,7 @@ mod tests {
expected = "Trying to access an element at index 4 from a BinaryArray of length 3"
)]
fn test_binary_array_get_value_index_out_of_bound() {
let values: [u8; 12] =
[104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116];
let values: [u8; 12] = [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116];
let offsets: [i32; 4] = [0, 5, 5, 12];
let array_data = ArrayData::builder(DataType::Binary)
.len(3)
+3 -10
View File
@@ -238,11 +238,7 @@ impl BooleanArray {
///
/// This function panics if left and right are not the same length
///
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(
left: T,
right: S,
mut op: F,
) -> Self
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
where
F: FnMut(T::Item, S::Item) -> bool,
{
@@ -362,8 +358,7 @@ impl From<ArrayData> for BooleanArray {
1,
"BooleanArray data should contain a single buffer only (values buffer)"
);
let values =
BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
Self {
values,
@@ -591,9 +586,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "BooleanArray expected ArrayData with type Boolean got Int32"
)]
#[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
fn test_from_array_data_validation() {
let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
}
+6 -12
View File
@@ -197,8 +197,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {
let (_, data_len) = iter.size_hint();
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
let mut offsets =
MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
let mut offsets = MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
offsets.push(T::Offset::usize_as(0));
let mut values = MutableBuffer::new(0);
@@ -335,8 +334,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {
/// offset and data buffers are not shared by others.
pub fn into_builder(self) -> Result<GenericByteBuilder<T>, Self> {
let len = self.len();
let value_len =
T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
let value_len = T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
let data = self.into_data();
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
@@ -578,17 +576,14 @@ mod tests {
let nulls = NullBuffer::new_null(3);
let err =
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone()))
.unwrap_err();
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3");
let err =
BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3");
let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None)
.unwrap_err();
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2");
BinaryArray::new(offsets, non_utf8_data, None);
@@ -611,8 +606,7 @@ mod tests {
BinaryArray::new(offsets, non_ascii_data.clone(), None);
let offsets = OffsetBuffer::new(vec![0, 3, 10].into());
let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None)
.unwrap_err();
let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None).unwrap_err();
assert_eq!(
err.to_string(),
"Invalid argument error: Split UTF-8 codepoint at offset 3"
+19 -38
View File
@@ -286,10 +286,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
/// # Errors
///
/// Returns an error if any `keys[i] >= values.len() || keys[i] < 0`
pub fn try_new(
keys: PrimitiveArray<K>,
values: ArrayRef,
) -> Result<Self, ArrowError> {
pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
let data_type = DataType::Dictionary(
Box::new(keys.data_type().clone()),
Box::new(values.data_type().clone()),
@@ -298,9 +295,11 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
let zero = K::Native::usize_as(0);
let values_len = values.len();
if let Some((idx, v)) = keys.values().iter().enumerate().find(|(idx, v)| {
(v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
}) {
if let Some((idx, v)) =
keys.values().iter().enumerate().find(|(idx, v)| {
(v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
})
{
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
)));
@@ -349,8 +348,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
///
/// Panics if `values` is not a [`StringArray`].
pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
let rd_buf: &StringArray =
self.values.as_any().downcast_ref::<StringArray>().unwrap();
let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
(0..rd_buf.len())
.position(|i| rd_buf.value(i) == value)
@@ -463,10 +461,8 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
///
pub fn with_values(&self, values: ArrayRef) -> Self {
assert!(values.len() >= self.values.len());
let data_type = DataType::Dictionary(
Box::new(K::DATA_TYPE),
Box::new(values.data_type().clone()),
);
let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
Self {
data_type,
keys: self.keys.clone(),
@@ -477,9 +473,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
/// Returns `PrimitiveDictionaryBuilder` of this dictionary array for mutating
/// its keys and values if the underlying data buffer is not shared by others.
pub fn into_primitive_dict_builder<V>(
self,
) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
where
V: ArrowPrimitiveType,
{
@@ -540,8 +534,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
V: ArrowPrimitiveType,
F: Fn(V::Native) -> V::Native,
{
let mut builder: PrimitiveDictionaryBuilder<K, V> =
self.into_primitive_dict_builder()?;
let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
builder
.values_slice_mut()
.iter_mut()
@@ -806,9 +799,7 @@ impl<'a, K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'a, K, V>
impl<'a, K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'a, K, V> {}
impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug
for TypedDictionaryArray<'a, K, V>
{
impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'a, K, V> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
}
@@ -1040,8 +1031,7 @@ mod tests {
// Construct a dictionary array from the above two
let key_type = DataType::Int16;
let value_type = DataType::Int8;
let dict_data_type =
DataType::Dictionary(Box::new(key_type), Box::new(value_type));
let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
let dict_data = ArrayData::builder(dict_data_type.clone())
.len(3)
.add_buffer(keys.clone())
@@ -1079,8 +1069,7 @@ mod tests {
#[test]
fn test_dictionary_array_fmt_debug() {
let mut builder =
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
builder.append(12345678).unwrap();
builder.append_null();
builder.append(22345678).unwrap();
@@ -1090,8 +1079,7 @@ mod tests {
format!("{array:?}")
);
let mut builder =
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
for _ in 0..20 {
builder.append(1).unwrap();
}
@@ -1267,9 +1255,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2"
)]
#[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
fn test_try_new_index_too_large() {
let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
// dictionary only has 2 values, so offset 3 is out of bounds
@@ -1278,9 +1264,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2"
)]
#[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
fn test_try_new_index_too_small() {
let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
let keys: Int32Array = [Some(-100)].into_iter().collect();
@@ -1288,9 +1272,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "DictionaryArray's data type must match, expected Int64 got Int32"
)]
#[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
fn test_from_array_data_validation() {
let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
let _ = DictionaryArray::<Int64Type>::from(a.into_data());
@@ -1335,8 +1317,7 @@ mod tests {
let boxed: ArrayRef = Arc::new(dict_array);
let col: DictionaryArray<Int8Type> =
DictionaryArray::<Int8Type>::from(boxed.to_data());
let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
let err = col.into_primitive_dict_builder::<Int32Type>();
let returned = err.unwrap_err();
@@ -81,10 +81,7 @@ impl FixedSizeBinaryArray {
) -> Result<Self, ArrowError> {
let data_type = DataType::FixedSizeBinary(size);
let s = size.to_usize().ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"Size cannot be negative, got {}",
size
))
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
})?;
let len = values.len() / s;
@@ -333,10 +330,7 @@ impl FixedSizeBinaryArray {
/// # Errors
///
/// Returns error if argument has length zero, or sizes of nested slices don't match.
pub fn try_from_sparse_iter_with_size<T, U>(
mut iter: T,
size: i32,
) -> Result<Self, ArrowError>
pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) -> Result<Self, ArrowError>
where
T: Iterator<Item = Option<U>>,
U: AsRef<[u8]>,
@@ -812,8 +806,7 @@ mod tests {
let none_option: Option<[u8; 32]> = None;
let input_arg = vec![none_option, none_option, none_option];
#[allow(deprecated)]
let arr =
FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
assert_eq!(0, arr.value_length());
assert_eq!(3, arr.len())
}
@@ -828,16 +821,12 @@ mod tests {
Some(vec![13, 14]),
];
#[allow(deprecated)]
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned())
.unwrap();
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
assert_eq!(2, arr.value_length());
assert_eq!(5, arr.len());
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
input_arg.into_iter(),
2,
)
.unwrap();
let arr =
FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
assert_eq!(2, arr.value_length());
assert_eq!(5, arr.len());
}
@@ -846,11 +835,8 @@ mod tests {
fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
input_arg.into_iter(),
16,
)
.unwrap();
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
.unwrap();
assert_eq!(16, arr.value_length());
assert_eq!(5, arr.len())
}
@@ -917,8 +903,7 @@ mod tests {
fn fixed_size_binary_array_all_null() {
let data = vec![None] as Vec<Option<String>>;
let array =
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0)
.unwrap();
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap();
array
.into_data()
.validate_full()
@@ -928,8 +913,7 @@ mod tests {
#[test]
// Test for https://github.com/apache/arrow-rs/issues/1390
fn fixed_size_binary_array_all_null_in_batch_with_schema() {
let schema =
Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
let none_option: Option<[u8; 2]> = None;
let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+16 -38
View File
@@ -130,12 +130,7 @@ impl FixedSizeListArray {
/// # Panics
///
/// Panics if [`Self::try_new`] returns an error
pub fn new(
field: FieldRef,
size: i32,
values: ArrayRef,
nulls: Option<NullBuffer>,
) -> Self {
pub fn new(field: FieldRef, size: i32, values: ArrayRef, nulls: Option<NullBuffer>) -> Self {
Self::try_new(field, size, values, nulls).unwrap()
}
@@ -154,10 +149,7 @@ impl FixedSizeListArray {
nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
let s = size.to_usize().ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"Size cannot be negative, got {}",
size
))
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
})?;
let len = values.len() / s.max(1);
@@ -350,9 +342,8 @@ impl From<ArrayData> for FixedSizeListArray {
};
let size = value_length as usize;
let values = make_array(
data.child_data()[0].slice(data.offset() * size, data.len() * size),
);
let values =
make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size));
Self {
data_type: data.data_type().clone(),
values,
@@ -483,10 +474,8 @@ mod tests {
.unwrap();
// Construct a list array from the above two
let list_data_type = DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Int32, false)),
3,
);
let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3);
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_child_data(value_data.clone())
@@ -538,10 +527,8 @@ mod tests {
.unwrap();
// Construct a list array from the above two
let list_data_type = DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Int32, false)),
3,
);
let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3);
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
@@ -569,10 +556,8 @@ mod tests {
bit_util::set_bit(&mut null_bits, 4);
// Construct a fixed size list array from the above two
let list_data_type = DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Int32, false)),
2,
);
let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2);
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data.clone())
@@ -611,9 +596,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_fixed_size_list_array_index_out_of_bound() {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
@@ -631,10 +614,8 @@ mod tests {
bit_util::set_bit(&mut null_bits, 4);
// Construct a fixed size list array from the above two
let list_data_type = DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Int32, false)),
2,
);
let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2);
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data)
@@ -668,8 +649,7 @@ mod tests {
let list = FixedSizeListArray::new(field.clone(), 4, values.clone(), None);
assert_eq!(list.len(), 1);
let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None)
.unwrap_err();
let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err();
assert_eq!(
err.to_string(),
"Invalid argument error: Size cannot be negative, got -1"
@@ -679,13 +659,11 @@ mod tests {
assert_eq!(list.len(), 6);
let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls))
.unwrap_err();
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2");
let field = Arc::new(Field::new("item", DataType::Int32, false));
let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None)
.unwrap_err();
let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"");
// Valid as nulls in child masked by parent
+28 -52
View File
@@ -372,9 +372,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
fn from(data: ArrayData) -> Self {
Self::try_new_from_array_data(data).expect(
"Expected infallible creation of GenericListArray from ArrayDataRef failed",
)
Self::try_new_from_array_data(data)
.expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
}
}
@@ -391,17 +390,14 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayDa
}
}
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
for GenericListArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
fn from(value: FixedSizeListArray) -> Self {
let (field, size) = match value.data_type() {
DataType::FixedSizeList(f, size) => (f, *size as usize),
_ => unreachable!(),
};
let offsets =
OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
Self {
data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
@@ -415,9 +411,10 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
if data.buffers().len() != 1 {
return Err(ArrowError::InvalidArgumentError(
format!("ListArray data should contain a single buffer only (value offsets), had {}",
data.buffers().len())));
return Err(ArrowError::InvalidArgumentError(format!(
"ListArray data should contain a single buffer only (value offsets), had {}",
data.buffers().len()
)));
}
if data.child_data().len() != 1 {
@@ -593,8 +590,7 @@ mod tests {
let value_offsets = Buffer::from([]);
// Construct a list array from the above two
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(0)
.add_buffer(value_offsets)
@@ -620,8 +616,7 @@ mod tests {
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
// Construct a list array from the above two
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_buffer(value_offsets.clone())
@@ -807,8 +802,7 @@ mod tests {
bit_util::set_bit(&mut null_bits, 8);
// Construct a list array from the above two
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
@@ -839,8 +833,7 @@ mod tests {
}
// Check offset and length for each non-null value.
let sliced_list_array =
sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
assert_eq!(2, sliced_list_array.value_offsets()[2]);
assert_eq!(2, sliced_list_array.value_length(2));
assert_eq!(4, sliced_list_array.value_offsets()[3]);
@@ -951,9 +944,7 @@ mod tests {
list_array.value(10);
}
#[test]
#[should_panic(
expected = "ListArray data should contain a single buffer only (value offsets)"
)]
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
@@ -964,8 +955,7 @@ mod tests {
.add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
.build_unchecked()
};
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
@@ -976,16 +966,13 @@ mod tests {
}
#[test]
#[should_panic(
expected = "ListArray should contain a single child array (values array)"
)]
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_list_array_invalid_child_array_len() {
let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
@@ -996,9 +983,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List"
)]
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
fn test_from_array_data_validation() {
let mut builder = ListBuilder::new(Int32Builder::new());
builder.values().append_value(1);
@@ -1017,8 +1002,7 @@ mod tests {
let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -1033,9 +1017,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Memory pointer is not aligned with the specified scalar type"
)]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
@@ -1051,9 +1033,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Memory pointer is not aligned with the specified scalar type"
)]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
// Different error messages, so skip for now
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
@@ -1068,8 +1048,7 @@ mod tests {
.build_unchecked()
};
let list_data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data = unsafe {
ArrayData::builder(list_data_type)
.add_buffer(buf2)
@@ -1187,9 +1166,8 @@ mod tests {
let nulls = NullBuffer::new_null(3);
let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
let err =
LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
.unwrap_err();
let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
.unwrap_err();
assert_eq!(
err.to_string(),
@@ -1197,9 +1175,8 @@ mod tests {
);
let field = Arc::new(Field::new("element", DataType::Int64, false));
let err =
LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
.unwrap_err();
let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
.unwrap_err();
assert_eq!(
err.to_string(),
@@ -1210,8 +1187,8 @@ mod tests {
let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
let values = Arc::new(values);
let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), None)
.unwrap_err();
let err =
LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
assert_eq!(
err.to_string(),
@@ -1222,8 +1199,7 @@ mod tests {
LargeListArray::new(field.clone(), offsets.clone(), values, None);
let values = Int64Array::new(vec![0; 2].into(), None);
let err =
LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
assert_eq!(
err.to_string(),
+16 -26
View File
@@ -17,9 +17,7 @@
use crate::array::{get_offsets, print_long_array};
use crate::iterator::MapArrayIter;
use crate::{
make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray,
};
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field, FieldRef};
@@ -264,9 +262,10 @@ impl MapArray {
}
if data.buffers().len() != 1 {
return Err(ArrowError::InvalidArgumentError(
format!("MapArray data should contain a single buffer only (value offsets), had {}",
data.len())));
return Err(ArrowError::InvalidArgumentError(format!(
"MapArray data should contain a single buffer only (value offsets), had {}",
data.len()
)));
}
if data.child_data().len() != 1 {
@@ -281,9 +280,9 @@ impl MapArray {
if let DataType::Struct(fields) = entries.data_type() {
if fields.len() != 2 {
return Err(ArrowError::InvalidArgumentError(format!(
"MapArray should contain a struct array with 2 fields, have {} fields",
fields.len()
)));
"MapArray should contain a struct array with 2 fields, have {} fields",
fields.len()
)));
}
} else {
return Err(ArrowError::InvalidArgumentError(format!(
@@ -576,8 +575,7 @@ mod tests {
assert_eq!(2, map_array.value_length(1));
let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
let value_array =
Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
let struct_array =
StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
assert_eq!(
@@ -669,9 +667,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "MapArray expected ArrayData with DataType::Map got Dictionary"
)]
#[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
fn test_from_array_data_validation() {
// A DictionaryArray has similar buffer layout to a MapArray
// but the meaning of the values differs
@@ -692,12 +688,9 @@ mod tests {
// [[a, b, c], [d, e, f], [g, h]]
let entry_offsets = [0, 3, 6, 8];
let map_array = MapArray::new_from_strings(
keys.clone().into_iter(),
&values_data,
&entry_offsets,
)
.unwrap();
let map_array =
MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
.unwrap();
assert_eq!(
&values_data,
@@ -768,9 +761,8 @@ mod tests {
"Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
);
let err =
MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
.unwrap_err();
let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
.unwrap_err();
assert_eq!(
err.to_string(),
@@ -783,9 +775,7 @@ mod tests {
.to_string();
assert!(
err.starts_with(
"Invalid argument error: MapArray expected data type Int64 got Struct"
),
err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
"{err}"
);
+26 -67
View File
@@ -536,9 +536,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
DataType::Time32(TimeUnit::Second) => {
Arc::new(Time32SecondArray::from(data)) as ArrayRef
}
DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
DataType::Time32(TimeUnit::Millisecond) => {
Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
}
@@ -583,9 +581,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
}
DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
DataType::FixedSizeBinary(_) => {
Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef
}
DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
@@ -593,50 +589,24 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
DataType::FixedSizeList(_, _) => {
Arc::new(FixedSizeListArray::from(data)) as ArrayRef
}
DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
DataType::Int8 => {
Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef
}
DataType::Int16 => {
Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef
}
DataType::Int32 => {
Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef
}
DataType::Int64 => {
Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef
}
DataType::UInt8 => {
Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef
}
DataType::UInt16 => {
Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef
}
DataType::UInt32 => {
Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef
}
DataType::UInt64 => {
Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef
}
DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
dt => panic!("Unexpected dictionary key type {dt:?}"),
},
DataType::RunEndEncoded(ref run_ends_type, _) => {
match run_ends_type.data_type() {
DataType::Int16 => {
Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef
}
DataType::Int32 => {
Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef
}
DataType::Int64 => {
Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef
}
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
}
}
DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
},
DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
@@ -687,11 +657,8 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
match data.is_empty() && data.buffers()[0].is_empty() {
true => OffsetBuffer::new_empty(),
false => {
let buffer = ScalarBuffer::new(
data.buffers()[0].clone(),
data.offset(),
data.len() + 1,
);
let buffer =
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
// Safety:
// ArrayData is valid
unsafe { OffsetBuffer::new_unchecked(buffer) }
@@ -700,11 +667,7 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
}
/// Helper function for printing potentially long arrays.
fn print_long_array<A, F>(
array: &A,
f: &mut std::fmt::Formatter,
print_item: F,
) -> std::fmt::Result
fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
where
A: Array,
F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
@@ -767,8 +730,7 @@ mod tests {
#[test]
fn test_empty_list_primitive() {
let data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
let array = new_empty_array(&data_type);
let a = array.as_any().downcast_ref::<ListArray>().unwrap();
assert_eq!(a.len(), 0);
@@ -799,8 +761,7 @@ mod tests {
fn test_null_struct() {
// It is possible to create a null struct containing a non-nullable child
// see https://github.com/apache/arrow-rs/pull/3244 for details
let struct_type =
DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
let array = new_null_array(&struct_type, 9);
let a = array.as_any().downcast_ref::<StructArray>().unwrap();
@@ -827,8 +788,7 @@ mod tests {
#[test]
fn test_null_list_primitive() {
let data_type =
DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
let array = new_null_array(&data_type, 9);
let a = array.as_any().downcast_ref::<ListArray>().unwrap();
assert_eq!(a.len(), 9);
@@ -862,8 +822,8 @@ mod tests {
#[test]
fn test_null_dictionary() {
let values = vec![None, None, None, None, None, None, None, None, None]
as Vec<Option<&str>>;
let values =
vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;
let array: DictionaryArray<Int8Type> = values.into_iter().collect();
let array = Arc::new(array) as ArrayRef;
@@ -965,8 +925,7 @@ mod tests {
#[test]
fn test_memory_size_primitive() {
let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
let empty =
PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
// subtract empty array to avoid magic numbers for the size of additional fields
assert_eq!(
+44 -100
View File
@@ -917,8 +917,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
let element_len = std::mem::size_of::<T::Native>();
let buffer = data.buffers()[0]
.slice_with_length(data.offset() * element_len, len * element_len);
let buffer =
data.buffers()[0].slice_with_length(data.offset() * element_len, len * element_len);
drop(data);
@@ -1116,10 +1116,9 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
},
// if the time zone is invalid, shows NaiveDateTime with an error message
Err(_) => match as_datetime::<T>(v) {
Some(datetime) => write!(
f,
"{datetime:?} (Unknown Time Zone '{tz_string}')"
),
Some(datetime) => {
write!(f, "{datetime:?} (Unknown Time Zone '{tz_string}')")
}
None => write!(f, "null"),
},
}
@@ -1191,25 +1190,19 @@ def_from_for_primitive!(Float64Type, f64);
def_from_for_primitive!(Decimal128Type, i128);
def_from_for_primitive!(Decimal256Type, i256);
impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>>
for NativeAdapter<T>
{
impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> {
fn from(value: Option<<T as ArrowPrimitiveType>::Native>) -> Self {
NativeAdapter { native: value }
}
}
impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>>
for NativeAdapter<T>
{
impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> {
fn from(value: &Option<<T as ArrowPrimitiveType>::Native>) -> Self {
NativeAdapter { native: *value }
}
}
impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
for PrimitiveArray<T>
{
impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr> for PrimitiveArray<T> {
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
let iter = iter.into_iter();
let (lower, _) = iter.size_hint();
@@ -1265,15 +1258,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
let (null, buffer) = trusted_len_unzip(iterator);
let data = ArrayData::new_unchecked(
T::DATA_TYPE,
len,
None,
Some(null),
0,
vec![buffer],
vec![],
);
let data =
ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]);
PrimitiveArray::from(data)
}
}
@@ -1294,9 +1280,7 @@ macro_rules! def_numeric_from_vec {
}
// Constructs a primitive array from a vector. Should only be used for testing.
impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>>
for PrimitiveArray<$ty>
{
impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>> for PrimitiveArray<$ty> {
fn from(data: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self {
PrimitiveArray::from_iter(data.iter())
}
@@ -1392,8 +1376,7 @@ impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> {
"PrimitiveArray data should contain a single buffer only (values buffer)"
);
let values =
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
let values = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
Self {
data_type: data.data_type().clone(),
values,
@@ -1407,11 +1390,7 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
/// specified precision and scale.
///
/// See [`validate_decimal_precision_and_scale`]
pub fn with_precision_and_scale(
self,
precision: u8,
scale: i8,
) -> Result<Self, ArrowError> {
pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
validate_decimal_precision_and_scale::<T>(precision, scale)?;
Ok(Self {
data_type: T::TYPE_CONSTRUCTOR(precision, scale),
@@ -1575,8 +1554,7 @@ mod tests {
// 1: 00:00:00.001
// 37800005: 10:30:00.005
// 86399210: 23:59:59.210
let arr: PrimitiveArray<Time32MillisecondType> =
vec![1, 37_800_005, 86_399_210].into();
let arr: PrimitiveArray<Time32MillisecondType> = vec![1, 37_800_005, 86_399_210].into();
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(0, arr.null_count());
@@ -1858,11 +1836,7 @@ mod tests {
#[test]
fn test_timestamp_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
]);
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]);
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]",
format!("{arr:?}")
@@ -1872,12 +1846,8 @@ mod tests {
#[test]
fn test_timestamp_utc_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
])
.with_timezone_utc();
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
.with_timezone_utc();
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n 2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n 1921-01-02T00:00:00+00:00,\n]",
format!("{arr:?}")
@@ -1888,12 +1858,8 @@ mod tests {
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_named_tz_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
])
.with_timezone("Asia/Taipei".to_string());
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
.with_timezone("Asia/Taipei".to_string());
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]",
format!("{:?}", arr)
@@ -1904,12 +1870,8 @@ mod tests {
#[cfg(not(feature = "chrono-tz"))]
fn test_timestamp_with_named_tz_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
])
.with_timezone("Asia/Taipei".to_string());
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
.with_timezone("Asia/Taipei".to_string());
println!("{arr:?}");
@@ -1922,12 +1884,8 @@ mod tests {
#[test]
fn test_timestamp_with_fixed_offset_tz_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
])
.with_timezone("+08:00".to_string());
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
.with_timezone("+08:00".to_string());
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]",
format!("{arr:?}")
@@ -1937,12 +1895,8 @@ mod tests {
#[test]
fn test_timestamp_with_incorrect_tz_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1546214400000,
1546214400000,
-1546214400000,
])
.with_timezone("xxx".to_string());
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
.with_timezone("xxx".to_string());
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
format!("{arr:?}")
@@ -1952,14 +1906,13 @@ mod tests {
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_tz_with_daylight_saving_fmt_debug() {
let arr: PrimitiveArray<TimestampMillisecondType> =
TimestampMillisecondArray::from(vec![
1647161999000,
1647162000000,
1667717999000,
1667718000000,
])
.with_timezone("America/Denver".to_string());
let arr: PrimitiveArray<TimestampMillisecondType> = TimestampMillisecondArray::from(vec![
1647161999000,
1647162000000,
1667717999000,
1667718000000,
])
.with_timezone("America/Denver".to_string());
assert_eq!(
"PrimitiveArray<Timestamp(Millisecond, Some(\"America/Denver\"))>\n[\n 2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]",
format!("{:?}", arr)
@@ -1997,8 +1950,7 @@ mod tests {
#[test]
fn test_timestamp_micros_out_of_range() {
// replicate the issue from https://github.com/apache/arrow-datafusion/issues/3832
let arr: PrimitiveArray<TimestampMicrosecondType> =
vec![9065525203050843594].into();
let arr: PrimitiveArray<TimestampMicrosecondType> = vec![9065525203050843594].into();
assert_eq!(
"PrimitiveArray<Timestamp(Microsecond, None)>\n[\n null,\n]",
format!("{arr:?}")
@@ -2143,8 +2095,7 @@ mod tests {
#[test]
fn test_decimal256() {
let values: Vec<_> =
vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX];
let values: Vec<_> = vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX];
let array: PrimitiveArray<Decimal256Type> =
PrimitiveArray::from_iter(values.iter().copied());
@@ -2166,8 +2117,8 @@ mod tests {
// let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
// let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255];
let values: [u8; 32] = [
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255,
];
let array_data = ArrayData::builder(DataType::Decimal128(38, 6))
.len(2)
@@ -2232,8 +2183,7 @@ mod tests {
#[test]
fn test_decimal_from_iter() {
let array: Decimal128Array =
vec![Some(-100), None, Some(101)].into_iter().collect();
let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
assert_eq!(array.len(), 3);
assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));
assert_eq!(-100_i128, array.value(0));
@@ -2343,8 +2293,7 @@ mod tests {
#[test]
fn test_decimal_array_set_null_if_overflow_with_precision() {
let array =
Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
let array = Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
let result = array.null_if_overflow_precision(5);
let expected = Decimal128Array::from(vec![None, Some(123), None, None]);
assert_eq!(result, expected);
@@ -2361,8 +2310,7 @@ mod tests {
let decimal2 = i256::from_i128(56789);
builder.append_value(decimal2);
let array: Decimal256Array =
builder.finish().with_precision_and_scale(76, 6).unwrap();
let array: Decimal256Array = builder.finish().with_precision_and_scale(76, 6).unwrap();
let collected: Vec<_> = array.iter().collect();
assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
@@ -2387,8 +2335,7 @@ mod tests {
#[test]
fn test_from_iter_decimal128array() {
let mut array: Decimal128Array =
vec![Some(-100), None, Some(101)].into_iter().collect();
let mut array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
array = array.with_precision_and_scale(38, 10).unwrap();
assert_eq!(array.len(), 3);
assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));
@@ -2404,13 +2351,11 @@ mod tests {
let array = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]);
let r = array.unary_opt::<_, Int32Type>(|x| (x % 2 != 0).then_some(x));
let expected =
Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
assert_eq!(r, expected);
let r = expected.unary_opt::<_, Int32Type>(|x| (x % 3 != 0).then_some(x));
let expected =
Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
let expected = Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
assert_eq!(r, expected);
}
@@ -2513,9 +2458,8 @@ mod tests {
Int32Array::new(vec![1, 2, 3, 4].into(), None);
Int32Array::new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(4)));
let err =
Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3)))
.unwrap_err();
let err = Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3)))
.unwrap_err();
assert_eq!(
err.to_string(),
+18 -31
View File
@@ -91,10 +91,7 @@ impl<R: RunEndIndexType> RunArray<R> {
/// Attempts to create RunArray using given run_ends (index where a run ends)
/// and the values (value of the run). Returns an error if the given data is not compatible
/// with RunEndEncoded specification.
pub fn try_new(
run_ends: &PrimitiveArray<R>,
values: &dyn Array,
) -> Result<Self, ArrowError> {
pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
let run_ends_type = run_ends.data_type().clone();
let values_type = values.data_type().clone();
let ree_array_type = DataType::RunEndEncoded(
@@ -182,10 +179,7 @@ impl<R: RunEndIndexType> RunArray<R> {
/// scaled well for larger inputs.
/// See <https://github.com/apache/arrow-rs/pull/3622#issuecomment-1407753727> for more details.
#[inline]
pub fn get_physical_indices<I>(
&self,
logical_indices: &[I],
) -> Result<Vec<usize>, ArrowError>
pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
where
I: ArrowNativeType,
{
@@ -211,8 +205,7 @@ impl<R: RunEndIndexType> RunArray<R> {
});
// Return early if all the logical indices cannot be converted to physical indices.
let largest_logical_index =
logical_indices[*ordered_indices.last().unwrap()].as_usize();
let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
if largest_logical_index >= len {
return Err(ArrowError::InvalidArgumentError(format!(
"Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
@@ -225,8 +218,7 @@ impl<R: RunEndIndexType> RunArray<R> {
let mut physical_indices = vec![0; indices_len];
let mut ordered_index = 0_usize;
for (physical_index, run_end) in
self.run_ends.values().iter().enumerate().skip(skip_value)
for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
{
// Get the run end index (relative to offset) of current physical index
let run_end_value = run_end.as_usize() - offset;
@@ -234,8 +226,7 @@ impl<R: RunEndIndexType> RunArray<R> {
// All the `logical_indices` that are less than current run end index
// belongs to current physical index.
while ordered_index < indices_len
&& logical_indices[ordered_indices[ordered_index]].as_usize()
< run_end_value
&& logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
{
physical_indices[ordered_indices[ordered_index]] = physical_index;
ordered_index += 1;
@@ -245,8 +236,7 @@ impl<R: RunEndIndexType> RunArray<R> {
// If there are input values >= run_ends.last_value then we'll not be able to convert
// all logical indices to physical indices.
if ordered_index < logical_indices.len() {
let logical_index =
logical_indices[ordered_indices[ordered_index]].as_usize();
let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
return Err(ArrowError::InvalidArgumentError(format!(
"Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
)));
@@ -704,8 +694,7 @@ mod tests {
seed.shuffle(&mut rng);
}
// repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
let num =
max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
for _ in 0..num {
result.push(seed[ix]);
}
@@ -749,19 +738,16 @@ mod tests {
#[test]
fn test_run_array() {
// Construct a value array
let value_data = PrimitiveArray::<Int8Type>::from_iter_values([
10_i8, 11, 12, 13, 14, 15, 16, 17,
]);
let value_data =
PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
// Construct a run_ends array:
let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
let run_ends_data = PrimitiveArray::<Int16Type>::from_iter_values(
run_ends_values.iter().copied(),
);
let run_ends_data =
PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
// Construct a run ends encoded array from the above two
let ree_array =
RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
assert_eq!(ree_array.len(), 22);
assert_eq!(ree_array.null_count(), 0);
@@ -872,8 +858,7 @@ mod tests {
let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
.into_iter()
.collect();
let run_ends: Int32Array =
[Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
assert_eq!(array.values().data_type(), &DataType::Utf8);
@@ -924,7 +909,10 @@ mod tests {
let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
let expected = ArrowError::InvalidArgumentError("Found null values in run_ends array. The run_ends array should not have null values.".to_string());
let expected = ArrowError::InvalidArgumentError(
"Found null values in run_ends array. The run_ends array should not have null values."
.to_string(),
);
assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
}
@@ -1003,8 +991,7 @@ mod tests {
let mut rng = thread_rng();
logical_indices.shuffle(&mut rng);
let physical_indices =
run_array.get_physical_indices(&logical_indices).unwrap();
let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
assert_eq!(logical_indices.len(), physical_indices.len());
+21 -25
View File
@@ -59,9 +59,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
/// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning
/// an error if [`GenericBinaryArray`] contains invalid UTF-8 data
pub fn try_from_binary(
v: GenericBinaryArray<OffsetSize>,
) -> Result<Self, ArrowError> {
pub fn try_from_binary(v: GenericBinaryArray<OffsetSize>) -> Result<Self, ArrowError> {
let (offsets, values, nulls) = v.into_parts();
Self::try_new(offsets, values, nulls)
}
@@ -83,9 +81,7 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
}
}
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>>
for GenericStringArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>> for GenericStringArray<OffsetSize> {
fn from(v: Vec<Option<&str>>) -> Self {
v.into_iter().collect()
}
@@ -97,9 +93,7 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<&str>> for GenericStringArray<OffsetS
}
}
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>>
for GenericStringArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>> for GenericStringArray<OffsetSize> {
fn from(v: Vec<Option<String>>) -> Self {
v.into_iter().collect()
}
@@ -438,13 +432,11 @@ mod tests {
let expected: LargeStringArray = data.clone().into_iter().map(Some).collect();
// Iterator reports too many items
let arr =
LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
assert_eq!(expected, arr);
// Iterator reports too few items
let arr =
LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
assert_eq!(expected, arr);
}
@@ -460,9 +452,11 @@ mod tests {
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
let null_buffer = Buffer::from_slice_ref([0b101]);
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt8, false),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt8,
false,
)));
// [None, Some("Parquet")]
let array_data = ArrayData::builder(data_type)
@@ -493,9 +487,7 @@ mod tests {
_test_generic_string_array_from_list_array::<i64>();
}
fn _test_generic_string_array_from_list_array_with_child_nulls_failed<
O: OffsetSizeTrait,
>() {
fn _test_generic_string_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
let values = b"HelloArrow";
let child_data = ArrayData::builder(DataType::UInt8)
.len(10)
@@ -508,9 +500,11 @@ mod tests {
// It is possible to create a null struct containing a non-nullable child
// see https://github.com/apache/arrow-rs/pull/3244 for details
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt8, true),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt8,
true,
)));
// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
@@ -544,9 +538,11 @@ mod tests {
.unwrap();
let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap());
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
Field::new("item", DataType::UInt16, false),
));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
"item",
DataType::UInt16,
false,
)));
let array_data = ArrayData::builder(data_type)
.len(2)
+10 -20
View File
@@ -462,9 +462,7 @@ impl Index<&str> for StructArray {
mod tests {
use super::*;
use crate::{
BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray,
};
use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
use arrow_buffer::ToByteSlice;
use std::sync::Arc;
@@ -540,12 +538,10 @@ mod tests {
None,
Some("mark"),
]));
let ints: ArrayRef =
Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
let arr =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap();
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();
let struct_data = arr.into_data();
assert_eq!(4, struct_data.len());
@@ -578,13 +574,11 @@ mod tests {
None,
// 3 elements, not 4
]));
let ints: ArrayRef =
Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
let err =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap_err()
.to_string();
let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap_err()
.to_string();
assert_eq!(
err,
@@ -599,8 +593,7 @@ mod tests {
fn test_struct_array_from_mismatched_types_single() {
drop(StructArray::from(vec![(
Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
)]));
}
@@ -612,8 +605,7 @@ mod tests {
drop(StructArray::from(vec![
(
Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
),
(
Arc::new(Field::new("c", DataType::Utf8, false)),
@@ -733,9 +725,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\""
)]
#[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
fn test_struct_array_from_mismatched_nullability() {
drop(StructArray::from(vec![(
Arc::new(Field::new("c", DataType::Int32, false)),
+10 -6
View File
@@ -179,8 +179,7 @@ impl UnionArray {
if let Some(b) = &value_offsets {
if ((type_ids.len()) * 4) != b.len() {
return Err(ArrowError::InvalidArgumentError(
"Type Ids and Offsets represent a different number of array slots."
.to_string(),
"Type Ids and Offsets represent a different number of array slots.".to_string(),
));
}
}
@@ -216,9 +215,8 @@ impl UnionArray {
// Unsafe Justification: arguments were validated above (and
// re-revalidated as part of data().validate() below)
let new_self = unsafe {
Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays)
};
let new_self =
unsafe { Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays) };
new_self.to_data().validate()?;
Ok(new_self)
@@ -1059,7 +1057,13 @@ mod tests {
let mut builder = UnionBuilder::new_sparse();
builder.append::<Float32Type>("a", 1.0).unwrap();
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);
assert!(
err.contains(
"Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"
),
"{}",
err
);
}
#[test]
+2 -7
View File
@@ -127,11 +127,7 @@ impl BooleanBuilder {
///
/// Returns an error if the slices are of different lengths
#[inline]
pub fn append_values(
&mut self,
values: &[bool],
is_valid: &[bool],
) -> Result<(), ArrowError> {
pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<(), ArrowError> {
if values.len() != is_valid.len() {
Err(ArrowError::InvalidArgumentError(
"Value and validity lengths must be equal".to_string(),
@@ -250,8 +246,7 @@ mod tests {
#[test]
fn test_boolean_array_builder_append_slice() {
let arr1 =
BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
let arr1 = BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
let mut builder = BooleanArray::builder(0);
builder.append_slice(&[true, false]);
+3 -7
View File
@@ -45,11 +45,9 @@ pub type Float32BufferBuilder = BufferBuilder<f32>;
pub type Float64BufferBuilder = BufferBuilder<f64>;
/// Buffer builder for 128-bit decimal type.
pub type Decimal128BufferBuilder =
BufferBuilder<<Decimal128Type as ArrowPrimitiveType>::Native>;
pub type Decimal128BufferBuilder = BufferBuilder<<Decimal128Type as ArrowPrimitiveType>::Native>;
/// Buffer builder for 256-bit decimal type.
pub type Decimal256BufferBuilder =
BufferBuilder<<Decimal256Type as ArrowPrimitiveType>::Native>;
pub type Decimal256BufferBuilder = BufferBuilder<<Decimal256Type as ArrowPrimitiveType>::Native>;
/// Buffer builder for timestamp type of second unit.
pub type TimestampSecondBufferBuilder =
@@ -107,9 +105,7 @@ pub type DurationNanosecondBufferBuilder =
#[cfg(test)]
mod tests {
use crate::builder::{
ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder,
};
use crate::builder::{ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder};
use crate::Array;
#[test]
@@ -75,7 +75,8 @@ impl FixedSizeBinaryBuilder {
pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
if self.value_length != value.as_ref().len() as i32 {
Err(ArrowError::InvalidArgumentError(
"Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
"Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
.to_string(),
))
} else {
self.values_builder.append_slice(value.as_ref());
@@ -95,11 +96,10 @@ impl FixedSizeBinaryBuilder {
/// Builds the [`FixedSizeBinaryArray`] and reset this builder.
pub fn finish(&mut self) -> FixedSizeBinaryArray {
let array_length = self.len();
let array_data_builder =
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(self.values_builder.finish())
.nulls(self.null_buffer_builder.finish())
.len(array_length);
let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(self.values_builder.finish())
.nulls(self.null_buffer_builder.finish())
.len(array_length);
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
}
@@ -108,11 +108,10 @@ impl FixedSizeBinaryBuilder {
pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
let array_length = self.len();
let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
let array_data_builder =
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(values_buffer)
.nulls(self.null_buffer_builder.finish_cloned())
.len(array_length);
let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
.add_buffer(values_buffer)
.nulls(self.null_buffer_builder.finish_cloned())
.len(array_length);
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
}
@@ -19,10 +19,7 @@ use crate::types::bytes::ByteArrayNativeType;
use std::{any::Any, sync::Arc};
use crate::{
types::{
BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType,
Utf8Type,
},
types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
ArrayRef, ArrowPrimitiveType, RunArray,
};
@@ -112,10 +109,7 @@ where
pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
Self {
run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
values_builder: GenericByteBuilder::<V>::with_capacity(
capacity,
data_capacity,
),
values_builder: GenericByteBuilder::<V>::with_capacity(capacity, data_capacity),
current_value: Vec::new(),
has_current_value: false,
current_run_end_index: 0,
@@ -282,12 +276,13 @@ where
}
fn run_end_index_as_native(&self) -> R::Native {
R::Native::from_usize(self.current_run_end_index)
.unwrap_or_else(|| panic!(
R::Native::from_usize(self.current_run_end_index).unwrap_or_else(|| {
panic!(
"Cannot convert the value {} from `usize` to native form of arrow datatype {}",
self.current_run_end_index,
R::DATA_TYPE
))
)
})
}
}
@@ -413,8 +408,7 @@ mod tests {
// Values are polymorphic and so require a downcast.
let av = array.values();
let ava: &GenericByteArray<T> =
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
assert_eq!(*ava.value(0), *values[0]);
assert!(ava.is_null(1));
@@ -459,8 +453,7 @@ mod tests {
// Values are polymorphic and so require a downcast.
let av = array.values();
let ava: &GenericByteArray<T> =
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
assert_eq!(ava.value(0), values[0]);
assert!(ava.is_null(1));
@@ -68,12 +68,8 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
let null_buffer_builder = null_buffer
.map(|buffer| {
NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1)
})
.unwrap_or_else(|| {
NullBufferBuilder::new_with_len(offsets_builder.len() - 1)
});
.map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
.unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
Self {
offsets_builder,
@@ -84,8 +80,7 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
#[inline]
fn next_offset(&self) -> T::Offset {
T::Offset::from_usize(self.value_builder.len())
.expect("byte array offset overflow")
T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
}
/// Appends a value into the builder.
@@ -16,9 +16,7 @@
// under the License.
use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
use crate::types::{
ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType,
};
use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray};
use arrow_buffer::ArrowNativeType;
use arrow_schema::{ArrowError, DataType};
@@ -91,10 +89,7 @@ where
state: Default::default(),
dedup: Default::default(),
keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
values_builder: GenericByteBuilder::<T>::with_capacity(
value_capacity,
data_capacity,
),
values_builder: GenericByteBuilder::<T>::with_capacity(value_capacity, data_capacity),
}
}
@@ -131,8 +126,7 @@ where
let mut dedup = HashMap::with_capacity_and_hasher(dict_len, ());
let values_len = dictionary_values.value_data().len();
let mut values_builder =
GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
let mut values_builder = GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
K::Native::from_usize(dictionary_values.len())
.ok_or(ArrowError::DictionaryKeyOverflowError)?;
@@ -214,10 +208,7 @@ where
/// value is appended to the values array.
///
/// Returns an error if the new index would overflow the key type.
pub fn append(
&mut self,
value: impl AsRef<T::Native>,
) -> Result<K::Native, ArrowError> {
pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
let value_native: &T::Native = value.as_ref();
let value_bytes: &[u8] = value_native.as_ref();
@@ -240,8 +231,7 @@ where
state.hash_one(get_bytes(storage, *idx))
});
K::Native::from_usize(idx)
.ok_or(ArrowError::DictionaryKeyOverflowError)?
K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?
}
};
self.keys_builder.append_value(key);
@@ -283,8 +273,7 @@ where
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();
let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
let builder = keys
.into_data()
@@ -300,8 +289,7 @@ where
let values = self.values_builder.finish_cloned();
let keys = self.keys_builder.finish_cloned();
let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
let builder = keys
.into_data()
@@ -367,12 +355,10 @@ fn get_bytes<T: ByteArrayType>(values: &GenericByteBuilder<T>, idx: usize) -> &[
/// assert_eq!(ava.value(1), "def");
///
/// ```
pub type StringDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
pub type StringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
/// Builder for [`DictionaryArray`] of [`LargeStringArray`](crate::array::LargeStringArray)
pub type LargeStringDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
pub type LargeStringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
/// Builder for [`DictionaryArray`] of [`BinaryArray`](crate::array::BinaryArray)
///
@@ -407,12 +393,10 @@ pub type LargeStringDictionaryBuilder<K> =
/// assert_eq!(ava.value(1), b"def");
///
/// ```
pub type BinaryDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
pub type BinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
/// Builder for [`DictionaryArray`] of [`LargeBinaryArray`](crate::array::LargeBinaryArray)
pub type LargeBinaryDictionaryBuilder<K> =
GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
pub type LargeBinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
#[cfg(test)]
mod tests {
@@ -444,8 +428,7 @@ mod tests {
// Values are polymorphic and so require a downcast.
let av = array.values();
let ava: &GenericByteArray<T> =
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
assert_eq!(*ava.value(0), *values[0]);
assert_eq!(*ava.value(1), *values[1]);
@@ -483,8 +466,7 @@ mod tests {
// Values are polymorphic and so require a downcast.
let av = array.values();
let ava: &GenericByteArray<T> =
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
assert_eq!(ava.value(0), values[0]);
assert_eq!(ava.value(1), values[1]);
@@ -542,11 +524,8 @@ mod tests {
<T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
{
let mut builder =
GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(
6,
&dictionary,
)
.unwrap();
GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(6, &dictionary)
.unwrap();
builder.append(values[0]).unwrap();
builder.append_null();
builder.append(values[1]).unwrap();
@@ -562,8 +541,7 @@ mod tests {
// Values are polymorphic and so require a downcast.
let av = array.values();
let ava: &GenericByteArray<T> =
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
assert!(!ava.is_valid(0));
assert_eq!(ava.value(1), values[1]);
@@ -597,11 +575,8 @@ mod tests {
<T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
{
let mut builder =
GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(
4,
&dictionary,
)
.unwrap();
GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(4, &dictionary)
.unwrap();
builder.append(values[0]).unwrap();
builder.append_null();
builder.append(values[1]).unwrap();
+3 -10
View File
@@ -86,11 +86,7 @@ impl Default for MapFieldNames {
impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
/// Creates a new `MapBuilder`
pub fn new(
field_names: Option<MapFieldNames>,
key_builder: K,
value_builder: V,
) -> Self {
pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
let capacity = key_builder.len();
Self::with_capacity(field_names, key_builder, value_builder, capacity)
}
@@ -243,12 +239,9 @@ mod tests {
use super::*;
#[test]
#[should_panic(
expected = "Keys array must have no null values, found 1 null value(s)"
)]
#[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
fn test_map_builder_with_null_keys_panics() {
let mut builder =
MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
builder.keys().append_null();
builder.values().append_value(42);
builder.append(true).unwrap();
+6 -19
View File
@@ -161,9 +161,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
let null_buffer_builder = null_buffer
.map(|buffer| {
NullBufferBuilder::new_from_buffer(buffer, values_builder.len())
})
.map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
.unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
Self {
@@ -256,10 +254,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
/// This requires the iterator be a trusted length. This could instead require
/// the iterator implement `TrustedLen` once that is stabilized.
#[inline]
pub unsafe fn append_trusted_len_iter(
&mut self,
iter: impl IntoIterator<Item = T::Native>,
) {
pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
let iter = iter.into_iter();
let len = iter
.size_hint()
@@ -328,11 +323,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
impl<P: DecimalType> PrimitiveBuilder<P> {
/// Sets the precision and scale
pub fn with_precision_and_scale(
self,
precision: u8,
scale: i8,
) -> Result<Self, ArrowError> {
pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
validate_decimal_precision_and_scale::<P>(precision, scale)?;
Ok(Self {
data_type: P::TYPE_CONSTRUCTOR(precision, scale),
@@ -592,25 +583,21 @@ mod tests {
#[test]
fn test_primitive_array_builder_with_data_type() {
let mut builder =
Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
builder.append_value(1);
let array = builder.finish();
assert_eq!(array.precision(), 1);
assert_eq!(array.scale(), 2);
let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
let mut builder =
TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
builder.append_value(1);
let array = builder.finish();
assert_eq!(array.data_type(), &data_type);
}
#[test]
#[should_panic(
expected = "incompatible data type for builder, expected Int32 got Int64"
)]
#[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
fn test_invalid_with_data_type() {
Int32Builder::new().with_data_type(DataType::Int64);
}
@@ -221,8 +221,7 @@ where
let key = self.values_builder.len();
self.values_builder.append_value(value);
vacant.insert(key);
K::Native::from_usize(key)
.ok_or(ArrowError::DictionaryKeyOverflowError)?
K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?
}
Entry::Occupied(o) => K::Native::usize_as(*o.get()),
};
@@ -266,10 +265,8 @@ where
let values = self.values_builder.finish();
let keys = self.keys_builder.finish();
let data_type = DataType::Dictionary(
Box::new(K::DATA_TYPE),
Box::new(values.data_type().clone()),
);
let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
let builder = keys
.into_data()
@@ -285,8 +282,7 @@ where
let values = self.values_builder.finish_cloned();
let keys = self.keys_builder.finish_cloned();
let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
let builder = keys
.into_data()
@@ -331,8 +327,7 @@ mod tests {
#[test]
fn test_primitive_dictionary_builder() {
let mut builder =
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
builder.append(12345678).unwrap();
builder.append_null();
builder.append(22345678).unwrap();
@@ -384,8 +379,7 @@ mod tests {
#[test]
fn test_primitive_dictionary_with_builders() {
let keys_builder = PrimitiveBuilder::<Int32Type>::new();
let values_builder =
Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
let values_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
let mut builder =
PrimitiveDictionaryBuilder::<Int32Type, Decimal128Type>::new_from_empty_builders(
keys_builder,
+9 -26
View File
@@ -106,24 +106,18 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
DataType::LargeBinary => {
Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024))
}
DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
DataType::Decimal128(p, s) => Box::new(
Decimal128Builder::with_capacity(capacity)
.with_data_type(DataType::Decimal128(*p, *s)),
Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
),
DataType::Decimal256(p, s) => Box::new(
Decimal256Builder::with_capacity(capacity)
.with_data_type(DataType::Decimal256(*p, *s)),
Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
),
DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
DataType::LargeUtf8 => {
Box::new(LargeStringBuilder::with_capacity(capacity, 1024))
}
DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
DataType::Time32(TimeUnit::Second) => {
@@ -175,19 +169,14 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Duration(TimeUnit::Nanosecond) => {
Box::new(DurationNanosecondBuilder::with_capacity(capacity))
}
DataType::Struct(fields) => {
Box::new(StructBuilder::from_fields(fields.clone(), capacity))
}
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
t => panic!("Data type {t:?} is not currently supported"),
}
}
impl StructBuilder {
/// Creates a new `StructBuilder`
pub fn new(
fields: impl Into<Fields>,
field_builders: Vec<Box<dyn ArrayBuilder>>,
) -> Self {
pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
Self {
field_builders,
fields: fields.into(),
@@ -234,10 +223,7 @@ impl StructBuilder {
pub fn finish(&mut self) -> StructArray {
self.validate_content();
if self.fields.is_empty() {
return StructArray::new_empty_fields(
self.len(),
self.null_buffer_builder.finish(),
);
return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
}
let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
@@ -524,8 +510,7 @@ mod tests {
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
let list_type =
DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
let fields = vec![
Field::new("f1", DataType::Int16, false),
Field::new("f2", list_type, false),
@@ -571,9 +556,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Number of fields is not equal to the number of field_builders."
)]
#[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
fn test_struct_array_builder_unequal_field_field_builders() {
let int_builder = Int32Builder::with_capacity(10);
+7 -6
View File
@@ -65,11 +65,7 @@ impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
impl FieldData {
/// Creates a new `FieldData`.
fn new<T: ArrowPrimitiveType>(
type_id: i8,
data_type: DataType,
capacity: usize,
) -> Self {
fn new<T: ArrowPrimitiveType>(type_id: i8, data_type: DataType, capacity: usize) -> Self {
Self {
type_id,
data_type,
@@ -222,7 +218,12 @@ impl UnionBuilder {
let mut field_data = match self.fields.remove(&type_name) {
Some(data) => {
if data.data_type != T::DATA_TYPE {
return Err(ArrowError::InvalidArgumentError(format!("Attempt to write col \"{}\" with type {} doesn't match existing type {}", type_name, T::DATA_TYPE, data.data_type)));
return Err(ArrowError::InvalidArgumentError(format!(
"Attempt to write col \"{}\" with type {} doesn't match existing type {}",
type_name,
T::DATA_TYPE,
data.data_type
)));
}
data
}
+6 -17
View File
@@ -578,9 +578,7 @@ macro_rules! downcast_run_array {
/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
/// [`GenericListArray<T>`], panicking on failure.
pub fn as_generic_list_array<S: OffsetSizeTrait>(
arr: &dyn Array,
) -> &GenericListArray<S> {
pub fn as_generic_list_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericListArray<S> {
arr.as_any()
.downcast_ref::<GenericListArray<S>>()
.expect("Unable to downcast to list array")
@@ -612,9 +610,7 @@ pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray {
/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
/// [`GenericBinaryArray<S>`], panicking on failure.
#[inline]
pub fn as_generic_binary_array<S: OffsetSizeTrait>(
arr: &dyn Array,
) -> &GenericBinaryArray<S> {
pub fn as_generic_binary_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericBinaryArray<S> {
arr.as_any()
.downcast_ref::<GenericBinaryArray<S>>()
.expect("Unable to downcast to binary array")
@@ -826,8 +822,7 @@ pub trait AsArray: private::Sealed {
}
/// Downcast this to a [`DictionaryArray`] returning `None` if not possible
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self)
-> Option<&DictionaryArray<K>>;
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>>;
/// Downcast this to a [`DictionaryArray`] panicking if not possible
fn as_dictionary<K: ArrowDictionaryKeyType>(&self) -> &DictionaryArray<K> {
@@ -877,9 +872,7 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(
&self,
) -> Option<&DictionaryArray<K>> {
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
self.as_any().downcast_ref()
}
@@ -926,9 +919,7 @@ impl AsArray for ArrayRef {
self.as_any().downcast_ref()
}
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(
&self,
) -> Option<&DictionaryArray<K>> {
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
self.as_ref().as_dictionary_opt()
}
@@ -972,9 +963,7 @@ mod tests {
#[test]
fn test_decimal256array() {
let a = Decimal256Array::from_iter_values(
[1, 2, 4, 5].into_iter().map(i256::from_i128),
);
let a = Decimal256Array::from_iter_values([1, 2, 4, 5].into_iter().map(i256::from_i128));
assert!(!as_primitive_array::<Decimal256Type>(&a).is_empty());
}
}
+2 -8
View File
@@ -55,10 +55,7 @@ pub(crate) fn add_months_datetime<Tz: TimeZone>(
/// Add the given number of days to the given datetime.
///
/// Returns `None` when it will result in overflow.
pub(crate) fn add_days_datetime<Tz: TimeZone>(
dt: DateTime<Tz>,
days: i32,
) -> Option<DateTime<Tz>> {
pub(crate) fn add_days_datetime<Tz: TimeZone>(dt: DateTime<Tz>, days: i32) -> Option<DateTime<Tz>> {
match days.cmp(&0) {
Ordering::Equal => Some(dt),
Ordering::Greater => dt.checked_add_days(Days::new(days as u64)),
@@ -83,10 +80,7 @@ pub(crate) fn sub_months_datetime<Tz: TimeZone>(
/// Substract the given number of days to the given datetime.
///
/// Returns `None` when it will result in overflow.
pub(crate) fn sub_days_datetime<Tz: TimeZone>(
dt: DateTime<Tz>,
days: i32,
) -> Option<DateTime<Tz>> {
pub(crate) fn sub_days_datetime<Tz: TimeZone>(dt: DateTime<Tz>, days: i32) -> Option<DateTime<Tz>> {
match days.cmp(&0) {
Ordering::Equal => Some(dt),
Ordering::Greater => dt.checked_sub_days(Days::new(days as u64)),
+4 -6
View File
@@ -18,8 +18,8 @@
//! Idiomatic iterators for [`Array`](crate::Array)
use crate::array::{
ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray,
GenericListArray, GenericStringArray, PrimitiveArray,
ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray,
GenericStringArray, PrimitiveArray,
};
use crate::{FixedSizeListArray, MapArray};
use arrow_buffer::NullBuffer;
@@ -187,8 +187,7 @@ mod tests {
#[test]
fn test_string_array_iter_round_trip() {
let array =
StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
let array = StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
let array = Arc::new(array) as ArrayRef;
let array = array.as_any().downcast_ref::<StringArray>().unwrap();
@@ -211,8 +210,7 @@ mod tests {
// check if DoubleEndedIterator is implemented
let result: StringArray = array.iter().rev().collect();
let rev_array =
StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
let rev_array = StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
assert_eq!(result, rev_array);
// check if ExactSizeIterator is implemented
let _ = array.iter().rposition(|opt_b| opt_b == Some("a"));
+1 -2
View File
@@ -182,8 +182,7 @@ pub use array::*;
mod record_batch;
pub use record_batch::{
RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader,
RecordBatchWriter,
RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, RecordBatchWriter,
};
mod arithmetic;
+19 -37
View File
@@ -179,8 +179,8 @@ macro_rules! make_numeric_type {
16 => {
// same general logic as for 8 lanes, extended to 16 bits
let vecidx = i32x16::new(
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768,
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
32768,
);
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
@@ -194,21 +194,19 @@ macro_rules! make_numeric_type {
let tmp = &mut [0_i16; 32];
let vecidx = i32x16::new(
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768,
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
32768,
);
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i16x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[0..16]);
i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]);
let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i16x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[16..32]);
i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]);
unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) }
}
@@ -218,33 +216,29 @@ macro_rules! make_numeric_type {
let tmp = &mut [0_i8; 64];
let vecidx = i32x16::new(
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768,
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
32768,
);
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i8x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[0..16]);
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]);
let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i8x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[16..32]);
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]);
let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i8x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[32..48]);
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[32..48]);
let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32);
let vecmask = (vecidx & vecmask).eq(vecidx);
i8x16::from_cast(vecmask)
.write_to_slice_unaligned(&mut tmp[48..64]);
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[48..64]);
unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) }
}
@@ -269,11 +263,7 @@ macro_rules! make_numeric_type {
/// Selects elements of `a` and `b` using `mask`
#[inline]
fn mask_select(
mask: Self::SimdMask,
a: Self::Simd,
b: Self::Simd,
) -> Self::Simd {
fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd {
mask.select(a, b)
}
@@ -327,10 +317,7 @@ macro_rules! make_numeric_type {
}
#[inline]
fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(
a: Self::Simd,
op: F,
) -> Self::Simd {
fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(a: Self::Simd, op: F) -> Self::Simd {
op(a)
}
}
@@ -581,8 +568,7 @@ mod tests {
let mask = 0b1101;
let actual = IntervalMonthDayNanoType::mask_from_u64(mask);
let expected = expected_mask!(i128, mask);
let expected =
m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice()));
let expected = m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice()));
assert_eq!(expected, actual);
}
@@ -612,8 +598,7 @@ mod tests {
let mask = 0b10101010_10101010;
let actual = Float32Type::mask_from_u64(mask);
let expected = expected_mask!(i32, mask);
let expected =
m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
assert_eq!(expected, actual);
}
@@ -623,8 +608,7 @@ mod tests {
let mask = 0b01010101_01010101;
let actual = Int32Type::mask_from_u64(mask);
let expected = expected_mask!(i32, mask);
let expected =
m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
assert_eq!(expected, actual);
}
@@ -635,16 +619,14 @@ mod tests {
let actual = UInt16Type::mask_from_u64(mask);
let expected = expected_mask!(i16, mask);
dbg!(&expected);
let expected =
m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
let expected = m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
assert_eq!(expected, actual);
}
#[test]
fn test_mask_i8() {
let mask =
0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
let mask = 0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
let actual = Int8Type::mask_from_u64(mask);
let expected = expected_mask!(i8, mask);
let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice()));
+26 -44
View File
@@ -107,10 +107,7 @@ impl RecordBatch {
/// vec![Arc::new(id_array)]
/// ).unwrap();
/// ```
pub fn try_new(
schema: SchemaRef,
columns: Vec<ArrayRef>,
) -> Result<Self, ArrowError> {
pub fn try_new(schema: SchemaRef, columns: Vec<ArrayRef>) -> Result<Self, ArrowError> {
let options = RecordBatchOptions::new();
Self::try_new_impl(schema, columns, &options)
}
@@ -179,9 +176,7 @@ impl RecordBatch {
// check that all columns have the same row count
if columns.iter().any(|c| c.len() != row_count) {
let err = match options.row_count {
Some(_) => {
"all columns in a record batch must have the specified row count"
}
Some(_) => "all columns in a record batch must have the specified row count",
None => "all columns in a record batch must have the same length",
};
return Err(ArrowError::InvalidArgumentError(err.to_string()));
@@ -190,9 +185,7 @@ impl RecordBatch {
// function for comparing column type and field type
// return true if 2 types are not matched
let type_not_match = if options.match_field_names {
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| {
col_type != field_type
}
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| col_type != field_type
} else {
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| {
!col_type.equals_datatype(field_type)
@@ -484,7 +477,11 @@ impl From<StructArray> for RecordBatch {
fn from(value: StructArray) -> Self {
let row_count = value.len();
let (fields, columns, nulls) = value.into_parts();
assert_eq!(nulls.map(|n| n.null_count()).unwrap_or_default(), 0, "Cannot convert nullable StructArray to RecordBatch, see StructArray documentation");
assert_eq!(
nulls.map(|n| n.null_count()).unwrap_or_default(),
0,
"Cannot convert nullable StructArray to RecordBatch, see StructArray documentation"
);
RecordBatch {
schema: Arc::new(Schema::new(fields)),
@@ -588,9 +585,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::{
BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray,
};
use crate::{BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray};
use arrow_buffer::{Buffer, ToByteSlice};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::Fields;
@@ -606,8 +601,7 @@ mod tests {
let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
let record_batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
check_batch(record_batch, 5)
}
@@ -622,8 +616,7 @@ mod tests {
let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
let record_batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
assert_eq!(record_batch.get_array_memory_size(), 364);
}
@@ -649,8 +642,7 @@ mod tests {
let b = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "h", "i"]);
let record_batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
let offset = 2;
let length = 5;
@@ -699,8 +691,8 @@ mod tests {
]));
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"]));
let record_batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)])
.expect("valid conversion");
let record_batch =
RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).expect("valid conversion");
let expected_schema = Schema::new(vec![
Field::new("a", DataType::Int32, true),
@@ -716,11 +708,9 @@ mod tests {
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"]));
// Note there are no nulls in a or b, but we specify that b is nullable
let record_batch = RecordBatch::try_from_iter_with_nullable(vec![
("a", a, false),
("b", b, true),
])
.expect("valid conversion");
let record_batch =
RecordBatch::try_from_iter_with_nullable(vec![("a", a, false), ("b", b, true)])
.expect("valid conversion");
let expected_schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
@@ -792,8 +782,7 @@ mod tests {
let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
let b = Int32Array::from(vec![1, 2, 3, 4, 5]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
assert!(batch.is_err());
}
@@ -863,11 +852,8 @@ mod tests {
Field::new("id", DataType::Int32, false),
Field::new("val", DataType::Int32, false),
]);
let record_batch = RecordBatch::try_new(
Arc::new(schema1),
vec![id_arr.clone(), val_arr.clone()],
)
.unwrap();
let record_batch =
RecordBatch::try_new(Arc::new(schema1), vec![id_arr.clone(), val_arr.clone()]).unwrap();
assert_eq!(record_batch["id"].as_ref(), id_arr.as_ref());
assert_eq!(record_batch["val"].as_ref(), val_arr.as_ref());
@@ -1005,15 +991,12 @@ mod tests {
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
let c: ArrayRef = Arc::new(StringArray::from(vec!["d", "e", "f"]));
let record_batch = RecordBatch::try_from_iter(vec![
("a", a.clone()),
("b", b.clone()),
("c", c.clone()),
])
.expect("valid conversion");
let record_batch =
RecordBatch::try_from_iter(vec![("a", a.clone()), ("b", b.clone()), ("c", c.clone())])
.expect("valid conversion");
let expected = RecordBatch::try_from_iter(vec![("a", a), ("c", c)])
.expect("valid conversion");
let expected =
RecordBatch::try_from_iter(vec![("a", a), ("c", c)]).expect("valid conversion");
assert_eq!(expected, record_batch.project(&[0, 2]).unwrap());
}
@@ -1049,8 +1032,7 @@ mod tests {
let options = RecordBatchOptions::new().with_row_count(Some(10));
let ok =
RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
assert_eq!(ok.num_rows(), 10);
let a = ok.slice(2, 5);
+6 -12
View File
@@ -86,8 +86,7 @@ where
// If current logical index is greater than current run end index then increment
// the physical index.
let run_ends = self.array.run_ends().values();
if self.current_front_logical >= run_ends[self.current_front_physical].as_usize()
{
if self.current_front_logical >= run_ends[self.current_front_physical].as_usize() {
// As the run_ends is expected to be strictly increasing, there
// should be at least one logical entry in one physical entry. Because of this
// reason the next value can be accessed by incrementing physical index once.
@@ -136,8 +135,7 @@ where
let run_ends = self.array.run_ends().values();
if self.current_back_physical > 0
&& self.current_back_logical
< run_ends[self.current_back_physical - 1].as_usize()
&& self.current_back_logical < run_ends[self.current_back_physical - 1].as_usize()
{
// As the run_ends is expected to be strictly increasing, there
// should be at least one logical entry in one physical entry. Because of this
@@ -211,8 +209,7 @@ mod tests {
seed.shuffle(&mut rng);
}
// repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
let num =
max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
for _ in 0..num {
result.push(seed[ix]);
}
@@ -285,8 +282,7 @@ mod tests {
for logical_len in logical_lengths {
let input_array = build_input_array(logical_len);
let mut run_array_builder =
PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
let mut run_array_builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
run_array_builder.extend(input_array.iter().copied());
let run_array = run_array_builder.finish();
let typed_array = run_array.downcast::<Int32Array>().unwrap();
@@ -327,8 +323,7 @@ mod tests {
})
.collect();
let result_asref: Vec<Option<&str>> =
result.iter().map(|f| f.as_deref()).collect();
let result_asref: Vec<Option<&str>> = result.iter().map(|f| f.as_deref()).collect();
let expected_vec = vec![
Some("abb"),
@@ -364,8 +359,7 @@ mod tests {
// Iterate on sliced typed run array
let actual: Vec<Option<i32>> = sliced_typed_run_array.into_iter().collect();
let expected: Vec<Option<i32>> =
input_array.iter().take(slice_len).copied().collect();
let expected: Vec<Option<i32>> = input_array.iter().take(slice_len).copied().collect();
assert_eq!(expected, actual);
// test for offset = total_len - slice_len, length = slice_len
+4 -9
View File
@@ -20,9 +20,7 @@
use crate::timezone::Tz;
use crate::ArrowPrimitiveType;
use arrow_schema::{DataType, TimeUnit};
use chrono::{
DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc,
};
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc};
/// Number of seconds in a day
pub const SECONDS_IN_DAY: i64 = 86_400;
@@ -221,10 +219,7 @@ pub fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
}
/// Converts an [`ArrowPrimitiveType`] to [`DateTime<Tz>`]
pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(
v: i64,
tz: Tz,
) -> Option<DateTime<Tz>> {
pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(v: i64, tz: Tz) -> Option<DateTime<Tz>> {
let naive = as_datetime::<T>(v)?;
Some(Utc.from_utc_datetime(&naive).with_timezone(&tz))
}
@@ -274,8 +269,8 @@ pub fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
#[cfg(test)]
mod tests {
use crate::temporal_conversions::{
date64_to_datetime, split_second, timestamp_ms_to_datetime,
timestamp_ns_to_datetime, timestamp_us_to_datetime, NANOSECONDS,
date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
timestamp_us_to_datetime, NANOSECONDS,
};
use chrono::NaiveDateTime;
+4 -10
View File
@@ -38,8 +38,8 @@ fn parse_fixed_offset(tz: &str) -> Option<FixedOffset> {
if values.iter().any(|x| *x > 9) {
return None;
}
let secs = (values[0] * 10 + values[1]) as i32 * 60 * 60
+ (values[2] * 10 + values[3]) as i32 * 60;
let secs =
(values[0] * 10 + values[1]) as i32 * 60 * 60 + (values[2] * 10 + values[3]) as i32 * 60;
match bytes[0] {
b'+' => FixedOffset::east_opt(secs),
@@ -122,10 +122,7 @@ mod private {
})
}
fn offset_from_local_datetime(
&self,
local: &NaiveDateTime,
) -> LocalResult<Self::Offset> {
fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> {
tz!(self, tz, {
tz.offset_from_local_datetime(local).map(|x| TzOffset {
tz: *self,
@@ -285,10 +282,7 @@ mod private {
self.0.offset_from_local_date(local).map(TzOffset)
}
fn offset_from_local_datetime(
&self,
local: &NaiveDateTime,
) -> LocalResult<Self::Offset> {
fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> {
self.0.offset_from_local_datetime(local).map(TzOffset)
}
+8 -24
View File
@@ -18,8 +18,7 @@
//! Zero-sized types used to parameterize generic array implementations
use crate::delta::{
add_days_datetime, add_months_datetime, shift_months, sub_days_datetime,
sub_months_datetime,
add_days_datetime, add_months_datetime, shift_months, sub_days_datetime, sub_months_datetime,
};
use crate::temporal_conversions::as_datetime_with_timezone;
use crate::timezone::Tz;
@@ -27,9 +26,8 @@ use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
use arrow_buffer::{i256, Buffer, OffsetBuffer};
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
use arrow_schema::{
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION,
DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
DECIMAL_DEFAULT_SCALE,
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
};
use chrono::{Duration, NaiveDate, NaiveDateTime};
use half::f16;
@@ -875,9 +873,7 @@ impl IntervalDayTimeType {
///
/// * `i` - The IntervalDayTimeType to convert
#[inline]
pub fn to_parts(
i: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
) -> (i32, i32) {
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
let days = (i >> 32) as i32;
let ms = i as i32;
(days, ms)
@@ -1221,10 +1217,7 @@ pub trait DecimalType:
fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String;
/// Validates that `value` contains no more than `precision` decimal digits
fn validate_decimal_precision(
value: Self::Native,
precision: u8,
) -> Result<(), ArrowError>;
fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>;
}
/// Validate that `precision` and `scale` are valid for `T`
@@ -1400,10 +1393,7 @@ pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed {
const DATA_TYPE: DataType;
/// Verifies that every consecutive pair of `offsets` denotes a valid slice of `values`
fn validate(
offsets: &OffsetBuffer<Self::Offset>,
values: &Buffer,
) -> Result<(), ArrowError>;
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError>;
}
/// [`ByteArrayType`] for string arrays
@@ -1422,10 +1412,7 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericStringType<O> {
DataType::Utf8
};
fn validate(
offsets: &OffsetBuffer<Self::Offset>,
values: &Buffer,
) -> Result<(), ArrowError> {
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
// Verify that the slice as a whole is valid UTF-8
let validated = std::str::from_utf8(values).map_err(|e| {
ArrowError::InvalidArgumentError(format!("Encountered non UTF-8 data: {e}"))
@@ -1471,10 +1458,7 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericBinaryType<O> {
DataType::Binary
};
fn validate(
offsets: &OffsetBuffer<Self::Offset>,
values: &Buffer,
) -> Result<(), ArrowError> {
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
// offsets are guaranteed to be monotonically increasing and non-empty
let max_offset = offsets.last().unwrap().as_usize();
if values.len() < max_offset {
+1 -3
View File
@@ -133,9 +133,7 @@ impl HeaderDecoder {
let remaining = &MAGIC[MAGIC.len() - self.bytes_remaining..];
let to_decode = buf.len().min(remaining.len());
if !buf.starts_with(&remaining[..to_decode]) {
return Err(ArrowError::ParseError(
"Incorrect avro magic".to_string(),
));
return Err(ArrowError::ParseError("Incorrect avro magic".to_string()));
}
self.bytes_remaining -= to_decode;
buf = &buf[to_decode..];
+1 -3
View File
@@ -50,9 +50,7 @@ fn read_header<R: BufRead>(mut reader: R) -> Result<Header, ArrowError> {
}
/// Return an iterator of [`Block`] from the provided [`BufRead`]
fn read_blocks<R: BufRead>(
mut reader: R,
) -> impl Iterator<Item = Result<Block, ArrowError>> {
fn read_blocks<R: BufRead>(mut reader: R) -> impl Iterator<Item = Result<Block, ArrowError>> {
let mut decoder = BlockDecoder::default();
let mut try_next = move || {
+1 -3
View File
@@ -335,9 +335,7 @@ mod tests {
Field {
name: "value",
doc: None,
r#type: Schema::TypeName(TypeName::Primitive(
PrimitiveType::Long
)),
r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
default: None,
},
Field {
+3 -13
View File
@@ -26,10 +26,7 @@
/// # Panics
///
/// Panics if divisor is zero
pub fn div_rem<const N: usize>(
numerator: &[u64; N],
divisor: &[u64; N],
) -> ([u64; N], [u64; N]) {
pub fn div_rem<const N: usize>(numerator: &[u64; N], divisor: &[u64; N]) -> ([u64; N], [u64; N]) {
let numerator_bits = bits(numerator);
let divisor_bits = bits(divisor);
assert_ne!(divisor_bits, 0, "division by zero");
@@ -61,10 +58,7 @@ fn bits(arr: &[u64]) -> usize {
}
/// Division of numerator by a u64 divisor
fn div_rem_small<const N: usize>(
numerator: &[u64; N],
divisor: u64,
) -> ([u64; N], [u64; N]) {
fn div_rem_small<const N: usize>(numerator: &[u64; N], divisor: u64) -> ([u64; N], [u64; N]) {
let mut rem = 0u64;
let mut numerator = *numerator;
numerator.iter_mut().rev().for_each(|d| {
@@ -227,11 +221,7 @@ fn sub_assign(a: &mut [u64], b: &[u64]) -> bool {
}
/// Converts an overflowing binary operation on scalars to one on slices
fn binop_slice(
a: &mut [u64],
b: &[u64],
binop: impl Fn(u64, u64) -> (u64, bool) + Copy,
) -> bool {
fn binop_slice(a: &mut [u64], b: &[u64], binop: impl Fn(u64, u64) -> (u64, bool) + Copy) -> bool {
let mut c = false;
a.iter_mut().zip(b.iter()).for_each(|(x, y)| {
let (res1, overflow1) = y.overflowing_add(u64::from(c));
+12 -19
View File
@@ -310,9 +310,7 @@ impl i256 {
(Self::from_le_bytes(bytes), false)
}
Ordering::Equal => (Self::from_le_bytes(v_bytes.try_into().unwrap()), false),
Ordering::Greater => {
(Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true)
}
Ordering::Greater => (Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true),
}
}
@@ -357,8 +355,7 @@ impl i256 {
#[inline]
pub fn checked_add(self, other: Self) -> Option<Self> {
let r = self.wrapping_add(other);
((other.is_negative() && r < self) || (!other.is_negative() && r >= self))
.then_some(r)
((other.is_negative() && r < self) || (!other.is_negative() && r >= self)).then_some(r)
}
/// Performs wrapping subtraction
@@ -373,8 +370,7 @@ impl i256 {
#[inline]
pub fn checked_sub(self, other: Self) -> Option<Self> {
let r = self.wrapping_sub(other);
((other.is_negative() && r > self) || (!other.is_negative() && r <= self))
.then_some(r)
((other.is_negative() && r > self) || (!other.is_negative() && r <= self)).then_some(r)
}
/// Performs wrapping multiplication
@@ -591,9 +587,7 @@ impl i256 {
/// Temporary workaround due to lack of stable const array slicing
/// See <https://github.com/rust-lang/rust/issues/90091>
const fn split_array<const N: usize, const M: usize>(
vals: [u8; N],
) -> ([u8; M], [u8; M]) {
const fn split_array<const N: usize, const M: usize>(vals: [u8; N]) -> ([u8; M], [u8; M]) {
let mut a = [0; M];
let mut b = [0; M];
let mut i = 0;
@@ -915,8 +909,7 @@ mod tests {
// Addition
let actual = il.wrapping_add(ir);
let (expected, overflow) =
i256::from_bigint_with_overflow(bl.clone() + br.clone());
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() + br.clone());
assert_eq!(actual, expected);
let checked = il.checked_add(ir);
@@ -927,8 +920,7 @@ mod tests {
// Subtraction
let actual = il.wrapping_sub(ir);
let (expected, overflow) =
i256::from_bigint_with_overflow(bl.clone() - br.clone());
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() - br.clone());
assert_eq!(actual.to_string(), expected.to_string());
let checked = il.checked_sub(ir);
@@ -939,8 +931,7 @@ mod tests {
// Multiplication
let actual = il.wrapping_mul(ir);
let (expected, overflow) =
i256::from_bigint_with_overflow(bl.clone() * br.clone());
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() * br.clone());
assert_eq!(actual.to_string(), expected.to_string());
let checked = il.checked_mul(ir);
@@ -996,8 +987,7 @@ mod tests {
// Exponentiation
for exp in vec![0, 1, 2, 3, 8, 100].into_iter() {
let actual = il.wrapping_pow(exp);
let (expected, overflow) =
i256::from_bigint_with_overflow(bl.clone().pow(exp));
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone().pow(exp));
assert_eq!(actual.to_string(), expected.to_string());
let checked = il.checked_pow(exp);
@@ -1212,7 +1202,10 @@ mod tests {
("000000000000000000000000000000000000000", Some(i256::ZERO)),
("0000000000000000000000000000000000000000-11", None),
("11-1111111111111111111111111111111111111", None),
("115792089237316195423570985008687907853269984665640564039457584007913129639936", None)
(
"115792089237316195423570985008687907853269984665640564039457584007913129639936",
None,
),
];
for (case, expected) in cases {
assert_eq!(i256::from_string(case), expected)
+4 -23
View File
@@ -223,13 +223,7 @@ impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
assert_eq!(self.len, rhs.len);
BooleanBuffer {
buffer: buffer_bin_and(
&self.buffer,
self.offset,
&rhs.buffer,
rhs.offset,
self.len,
),
buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
offset: 0,
len: self.len,
}
@@ -242,13 +236,7 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
assert_eq!(self.len, rhs.len);
BooleanBuffer {
buffer: buffer_bin_or(
&self.buffer,
self.offset,
&rhs.buffer,
rhs.offset,
self.len,
),
buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
offset: 0,
len: self.len,
}
@@ -261,13 +249,7 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer {
fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
assert_eq!(self.len, rhs.len);
BooleanBuffer {
buffer: buffer_bin_xor(
&self.buffer,
self.offset,
&rhs.buffer,
rhs.offset,
self.len,
),
buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
offset: 0,
len: self.len,
}
@@ -428,8 +410,7 @@ mod tests {
let buf = Buffer::from(&[0, 1, 1, 0, 0]);
let boolean_buf = &BooleanBuffer::new(buf, offset, len);
let expected =
BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
assert_eq!(!boolean_buf, expected);
}
}
+2 -6
View File
@@ -523,9 +523,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_slice_offset_out_of_bound() {
let buf = Buffer::from(&[2, 4, 6, 8, 10]);
buf.slice(6);
@@ -688,9 +686,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn slice_overflow() {
let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
buffer.slice_with_length(2, usize::MAX);
+3 -8
View File
@@ -334,9 +334,7 @@ impl MutableBuffer {
#[inline]
pub(super) fn into_buffer(self) -> Buffer {
let bytes = unsafe {
Bytes::new(self.data, self.len, Deallocation::Standard(self.layout))
};
let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
std::mem::forget(self);
Buffer::from_bytes(bytes)
}
@@ -351,8 +349,7 @@ impl MutableBuffer {
// SAFETY
// ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
// implementation outside this crate, and this method checks alignment
let (prefix, offsets, suffix) =
unsafe { self.as_slice_mut().align_to_mut::<T>() };
let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
assert!(prefix.is_empty() && suffix.is_empty());
offsets
}
@@ -604,9 +601,7 @@ impl MutableBuffer {
// we can't specialize `extend` for `TrustedLen` like `Vec` does.
// 2. `from_trusted_len_iter_bool` is faster.
#[inline]
pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(
mut iterator: I,
) -> Self {
pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
let (_, upper) = iterator.size_hint();
let len = upper.expect("from_trusted_len_iter requires an upper limit");
+1 -4
View File
@@ -71,10 +71,7 @@ impl NullBuffer {
/// This is commonly used by binary operations where the result is NULL if either
/// of the input values is NULL. Handling the null mask separately in this way
/// can yield significant performance improvements over an iterator approach
pub fn union(
lhs: Option<&NullBuffer>,
rhs: Option<&NullBuffer>,
) -> Option<NullBuffer> {
pub fn union(lhs: Option<&NullBuffer>, rhs: Option<&NullBuffer>) -> Option<NullBuffer> {
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => Some(Self::new(lhs.inner() & rhs.inner())),
(Some(n), None) | (None, Some(n)) => Some(n.clone()),
+1 -2
View File
@@ -219,8 +219,7 @@ mod tests {
assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
let half_max = i32::MAX / 2;
let buffer =
OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
}
+1 -5
View File
@@ -184,10 +184,6 @@ pub fn buffer_bin_xor(
/// Apply a bitwise not to one input and return the result as a Buffer.
/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
pub fn buffer_unary_not(
left: &Buffer,
offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
}
+1 -5
View File
@@ -110,11 +110,7 @@ where
///
/// - `buffer` must contain strictly increasing values greater than zero
/// - The last value of `buffer` must be greater than or equal to `offset + len`
pub unsafe fn new_unchecked(
run_ends: ScalarBuffer<E>,
offset: usize,
len: usize,
) -> Self {
pub unsafe fn new_unchecked(run_ends: ScalarBuffer<E>, offset: usize, len: usize) -> Self {
Self {
run_ends,
offset,
+3 -9
View File
@@ -221,9 +221,7 @@ mod tests {
}
#[test]
#[should_panic(
expected = "Memory pointer is not aligned with the specified scalar type"
)]
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
fn test_unaligned() {
let expected = [0_i32, 1, 2];
let buffer = Buffer::from_iter(expected.iter().cloned());
@@ -232,18 +230,14 @@ mod tests {
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_length_out_of_bounds() {
let buffer = Buffer::from_iter([0_i32, 1, 2]);
ScalarBuffer::<i32>::new(buffer, 1, 3);
}
#[test]
#[should_panic(
expected = "the offset of the new Buffer cannot exceed the existing length"
)]
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
fn test_offset_out_of_bounds() {
let buffer = Buffer::from_iter([0_i32, 1, 2]);
ScalarBuffer::<i32>::new(buffer, 4, 0);
+2 -4
View File
@@ -154,14 +154,12 @@ impl BooleanBufferBuilder {
if cur_remainder != 0 {
// Pad last byte with 1s
*self.buffer.as_slice_mut().last_mut().unwrap() |=
!((1 << cur_remainder) - 1)
*self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
}
self.buffer.resize(new_len_bytes, 0xFF);
if new_remainder != 0 {
// Clear remaining bits
*self.buffer.as_slice_mut().last_mut().unwrap() &=
(1 << new_remainder) - 1
*self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
}
self.len = new_len;
}
+1 -5
View File
@@ -60,11 +60,7 @@ impl Bytes {
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
/// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
#[inline]
pub(crate) unsafe fn new(
ptr: NonNull<u8>,
len: usize,
deallocation: Deallocation,
) -> Bytes {
pub(crate) unsafe fn new(ptr: NonNull<u8>, len: usize, deallocation: Deallocation) -> Bytes {
Bytes {
ptr,
len,
+13 -22
View File
@@ -60,8 +60,7 @@ impl<'a> UnalignedBitChunk<'a> {
// If less than 8 bytes, read into prefix
if buffer.len() <= 8 {
let (suffix_mask, trailing_padding) =
compute_suffix_mask(len, offset_padding);
let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
let prefix = read_u64(buffer) & suffix_mask & prefix_mask;
return Self {
@@ -75,8 +74,7 @@ impl<'a> UnalignedBitChunk<'a> {
// If less than 16 bytes, read into prefix and suffix
if buffer.len() <= 16 {
let (suffix_mask, trailing_padding) =
compute_suffix_mask(len, offset_padding);
let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
let prefix = read_u64(&buffer[..8]) & prefix_mask;
let suffix = read_u64(&buffer[8..]) & suffix_mask;
@@ -167,10 +165,7 @@ impl<'a> UnalignedBitChunk<'a> {
}
pub type UnalignedBitChunkIterator<'a> = std::iter::Chain<
std::iter::Chain<
std::option::IntoIter<u64>,
std::iter::Cloned<std::slice::Iter<'a, u64>>,
>,
std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>,
std::option::IntoIter<u64>,
>;
@@ -338,9 +333,8 @@ impl Iterator for BitChunkIterator<'_> {
} else {
// the constructor ensures that bit_offset is in 0..8
// that means we need to read at most one additional byte to fill in the high bits
let next = unsafe {
std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64
};
let next =
unsafe { std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 };
(current >> bit_offset) | (next << (64 - bit_offset))
};
@@ -387,8 +381,8 @@ mod tests {
#[test]
fn test_iter_unaligned() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
0b00100000, 0b01000000, 0b11111111,
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
0b01000000, 0b11111111,
];
let buffer: Buffer = Buffer::from(input);
@@ -408,8 +402,8 @@ mod tests {
#[test]
fn test_iter_unaligned_remainder_1_byte() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
0b00100000, 0b01000000, 0b11111111,
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
0b01000000, 0b11111111,
];
let buffer: Buffer = Buffer::from(input);
@@ -442,8 +436,8 @@ mod tests {
#[test]
fn test_iter_unaligned_remainder_bits_large() {
let input: &[u8] = &[
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
0b11111111, 0b00000000, 0b11111111,
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111,
0b00000000, 0b11111111,
];
let buffer: Buffer = Buffer::from(input);
@@ -637,11 +631,8 @@ mod tests {
let max_truncate = 128.min(mask_len - offset);
let truncate = rng.gen::<usize>().checked_rem(max_truncate).unwrap_or(0);
let unaligned = UnalignedBitChunk::new(
buffer.as_slice(),
offset,
mask_len - offset - truncate,
);
let unaligned =
UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate);
let bool_slice = &bools[offset..mask_len - truncate];
+2 -2
View File
@@ -276,8 +276,8 @@ mod tests {
assert_eq!(
actual,
&[
false, true, false, false, true, false, true, false, false, false, false,
false, true, false
false, true, false, false, true, false, true, false, false, false, false, false,
true, false
]
);
+18 -20
View File
@@ -42,8 +42,7 @@ pub fn set_bits(
let chunks = BitChunks::new(data, offset_read + bits_to_align, len - bits_to_align);
chunks.iter().for_each(|chunk| {
null_count += chunk.count_zeros();
write_data[write_byte_index..write_byte_index + 8]
.copy_from_slice(&chunk.to_le_bytes());
write_data[write_byte_index..write_byte_index + 8].copy_from_slice(&chunk.to_le_bytes());
write_byte_index += 8;
});
@@ -70,8 +69,8 @@ mod tests {
fn test_set_bits_aligned() {
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let source: &[u8] = &[
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101,
];
let destination_offset = 8;
@@ -80,8 +79,8 @@ mod tests {
let len = 64;
let expected_data: &[u8] = &[
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101, 0,
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101, 0,
];
let expected_null_count = 24;
let result = set_bits(
@@ -100,8 +99,8 @@ mod tests {
fn test_set_bits_unaligned_destination_start() {
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let source: &[u8] = &[
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101,
];
let destination_offset = 3;
@@ -110,8 +109,8 @@ mod tests {
let len = 64;
let expected_data: &[u8] = &[
0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111,
0b00111110, 0b00101111, 0b00000101, 0b00000000,
0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110,
0b00101111, 0b00000101, 0b00000000,
];
let expected_null_count = 24;
let result = set_bits(
@@ -130,8 +129,8 @@ mod tests {
fn test_set_bits_unaligned_destination_end() {
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let source: &[u8] = &[
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101,
];
let destination_offset = 8;
@@ -140,8 +139,8 @@ mod tests {
let len = 62;
let expected_data: &[u8] = &[
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b00100101, 0,
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b00100101, 0,
];
let expected_null_count = 23;
let result = set_bits(
@@ -160,9 +159,9 @@ mod tests {
fn test_set_bits_unaligned() {
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let source: &[u8] = &[
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101,
0b10011001, 0b11011011, 0b11101011, 0b11000011,
];
let destination_offset = 3;
@@ -171,9 +170,8 @@ mod tests {
let len = 95;
let expected_data: &[u8] = &[
0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
0b01111001, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
0b00000001,
0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001,
0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001,
];
let expected_null_count = 35;
let result = set_bits(
+423 -735
View File
File diff suppressed because it is too large Load Diff
+14 -48
View File
@@ -129,10 +129,7 @@ impl<'a> FormatOptions<'a> {
}
/// Overrides the format used for [`DataType::Timestamp`] columns with a timezone
pub const fn with_timestamp_tz_format(
self,
timestamp_tz_format: Option<&'a str>,
) -> Self {
pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
Self {
timestamp_tz_format,
..self
@@ -173,9 +170,7 @@ impl<'a> ValueFormatter<'a> {
match self.formatter.format.write(self.idx, s) {
Ok(_) => Ok(()),
Err(FormatError::Arrow(e)) => Err(e),
Err(FormatError::Format(_)) => {
Err(ArrowError::CastError("Format error".to_string()))
}
Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
}
}
@@ -260,10 +255,7 @@ impl<'a> ArrayFormatter<'a> {
/// Returns an [`ArrayFormatter`] that can be used to format `array`
///
/// This returns an error if an array of the given data type cannot be formatted
pub fn try_new(
array: &'a dyn Array,
options: &FormatOptions<'a>,
) -> Result<Self, ArrowError> {
pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
Ok(Self {
format: make_formatter(array, options)?,
safe: options.safe,
@@ -472,9 +464,7 @@ fn write_timestamp(
let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
match format {
Some(s) => write!(f, "{}", date.format(s))?,
None => {
write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?
}
None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
}
}
None => match format {
@@ -526,19 +516,11 @@ macro_rules! temporal_display {
impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
type State = TimeFormat<'a>;
fn prepare(
&self,
options: &FormatOptions<'a>,
) -> Result<Self::State, ArrowError> {
fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
Ok(options.$format)
}
fn write(
&self,
fmt: &Self::State,
idx: usize,
f: &mut dyn Write,
) -> FormatResult {
fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
let value = self.value(idx);
let naive = $convert(value as _).ok_or_else(|| {
ArrowError::CastError(format!(
@@ -575,19 +557,11 @@ macro_rules! duration_display {
impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
type State = DurationFormat;
fn prepare(
&self,
options: &FormatOptions<'a>,
) -> Result<Self::State, ArrowError> {
fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
Ok(options.duration_format)
}
fn write(
&self,
fmt: &Self::State,
idx: usize,
f: &mut dyn Write,
) -> FormatResult {
fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
let v = self.value(idx);
match fmt {
DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
@@ -704,8 +678,7 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
let value: u128 = self.value(idx) as u128;
let months_part: i32 =
((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32;
let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
@@ -937,10 +910,7 @@ impl<'a> DisplayIndexState<'a> for &'a UnionArray {
/// suitable for converting large arrays or record batches.
///
/// Please see [`ArrayFormatter`] for a more performant interface
pub fn array_value_to_string(
column: &dyn Array,
row: usize,
) -> Result<String, ArrowError> {
pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
let options = FormatOptions::default().with_display_error(true);
let formatter = ArrayFormatter::try_new(column, &options)?;
Ok(formatter.value(row).to_string())
@@ -986,12 +956,9 @@ mod tests {
// [[a, b, c], [d, e, f], [g, h]]
let entry_offsets = [0, 3, 6, 8];
let map_array = MapArray::new_from_strings(
keys.clone().into_iter(),
&values_data,
&entry_offsets,
)
.unwrap();
let map_array =
MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
.unwrap();
assert_eq!(
"{d: 30, e: 40, f: 50}",
array_value_to_string(&map_array, 1).unwrap()
@@ -1006,8 +973,7 @@ mod tests {
#[test]
fn test_array_value_to_string_duration() {
let iso_fmt = FormatOptions::new();
let pretty_fmt =
FormatOptions::new().with_duration_format(DurationFormat::Pretty);
let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);
let array = DurationNanosecondArray::from(vec![
1,
+92 -123
View File
@@ -64,10 +64,7 @@ impl TimestampParser {
/// Parses a date of the form `1997-01-31`
fn date(&self) -> Option<NaiveDate> {
if self.mask & 0b1111111111 != 0b1101101111
|| !self.test(4, b'-')
|| !self.test(7, b'-')
{
if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
return None;
}
@@ -173,13 +170,9 @@ impl TimestampParser {
/// * "2023-01-01 04:05:06.789 PST",
///
/// [IANA timezones]: https://www.iana.org/time-zones
pub fn string_to_datetime<T: TimeZone>(
timezone: &T,
s: &str,
) -> Result<DateTime<T>, ArrowError> {
let err = |ctx: &str| {
ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"))
};
pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
let err =
|ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
let bytes = s.as_bytes();
if bytes.len() < 10 {
@@ -300,9 +293,8 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
/// This function does not support parsing strings with a timezone
/// or offset specified, as it considers only time since midnight.
pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
let nt = string_to_time(s).ok_or_else(|| {
ArrowError::ParseError(format!("Failed to parse \'{s}\' as time"))
})?;
let nt = string_to_time(s)
.ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
}
@@ -313,12 +305,8 @@ fn string_to_time(s: &str) -> Option<NaiveTime> {
}
let (am, bytes) = match bytes.get(bytes.len() - 3..) {
Some(b" AM" | b" am" | b" Am" | b" aM") => {
(Some(true), &bytes[..bytes.len() - 3])
}
Some(b" PM" | b" pm" | b" pM" | b" Pm") => {
(Some(false), &bytes[..bytes.len() - 3])
}
Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
_ => (None, bytes),
};
@@ -501,10 +489,7 @@ impl Parser for Time64NanosecondType {
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
let nt = NaiveTime::parse_from_str(string, format).ok()?;
Some(
nt.num_seconds_from_midnight() as i64 * 1_000_000_000
+ nt.nanosecond() as i64,
)
Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
}
}
@@ -519,10 +504,7 @@ impl Parser for Time64MicrosecondType {
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
let nt = NaiveTime::parse_from_str(string, format).ok()?;
Some(
nt.num_seconds_from_midnight() as i64 * 1_000_000
+ nt.nanosecond() as i64 / 1_000,
)
Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
}
}
@@ -537,10 +519,7 @@ impl Parser for Time32MillisecondType {
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
let nt = NaiveTime::parse_from_str(string, format).ok()?;
Some(
nt.num_seconds_from_midnight() as i32 * 1_000
+ nt.nanosecond() as i32 / 1_000_000,
)
Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
}
}
@@ -555,10 +534,7 @@ impl Parser for Time32SecondType {
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
let nt = NaiveTime::parse_from_str(string, format).ok()?;
Some(
nt.num_seconds_from_midnight() as i32
+ nt.nanosecond() as i32 / 1_000_000_000,
)
Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
}
}
@@ -615,10 +591,8 @@ fn parse_date(string: &str) -> Option<NaiveDate> {
_ => return None,
};
let year = digits[0] as u16 * 1000
+ digits[1] as u16 * 100
+ digits[2] as u16 * 10
+ digits[3] as u16;
let year =
digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
NaiveDate::from_ymd_opt(year as _, month as _, day as _)
}
@@ -728,8 +702,7 @@ pub fn parse_decimal<T: DecimalType>(
fractionals += 1;
digits += 1;
result = result.mul_wrapping(base);
result =
result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
}
// Fail on "."
@@ -771,9 +744,11 @@ pub fn parse_interval_year_month(
let config = IntervalParseConfig::new(IntervalUnit::Year);
let interval = Interval::parse(value, &config)?;
let months = interval.to_year_months().map_err(|_| ArrowError::CastError(format!(
let months = interval.to_year_months().map_err(|_| {
ArrowError::CastError(format!(
"Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
)))?;
))
})?;
Ok(IntervalYearMonthType::make_value(0, months))
}
@@ -888,21 +863,16 @@ impl FromStr for IntervalAmount {
Ok(0)
} else {
integer.parse::<i64>().map_err(|_| {
ArrowError::ParseError(format!(
"Failed to parse {s} as interval amount"
))
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
})
}?;
let frac_unscaled = frac.parse::<i64>().map_err(|_| {
ArrowError::ParseError(format!(
"Failed to parse {s} as interval amount"
))
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
})?;
// scale fractional part by interval precision
let frac =
frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
// propagate the sign of the integer part to the fractional part
let frac = if integer < 0 || explicit_neg {
@@ -915,9 +885,9 @@ impl FromStr for IntervalAmount {
Ok(result)
}
Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(
format!("Failed to parse {s} as interval amount"),
)),
Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
"Failed to parse {s} as interval amount"
))),
Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
Err(ArrowError::ParseError(format!(
"{s} exceeds the precision available for interval amount"
@@ -925,9 +895,7 @@ impl FromStr for IntervalAmount {
}
Some(_) | None => {
let integer = s.parse::<i64>().map_err(|_| {
ArrowError::ParseError(format!(
"Failed to parse {s} as interval amount"
))
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
})?;
let result = Self { integer, frac: 0 };
@@ -1005,25 +973,20 @@ impl Interval {
/// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
/// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
/// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
fn add(
&self,
amount: IntervalAmount,
unit: IntervalUnit,
) -> Result<Self, ArrowError> {
fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
let result = match unit {
IntervalUnit::Century => {
let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
let months =
months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} centuries as months in a signed 32-bit integer",
&amount.integer
))
})?;
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} centuries as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
@@ -1031,32 +994,30 @@ impl Interval {
let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
let months =
months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} decades as months in a signed 32-bit integer",
&amount.integer
))
})?;
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} decades as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
IntervalUnit::Year => {
let months_int = amount.integer.mul_checked(12)?;
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
let months =
months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} years as months in a signed 32-bit integer",
&amount.integer
))
})?;
let months = months_int
.add_checked(month_frac)?
.try_into()
.map_err(|_| {
ArrowError::ParseError(format!(
"Unable to represent {} years as months in a signed 32-bit integer",
&amount.integer
))
})?;
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
}
@@ -1090,8 +1051,7 @@ impl Interval {
))
})?;
let nanos =
amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
Self::new(
self.months,
@@ -1107,8 +1067,7 @@ impl Interval {
))
})?;
let nanos =
amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
Self::new(
self.months,
@@ -1118,8 +1077,7 @@ impl Interval {
}
IntervalUnit::Hour => {
let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
let nanos_frac =
amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
let nanos = nanos_int.add_checked(nanos_frac)?;
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
@@ -1398,8 +1356,7 @@ mod tests {
"2030-12-04T17:11:10.123456",
];
for case in cases {
let chrono =
NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
let custom = string_to_datetime(&Utc, case).unwrap();
assert_eq!(chrono, custom.naive_utc())
}
@@ -1431,8 +1388,7 @@ mod tests {
];
for (s, ctx) in cases {
let expected =
format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
assert_eq!(actual, expected)
}
@@ -1497,8 +1453,7 @@ mod tests {
assert_eq!(local, "2020-09-08 15:42:29");
let dt =
NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ")
.unwrap();
NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
let local: Tz = "+08:00".parse().unwrap();
// Parsed as offset from UTC
@@ -1629,10 +1584,7 @@ mod tests {
// custom format
assert_eq!(
Time64NanosecondType::parse_formatted(
"02 - 10 - 01 - .1234567",
"%H - %M - %S - %.f"
),
Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
Some(7_801_123_456_700)
);
}
@@ -1709,10 +1661,7 @@ mod tests {
// custom format
assert_eq!(
Time64MicrosecondType::parse_formatted(
"02 - 10 - 01 - .1234",
"%H - %M - %S - %.f"
),
Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
Some(7_801_123_400)
);
}
@@ -1759,10 +1708,7 @@ mod tests {
// custom format
assert_eq!(
Time32MillisecondType::parse_formatted(
"02 - 10 - 01 - .1",
"%H - %M - %S - %.f"
),
Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
Some(7_801_100)
);
}
@@ -2005,8 +1951,19 @@ mod tests {
);
assert_eq!(
Interval::new(-13i32, -8i32, -NANOS_PER_HOUR - NANOS_PER_MINUTE - NANOS_PER_SECOND - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64),
Interval::parse("-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond", &config).unwrap(),
Interval::new(
-13i32,
-8i32,
-NANOS_PER_HOUR
- NANOS_PER_MINUTE
- NANOS_PER_SECOND
- (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
),
Interval::parse(
"-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
&config
)
.unwrap(),
);
}
@@ -2280,22 +2237,34 @@ mod tests {
let edge_tests_256 = [
(
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
i256::from_string(
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
)
.unwrap(),
0,
),
(
"999999999999999999999999999999999999999999999999999999999999999999999999.9999",
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
i256::from_string(
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
)
.unwrap(),
4,
),
(
"99999999999999999999999999999999999999999999999999.99999999999999999999999999",
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
i256::from_string(
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
)
.unwrap(),
26,
),
(
"99999999999999999999999999999999999999999999999999",
i256::from_string("9999999999999999999999999999999999999999999999999900000000000000000000000000").unwrap(),
i256::from_string(
"9999999999999999999999999999999999999999999999999900000000000000000000000000",
)
.unwrap(),
26,
),
];
+16 -34
View File
@@ -25,9 +25,7 @@ use comfy_table::{Cell, Table};
use std::fmt::Display;
/// Create a visual representation of record batches
pub fn pretty_format_batches(
results: &[RecordBatch],
) -> Result<impl Display, ArrowError> {
pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
let options = FormatOptions::default().with_display_error(true);
pretty_format_batches_with_options(results, &options)
}
@@ -70,10 +68,7 @@ pub fn print_columns(col_name: &str, results: &[ArrayRef]) -> Result<(), ArrowEr
}
/// Convert a series of record batches into a table
fn create_table(
results: &[RecordBatch],
options: &FormatOptions,
) -> Result<Table, ArrowError> {
fn create_table(results: &[RecordBatch], options: &FormatOptions) -> Result<Table, ArrowError> {
let mut table = Table::new();
table.load_preset("||--+-++| ++++++");
@@ -209,8 +204,8 @@ mod tests {
let table = pretty_format_columns("a", &columns).unwrap().to_string();
let expected = vec![
"+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |",
"| |", "| g |", "+---+",
"+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", "| |",
"| g |", "+---+",
];
let actual: Vec<&str> = table.lines().collect();
@@ -289,10 +284,8 @@ mod tests {
#[test]
fn test_pretty_format_fixed_size_list() {
// define a schema.
let field_type = DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Int32, true)),
3,
);
let field_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3);
let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)]));
let keys_builder = Int32Array::builder(3);
@@ -383,10 +376,7 @@ mod tests {
};
}
fn timestamp_batch<T: ArrowTimestampType>(
timezone: &str,
value: T::Native,
) -> RecordBatch {
fn timestamp_batch<T: ArrowTimestampType>(timezone: &str, value: T::Native) -> RecordBatch {
let mut builder = PrimitiveBuilder::<T>::with_capacity(10);
builder.append_value(value);
builder.append_null();
@@ -621,8 +611,8 @@ mod tests {
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let expected = vec![
"+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |",
"| 3040 |", "+------+",
"+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |",
"+------+",
];
let actual: Vec<&str> = table.lines().collect();
@@ -660,16 +650,14 @@ mod tests {
)),
Arc::new(StructArray::from(vec![(
Arc::new(Field::new("c121", DataType::Utf8, false)),
Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")]))
as ArrayRef,
Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")])) as ArrayRef,
)])) as ArrayRef,
),
]);
let c2 = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let expected = vec![
@@ -705,8 +693,7 @@ mod tests {
UnionMode::Dense,
)]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let actual: Vec<&str> = table.lines().collect();
let expected = vec![
@@ -742,8 +729,7 @@ mod tests {
UnionMode::Sparse,
)]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let actual: Vec<&str> = table.lines().collect();
let expected = vec![
@@ -799,8 +785,7 @@ mod tests {
UnionMode::Sparse,
)]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let actual: Vec<&str> = table.lines().collect();
let expected = vec![
@@ -882,8 +867,7 @@ mod tests {
let table = pretty_format_batches(&[batch]).unwrap().to_string();
let expected = vec![
"+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |",
"+------+",
"+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", "+------+",
];
let actual: Vec<&str> = table.lines().collect();
@@ -986,9 +970,7 @@ mod tests {
fn test_format_options() {
let options = FormatOptions::default().with_null("null");
let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), Some(4)]);
let batch =
RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)])
.unwrap();
let batch = RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)]).unwrap();
let column = pretty_format_columns_with_options(
"my_column_name",
+41 -102
View File
@@ -292,8 +292,7 @@ impl Format {
let header_length = headers.len();
// keep track of inferred field types
let mut column_types: Vec<InferredDataType> =
vec![Default::default(); header_length];
let mut column_types: Vec<InferredDataType> = vec![Default::default(); header_length];
let mut records_count = 0;
@@ -307,9 +306,7 @@ impl Format {
// Note since we may be looking at a sample of the data, we make the safe assumption that
// they could be nullable
for (i, column_type) in
column_types.iter_mut().enumerate().take(header_length)
{
for (i, column_type) in column_types.iter_mut().enumerate().take(header_length) {
if let Some(string) = record.get(i) {
if !self.null_regex.is_null(string) {
column_type.update(string)
@@ -606,8 +603,7 @@ impl Decoder {
return Ok(bytes);
}
let to_read =
self.batch_size.min(self.end - self.line_number) - self.record_decoder.len();
let to_read = self.batch_size.min(self.end - self.line_number) - self.record_decoder.len();
let (_, bytes) = self.record_decoder.decode(buf, to_read)?;
Ok(bytes)
}
@@ -662,29 +658,23 @@ fn parse(
let i = *i;
let field = &fields[i];
match field.data_type() {
DataType::Boolean => {
build_boolean_array(line_number, rows, i, null_regex)
}
DataType::Decimal128(precision, scale) => {
build_decimal_array::<Decimal128Type>(
line_number,
rows,
i,
*precision,
*scale,
null_regex,
)
}
DataType::Decimal256(precision, scale) => {
build_decimal_array::<Decimal256Type>(
line_number,
rows,
i,
*precision,
*scale,
null_regex,
)
}
DataType::Boolean => build_boolean_array(line_number, rows, i, null_regex),
DataType::Decimal128(precision, scale) => build_decimal_array::<Decimal128Type>(
line_number,
rows,
i,
*precision,
*scale,
null_regex,
),
DataType::Decimal256(precision, scale) => build_decimal_array::<Decimal256Type>(
line_number,
rows,
i,
*precision,
*scale,
null_regex,
),
DataType::Int8 => {
build_primitive_array::<Int8Type>(line_number, rows, i, null_regex)
}
@@ -721,34 +711,17 @@ fn parse(
DataType::Date64 => {
build_primitive_array::<Date64Type>(line_number, rows, i, null_regex)
}
DataType::Time32(TimeUnit::Second) => build_primitive_array::<
Time32SecondType,
>(
line_number, rows, i, null_regex
),
DataType::Time32(TimeUnit::Second) => {
build_primitive_array::<Time32SecondType>(line_number, rows, i, null_regex)
}
DataType::Time32(TimeUnit::Millisecond) => {
build_primitive_array::<Time32MillisecondType>(
line_number,
rows,
i,
null_regex,
)
build_primitive_array::<Time32MillisecondType>(line_number, rows, i, null_regex)
}
DataType::Time64(TimeUnit::Microsecond) => {
build_primitive_array::<Time64MicrosecondType>(
line_number,
rows,
i,
null_regex,
)
build_primitive_array::<Time64MicrosecondType>(line_number, rows, i, null_regex)
}
DataType::Time64(TimeUnit::Nanosecond) => {
build_primitive_array::<Time64NanosecondType>(
line_number,
rows,
i,
null_regex,
)
build_primitive_array::<Time64NanosecondType>(line_number, rows, i, null_regex)
}
DataType::Timestamp(TimeUnit::Second, tz) => {
build_timestamp_array::<TimestampSecondType>(
@@ -786,9 +759,7 @@ fn parse(
null_regex,
)
}
DataType::Null => {
Ok(Arc::new(NullArray::builder(rows.len()).finish()) as ArrayRef)
}
DataType::Null => Ok(Arc::new(NullArray::builder(rows.len()).finish()) as ArrayRef),
DataType::Utf8 => Ok(Arc::new(
rows.iter()
.map(|row| {
@@ -853,8 +824,7 @@ fn parse(
})
.collect();
let projected_fields: Fields =
projection.iter().map(|i| fields[*i].clone()).collect();
let projected_fields: Fields = projection.iter().map(|i| fields[*i].clone()).collect();
let projected_schema = Arc::new(match metadata {
None => Schema::new(projected_fields),
@@ -898,8 +868,7 @@ fn build_decimal_array<T: DecimalType>(
// append null
decimal_builder.append_null();
} else {
let decimal_value: Result<T::Native, _> =
parse_decimal::<T>(s, precision, scale);
let decimal_value: Result<T::Native, _> = parse_decimal::<T>(s, precision, scale);
match decimal_value {
Ok(v) => {
decimal_builder.append_value(v);
@@ -957,22 +926,10 @@ fn build_timestamp_array<T: ArrowTimestampType>(
Ok(Arc::new(match timezone {
Some(timezone) => {
let tz: Tz = timezone.parse()?;
build_timestamp_array_impl::<T, _>(
line_number,
rows,
col_idx,
&tz,
null_regex,
)?
.with_timezone(timezone)
build_timestamp_array_impl::<T, _>(line_number, rows, col_idx, &tz, null_regex)?
.with_timezone(timezone)
}
None => build_timestamp_array_impl::<T, _>(
line_number,
rows,
col_idx,
&Utc,
null_regex,
)?,
None => build_timestamp_array_impl::<T, _>(line_number, rows, col_idx, &Utc, null_regex)?,
}))
}
@@ -1169,10 +1126,7 @@ impl ReaderBuilder {
}
/// Create a new `BufReader` from a buffered reader
pub fn build_buffered<R: BufRead>(
self,
reader: R,
) -> Result<BufReader<R>, ArrowError> {
pub fn build_buffered<R: BufRead>(self, reader: R) -> Result<BufReader<R>, ArrowError> {
Ok(BufReader {
reader,
decoder: self.build_decoder(),
@@ -1318,8 +1272,7 @@ mod tests {
Field::new("lng", DataType::Float64, false),
]);
let file_with_headers =
File::open("test/data/uk_cities_with_headers.csv").unwrap();
let file_with_headers = File::open("test/data/uk_cities_with_headers.csv").unwrap();
let file_without_headers = File::open("test/data/uk_cities.csv").unwrap();
let both_files = file_with_headers
.chain(Cursor::new("\n".to_string()))
@@ -1642,8 +1595,7 @@ mod tests {
schema.field(5).data_type()
);
let names: Vec<&str> =
schema.fields().iter().map(|x| x.name().as_str()).collect();
let names: Vec<&str> = schema.fields().iter().map(|x| x.name().as_str()).collect();
assert_eq!(
names,
vec![
@@ -1819,16 +1771,11 @@ mod tests {
-2203932304000
);
assert_eq!(
Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S")
.unwrap(),
Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S").unwrap(),
-2203932304000
);
assert_eq!(
Date64Type::parse_formatted(
"1900-02-28 12:34:56+0030",
"%Y-%m-%d %H:%M:%S%z"
)
.unwrap(),
Date64Type::parse_formatted("1900-02-28 12:34:56+0030", "%Y-%m-%d %H:%M:%S%z").unwrap(),
-2203932304000 - (30 * 60 * 1000)
);
}
@@ -1865,10 +1812,7 @@ mod tests {
#[test]
fn test_parse_timestamp() {
test_parse_timestamp_impl::<TimestampNanosecondType>(
None,
&[0, 0, -7_200_000_000_000],
);
test_parse_timestamp_impl::<TimestampNanosecondType>(None, &[0, 0, -7_200_000_000_000]);
test_parse_timestamp_impl::<TimestampNanosecondType>(
Some("+00:00".into()),
&[0, 0, -7_200_000_000_000],
@@ -1885,10 +1829,7 @@ mod tests {
Some("-03".into()),
&[10_800_000, 0, -7_200_000],
);
test_parse_timestamp_impl::<TimestampSecondType>(
Some("-03".into()),
&[10_800, 0, -7_200],
);
test_parse_timestamp_impl::<TimestampSecondType>(Some("-03".into()), &[10_800, 0, -7_200]);
}
#[test]
@@ -2227,10 +2168,8 @@ mod tests {
expected_rows
);
let buffered = std::io::BufReader::with_capacity(
capacity,
File::open(path).unwrap(),
);
let buffered =
std::io::BufReader::with_capacity(capacity, File::open(path).unwrap());
let reader = ReaderBuilder::new(schema.clone())
.with_batch_size(batch_size)
+10 -9
View File
@@ -76,11 +76,7 @@ impl RecordDecoder {
/// Decodes records from `input` returning the number of records and bytes read
///
/// Note: this expects to be called with an empty `input` to signal EOF
pub fn decode(
&mut self,
input: &[u8],
to_read: usize,
) -> Result<(usize, usize), ArrowError> {
pub fn decode(&mut self, input: &[u8], to_read: usize) -> Result<(usize, usize), ArrowError> {
if to_read == 0 {
return Ok((0, 0));
}
@@ -124,11 +120,17 @@ impl RecordDecoder {
// Need to allocate more capacity
ReadRecordResult::OutputFull => break,
ReadRecordResult::OutputEndsFull => {
return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got more than {}", self.line_number, self.num_columns, self.current_field)));
return Err(ArrowError::CsvError(format!(
"incorrect number of fields for line {}, expected {} got more than {}",
self.line_number, self.num_columns, self.current_field
)));
}
ReadRecordResult::Record => {
if self.current_field != self.num_columns {
return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got {}", self.line_number, self.num_columns, self.current_field)));
return Err(ArrowError::CsvError(format!(
"incorrect number of fields for line {}, expected {} got {}",
self.line_number, self.num_columns, self.current_field
)));
}
read += 1;
self.current_field = 0;
@@ -334,8 +336,7 @@ mod tests {
let mut decoder = RecordDecoder::new(Reader::new(), 2);
let err = decoder.decode(csv.as_bytes(), 4).unwrap_err().to_string();
let expected =
"Csv error: incorrect number of fields for line 3, expected 2 got 1";
let expected = "Csv error: incorrect number of fields for line 3, expected 2 got 1";
assert_eq!(err, expected);
+13 -32
View File
@@ -389,18 +389,12 @@ mod tests {
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c2 =
PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
let c5 = TimestampMillisecondArray::from(vec![
None,
Some(1555584887378),
Some(1555555555555),
]);
let c5 =
TimestampMillisecondArray::from(vec![None, Some(1555584887378), Some(1555555555555)]);
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
let c7: DictionaryArray<Int32Type> =
vec!["cupcakes", "cupcakes", "foo"].into_iter().collect();
@@ -451,13 +445,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
Field::new("c2", DataType::Decimal256(76, 6), true),
]);
let mut c1_builder =
Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
let mut c1_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
c1_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
let c1 = c1_builder.finish();
let mut c2_builder =
Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
let mut c2_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
c2_builder.extend(vec![
Some(i256::from_i128(-3335724)),
Some(i256::from_i128(2179404)),
@@ -467,8 +459,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
let c2 = c2_builder.finish();
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();
let mut file = tempfile::tempfile().unwrap();
@@ -512,11 +503,8 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c2 =
PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
@@ -629,8 +617,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
let c0 = UInt32Array::from(vec![Some(123), Some(234)]);
let c1 = Date64Array::from(vec![Some(1926632005177), Some(1926632005177685347)]);
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)])
.unwrap();
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)]).unwrap();
let mut file = tempfile::tempfile().unwrap();
let mut writer = Writer::new(&mut file);
@@ -656,15 +643,9 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
Field::new("c4", DataType::Time32(TimeUnit::Second), false),
]);
let c1 = TimestampMillisecondArray::from(vec![
Some(1555584887378),
Some(1635577147000),
])
.with_timezone("+00:00".to_string());
let c2 = TimestampMillisecondArray::from(vec![
Some(1555584887378),
Some(1635577147000),
]);
let c1 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)])
.with_timezone("+00:00".to_string());
let c2 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)]);
let c3 = Date32Array::from(vec![3, 2]);
let c4 = Time32SecondArray::from(vec![1234, 24680]);
+61 -86
View File
@@ -42,9 +42,7 @@ pub(crate) fn contains_nulls(
) -> bool {
match null_bit_buffer {
Some(buffer) => {
match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len)
.next()
{
match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
Some((start, end)) => start != 0 || end != len,
None => len != 0, // No non-null values
}
@@ -130,9 +128,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
MutableBuffer::new(capacity * k.primitive_width().unwrap()),
empty_buffer,
],
DataType::FixedSizeList(_, _)
| DataType::Struct(_)
| DataType::RunEndEncoded(_, _) => [empty_buffer, MutableBuffer::new(0)],
DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
[empty_buffer, MutableBuffer::new(0)]
}
DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
MutableBuffer::new(capacity * mem::size_of::<u8>()),
empty_buffer,
@@ -159,10 +157,9 @@ pub(crate) fn into_buffers(
) -> Vec<Buffer> {
match data_type {
DataType::Null | DataType::Struct(_) | DataType::FixedSizeList(_, _) => vec![],
DataType::Utf8
| DataType::Binary
| DataType::LargeUtf8
| DataType::LargeBinary => vec![buffer1.into(), buffer2.into()],
DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => {
vec![buffer1.into(), buffer2.into()]
}
DataType::Union(_, mode) => {
match mode {
// Based on Union's DataTypeLayout
@@ -452,12 +449,11 @@ impl ArrayData {
for spec in layout.buffers.iter() {
match spec {
BufferSpec::FixedWidth { byte_width, .. } => {
let buffer_size =
self.len.checked_mul(*byte_width).ok_or_else(|| {
ArrowError::ComputeError(
"Integer overflow computing buffer size".to_string(),
)
})?;
let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
ArrowError::ComputeError(
"Integer overflow computing buffer size".to_string(),
)
})?;
result += buffer_size;
}
BufferSpec::VariableWidth => {
@@ -590,9 +586,7 @@ impl ArrayData {
DataType::LargeBinary | DataType::LargeUtf8 => {
(vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
}
DataType::FixedSizeBinary(i) => {
(vec![zeroed(*i as usize * len)], vec![], true)
}
DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
DataType::List(f) | DataType::Map(f, _) => (
vec![zeroed((len + 1) * 4)],
vec![ArrayData::new_empty(f.data_type())],
@@ -749,9 +743,7 @@ impl ArrayData {
)));
}
for (i, (buffer, spec)) in
self.buffers.iter().zip(layout.buffers.iter()).enumerate()
{
for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
match spec {
BufferSpec::FixedWidth {
byte_width,
@@ -999,10 +991,8 @@ impl ArrayData {
}
DataType::RunEndEncoded(run_ends_field, values_field) => {
self.validate_num_child_data(2)?;
let run_ends_data =
self.get_valid_child_data(0, run_ends_field.data_type())?;
let values_data =
self.get_valid_child_data(1, values_field.data_type())?;
let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
let values_data = self.get_valid_child_data(1, values_field.data_type())?;
if run_ends_data.len != values_data.len {
return Err(ArrowError::InvalidArgumentError(format!(
"The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
@@ -1022,9 +1012,7 @@ impl ArrayData {
for (i, (_, field)) in fields.iter().enumerate() {
let field_data = self.get_valid_child_data(i, field.data_type())?;
if mode == &UnionMode::Sparse
&& field_data.len < (self.len + self.offset)
{
if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) {
return Err(ArrowError::InvalidArgumentError(format!(
"Sparse union child array #{} has length smaller than expected for union array ({} < {})",
i, field_data.len, self.len + self.offset
@@ -1083,14 +1071,14 @@ impl ArrayData {
i: usize,
expected_type: &DataType,
) -> Result<&ArrayData, ArrowError> {
let values_data = self.child_data
.get(i)
.ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"{} did not have enough child arrays. Expected at least {} but had only {}",
self.data_type, i+1, self.child_data.len()
))
})?;
let values_data = self.child_data.get(i).ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"{} did not have enough child arrays. Expected at least {} but had only {}",
self.data_type,
i + 1,
self.child_data.len()
))
})?;
if expected_type != &values_data.data_type {
return Err(ArrowError::InvalidArgumentError(format!(
@@ -1160,7 +1148,8 @@ impl ArrayData {
if actual != nulls.null_count() {
return Err(ArrowError::InvalidArgumentError(format!(
"null_count value ({}) doesn't match actual number of nulls in array ({})",
nulls.null_count(), actual
nulls.null_count(),
actual
)));
}
}
@@ -1209,23 +1198,22 @@ impl ArrayData {
) -> Result<(), ArrowError> {
let mask = match mask {
Some(mask) => mask,
None => return match child.null_count() {
0 => Ok(()),
_ => Err(ArrowError::InvalidArgumentError(format!(
"non-nullable child of type {} contains nulls not present in parent {}",
child.data_type,
self.data_type
))),
},
None => {
return match child.null_count() {
0 => Ok(()),
_ => Err(ArrowError::InvalidArgumentError(format!(
"non-nullable child of type {} contains nulls not present in parent {}",
child.data_type, self.data_type
))),
}
}
};
match child.nulls() {
Some(nulls) if !mask.contains(nulls) => {
Err(ArrowError::InvalidArgumentError(format!(
"non-nullable child of type {} contains nulls not present in parent",
child.data_type
)))
}
Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
"non-nullable child of type {} contains nulls not present in parent",
child.data_type
))),
_ => Ok(()),
}
}
@@ -1240,9 +1228,7 @@ impl ArrayData {
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
DataType::LargeBinary => {
self.validate_offsets_full::<i64>(self.buffers[1].len())
}
DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
DataType::List(_) | DataType::Map(_, _) => {
let child = &self.child_data[0];
self.validate_offsets_full::<i32>(child.len)
@@ -1300,11 +1286,7 @@ impl ArrayData {
///
/// For example, the offsets buffer contained `[1, 2, 4]`, this
/// function would call `validate([1,2])`, and `validate([2,4])`
fn validate_each_offset<T, V>(
&self,
offset_limit: usize,
validate: V,
) -> Result<(), ArrowError>
fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
where
T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
@@ -1358,32 +1340,26 @@ impl ArrayData {
let values_buffer = &self.buffers[1].as_slice();
if let Ok(values_str) = std::str::from_utf8(values_buffer) {
// Validate Offsets are correct
self.validate_each_offset::<T, _>(
values_buffer.len(),
|string_index, range| {
if !values_str.is_char_boundary(range.start)
|| !values_str.is_char_boundary(range.end)
{
return Err(ArrowError::InvalidArgumentError(format!(
"incomplete utf-8 byte sequence from index {string_index}"
)));
}
Ok(())
},
)
self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
if !values_str.is_char_boundary(range.start)
|| !values_str.is_char_boundary(range.end)
{
return Err(ArrowError::InvalidArgumentError(format!(
"incomplete utf-8 byte sequence from index {string_index}"
)));
}
Ok(())
})
} else {
// find specific offset that failed utf8 validation
self.validate_each_offset::<T, _>(
values_buffer.len(),
|string_index, range| {
std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
))
})?;
Ok(())
},
)
self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
ArrowError::InvalidArgumentError(format!(
"Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
))
})?;
Ok(())
})
}
}
@@ -1414,8 +1390,7 @@ impl ArrayData {
assert!(buffer.len() / mem::size_of::<T>() >= required_len);
// Justification: buffer size was validated above
let indexes: &[T] =
&buffer.typed_data::<T>()[self.offset..self.offset + self.len];
let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..self.offset + self.len];
indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
// Do not check the value is null (value can be arbitrary)
+372 -375
View File
File diff suppressed because it is too large Load Diff
+5 -6
View File
@@ -78,11 +78,10 @@ pub(super) fn boolean_equal(
// get a ref of the null buffer bytes, to use in testing for nullness
let lhs_nulls = lhs.nulls().unwrap();
BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len)
.all(|i| {
let lhs_pos = lhs_start + lhs.offset() + i;
let rhs_pos = rhs_start + rhs.offset() + i;
get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos)
})
BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len).all(|i| {
let lhs_pos = lhs_start + lhs.offset() + i;
let rhs_pos = rhs_start + rhs.offset() + i;
get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos)
})
}
}
+8 -14
View File
@@ -75,20 +75,15 @@ pub(super) fn fixed_binary_equal(
})
} else {
let lhs_nulls = lhs.nulls().unwrap();
let lhs_slices_iter = BitSliceIterator::new(
lhs_nulls.validity(),
lhs_start + lhs_nulls.offset(),
len,
);
let lhs_slices_iter =
BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len);
let rhs_nulls = rhs.nulls().unwrap();
let rhs_slices_iter = BitSliceIterator::new(
rhs_nulls.validity(),
rhs_start + rhs_nulls.offset(),
len,
);
let rhs_slices_iter =
BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len);
lhs_slices_iter.zip(rhs_slices_iter).all(
|((l_start, l_end), (r_start, r_end))| {
lhs_slices_iter
.zip(rhs_slices_iter)
.all(|((l_start, l_end), (r_start, r_end))| {
l_start == r_start
&& l_end == r_end
&& equal_len(
@@ -98,8 +93,7 @@ pub(super) fn fixed_binary_equal(
(rhs_start + r_start) * size,
(l_end - l_start) * size,
)
},
)
})
}
}
}
+13 -39
View File
@@ -76,24 +76,16 @@ fn equal_values(
DataType::Int64 => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Float32 => primitive_equal::<f32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Float64 => primitive_equal::<f64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Decimal128(_, _) => {
primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Decimal256(_, _) => {
primitive_equal::<i256>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Date32
| DataType::Time32(_)
| DataType::Interval(IntervalUnit::YearMonth) => {
DataType::Decimal128(_, _) => primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Decimal256(_, _) => primitive_equal::<i256>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
primitive_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Date64
| DataType::Interval(IntervalUnit::DayTime)
| DataType::Time64(_)
| DataType::Timestamp(_, _)
| DataType::Duration(_) => {
primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
}
| DataType::Duration(_) => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Interval(IntervalUnit::MonthDayNano) => {
primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len)
}
@@ -103,39 +95,21 @@ fn equal_values(
DataType::LargeUtf8 | DataType::LargeBinary => {
variable_sized_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::FixedSizeBinary(_) => {
fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::FixedSizeBinary(_) => fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::FixedSizeList(_, _) => {
fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::Union(_, _) => union_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::Dictionary(data_type, _) => match data_type.as_ref() {
DataType::Int8 => dictionary_equal::<i8>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Int16 => {
dictionary_equal::<i16>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Int32 => {
dictionary_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Int64 => {
dictionary_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::UInt8 => {
dictionary_equal::<u8>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::UInt16 => {
dictionary_equal::<u16>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::UInt32 => {
dictionary_equal::<u32>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::UInt64 => {
dictionary_equal::<u64>(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::Int16 => dictionary_equal::<i16>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Int32 => dictionary_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::Int64 => dictionary_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::UInt8 => dictionary_equal::<u8>(lhs, rhs, lhs_start, rhs_start, len),
DataType::UInt16 => dictionary_equal::<u16>(lhs, rhs, lhs_start, rhs_start, len),
DataType::UInt32 => dictionary_equal::<u32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::UInt64 => dictionary_equal::<u64>(lhs, rhs, lhs_start, rhs_start, len),
_ => unreachable!(),
},
DataType::Float16 => primitive_equal::<f16>(lhs, rhs, lhs_start, rhs_start, len),
+8 -14
View File
@@ -73,20 +73,15 @@ pub(super) fn primitive_equal<T>(
})
} else {
let lhs_nulls = lhs.nulls().unwrap();
let lhs_slices_iter = BitSliceIterator::new(
lhs_nulls.validity(),
lhs_start + lhs_nulls.offset(),
len,
);
let lhs_slices_iter =
BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len);
let rhs_nulls = rhs.nulls().unwrap();
let rhs_slices_iter = BitSliceIterator::new(
rhs_nulls.validity(),
rhs_start + rhs_nulls.offset(),
len,
);
let rhs_slices_iter =
BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len);
lhs_slices_iter.zip(rhs_slices_iter).all(
|((l_start, l_end), (r_start, r_end))| {
lhs_slices_iter
.zip(rhs_slices_iter)
.all(|((l_start, l_end), (r_start, r_end))| {
l_start == r_start
&& l_end == r_end
&& equal_len(
@@ -96,8 +91,7 @@ pub(super) fn primitive_equal<T>(
(rhs_start + r_start) * byte_width,
(l_end - l_start) * byte_width,
)
},
)
})
}
}
}
+1 -4
View File
@@ -116,10 +116,7 @@ pub(super) fn union_equal(
rhs_fields,
)
}
(
DataType::Union(_, UnionMode::Sparse),
DataType::Union(_, UnionMode::Sparse),
) => {
(DataType::Union(_, UnionMode::Sparse), DataType::Union(_, UnionMode::Sparse)) => {
lhs_type_id_range == rhs_type_id_range
&& equal_sparse(lhs, rhs, lhs_start, rhs_start, len)
}
+2 -4
View File
@@ -73,11 +73,9 @@ pub(super) fn base_equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
let r_value_field = r_fields.get(1).unwrap();
// We don't enforce the equality of field names
let data_type_equal = l_key_field.data_type()
== r_key_field.data_type()
let data_type_equal = l_key_field.data_type() == r_key_field.data_type()
&& l_value_field.data_type() == r_value_field.data_type();
let nullability_equal = l_key_field.is_nullable()
== r_key_field.is_nullable()
let nullability_equal = l_key_field.is_nullable() == r_key_field.is_nullable()
&& l_value_field.is_nullable() == r_value_field.is_nullable();
let metadata_equal = l_key_field.metadata() == r_key_field.metadata()
&& l_value_field.metadata() == r_value_field.metadata();
+3 -12
View File
@@ -23,9 +23,7 @@ use crate::ArrayData;
use arrow_buffer::ArrowNativeType;
use num::{CheckedAdd, Integer};
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
array: &ArrayData,
) -> Extend {
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(array: &ArrayData) -> Extend {
let offsets = array.buffer::<T>(0);
Box::new(
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
@@ -35,11 +33,7 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
let last_offset: T = unsafe { get_last_offset(offset_buffer) };
// offsets
extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);
extend_offsets::<T>(offset_buffer, last_offset, &offsets[start..start + len + 1]);
mutable.child_data[0].extend(
index,
@@ -50,10 +44,7 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
)
}
pub(super) fn extend_nulls<T: ArrowNativeType>(
mutable: &mut _MutableArrayData,
len: usize,
) {
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
let offset_buffer = &mut mutable.buffer1;
// this is safe due to how offset is built. See details on `get_last_offset`
+20 -40
View File
@@ -173,11 +173,7 @@ impl<'a> std::fmt::Debug for MutableArrayData<'a> {
/// Builds an extend that adds `offset` to the source primitive
/// Additionally validates that `max` fits into the
/// the underlying primitive returning None if not
fn build_extend_dictionary(
array: &ArrayData,
offset: usize,
max: usize,
) -> Option<Extend> {
fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Option<Extend> {
macro_rules! validate_and_build {
($dt: ty) => {{
let _: $dt = max.try_into().ok()?;
@@ -215,27 +211,19 @@ fn build_extend(array: &ArrayData) -> Extend {
DataType::Int64 => primitive::build_extend::<i64>(array),
DataType::Float32 => primitive::build_extend::<f32>(array),
DataType::Float64 => primitive::build_extend::<f64>(array),
DataType::Date32
| DataType::Time32(_)
| DataType::Interval(IntervalUnit::YearMonth) => {
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
primitive::build_extend::<i32>(array)
}
DataType::Date64
| DataType::Time64(_)
| DataType::Timestamp(_, _)
| DataType::Duration(_)
| DataType::Interval(IntervalUnit::DayTime) => {
primitive::build_extend::<i64>(array)
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
primitive::build_extend::<i128>(array)
}
| DataType::Interval(IntervalUnit::DayTime) => primitive::build_extend::<i64>(array),
DataType::Interval(IntervalUnit::MonthDayNano) => primitive::build_extend::<i128>(array),
DataType::Decimal128(_, _) => primitive::build_extend::<i128>(array),
DataType::Decimal256(_, _) => primitive::build_extend::<i256>(array),
DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array),
DataType::LargeUtf8 | DataType::LargeBinary => {
variable_size::build_extend::<i64>(array)
}
DataType::LargeUtf8 | DataType::LargeBinary => variable_size::build_extend::<i64>(array),
DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
DataType::LargeList(_) => list::build_extend::<i64>(array),
DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
@@ -265,9 +253,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
DataType::Int64 => primitive::extend_nulls::<i64>,
DataType::Float32 => primitive::extend_nulls::<f32>,
DataType::Float64 => primitive::extend_nulls::<f64>,
DataType::Date32
| DataType::Time32(_)
| DataType::Interval(IntervalUnit::YearMonth) => primitive::extend_nulls::<i32>,
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
primitive::extend_nulls::<i32>
}
DataType::Date64
| DataType::Time64(_)
| DataType::Timestamp(_, _)
@@ -380,10 +368,7 @@ impl<'a> MutableArrayData<'a> {
array_capacity = *capacity;
preallocate_offset_and_binary_buffer::<i64>(*capacity, *value_cap)
}
(
DataType::Utf8 | DataType::Binary,
Capacities::Binary(capacity, Some(value_cap)),
) => {
(DataType::Utf8 | DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => {
array_capacity = *capacity;
preallocate_offset_and_binary_buffer::<i32>(*capacity, *value_cap)
}
@@ -391,10 +376,7 @@ impl<'a> MutableArrayData<'a> {
array_capacity = *capacity;
new_buffers(data_type, *capacity)
}
(
DataType::List(_) | DataType::LargeList(_),
Capacities::List(capacity, _),
) => {
(DataType::List(_) | DataType::LargeList(_), Capacities::List(capacity, _)) => {
array_capacity = *capacity;
new_buffers(data_type, *capacity)
}
@@ -435,16 +417,15 @@ impl<'a> MutableArrayData<'a> {
.map(|array| &array.child_data()[0])
.collect::<Vec<_>>();
let capacities = if let Capacities::List(capacity, ref child_capacities) =
capacities
{
child_capacities
.clone()
.map(|c| *c)
.unwrap_or(Capacities::Array(capacity))
} else {
Capacities::Array(array_capacity)
};
let capacities =
if let Capacities::List(capacity, ref child_capacities) = capacities {
child_capacities
.clone()
.map(|c| *c)
.unwrap_or(Capacities::Array(capacity))
} else {
Capacities::Array(array_capacity)
};
vec![MutableArrayData::with_capacities(
children, use_nulls, capacities,
@@ -546,8 +527,7 @@ impl<'a> MutableArrayData<'a> {
.collect();
let capacity = lengths.iter().sum();
let mut mutable =
MutableArrayData::new(dictionaries, false, capacity);
let mut mutable = MutableArrayData::new(dictionaries, false, capacity);
for (i, len) in lengths.iter().enumerate() {
mutable.extend(i, 0, *len)
+1 -4
View File
@@ -47,9 +47,6 @@ where
)
}
pub(super) fn extend_nulls<T: ArrowNativeType>(
mutable: &mut _MutableArrayData,
len: usize,
) {
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
mutable.buffer1.extend_zeros(len * size_of::<T>());
}
+1 -3
View File
@@ -45,9 +45,7 @@ pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>(
}
#[inline]
pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(
offset_buffer: &MutableBuffer,
) -> T {
pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(offset_buffer: &MutableBuffer) -> T {
// JUSTIFICATION
// Benefit
// 20% performance improvement extend of variable sized arrays (see bench `mutable_array`)
+3 -12
View File
@@ -39,9 +39,7 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
buffer.extend_from_slice(new_values);
}
pub(super) fn build_extend<
T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>,
>(
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>>(
array: &ArrayData,
) -> Extend {
let offsets = array.buffer::<T>(0);
@@ -54,21 +52,14 @@ pub(super) fn build_extend<
// this is safe due to how offset is built. See details on `get_last_offset`
let last_offset = unsafe { get_last_offset(offset_buffer) };
extend_offsets::<T>(
offset_buffer,
last_offset,
&offsets[start..start + len + 1],
);
extend_offsets::<T>(offset_buffer, last_offset, &offsets[start..start + len + 1]);
// values
extend_offset_values::<T>(values_buffer, offsets, values, start, len);
},
)
}
pub(super) fn extend_nulls<T: ArrowNativeType>(
mutable: &mut _MutableArrayData,
len: usize,
) {
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
let offset_buffer = &mut mutable.buffer1;
// this is safe due to how offset is built. See details on `get_last_offset`
+17 -23
View File
@@ -32,28 +32,26 @@ use arrow_array::builder::StringBuilder;
use arrow_array::{ArrayRef, RecordBatch};
use arrow_flight::encode::FlightDataEncoderBuilder;
use arrow_flight::sql::metadata::{
SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData,
XdbcTypeInfoDataBuilder,
SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData, XdbcTypeInfoDataBuilder,
};
use arrow_flight::sql::{
server::FlightSqlService, ActionBeginSavepointRequest, ActionBeginSavepointResult,
ActionBeginTransactionRequest, ActionBeginTransactionResult,
ActionCancelQueryRequest, ActionCancelQueryResult,
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest,
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs,
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo,
CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
ActionBeginTransactionRequest, ActionBeginTransactionResult, ActionCancelQueryRequest,
ActionCancelQueryResult, ActionClosePreparedStatementRequest,
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult,
ActionCreatePreparedSubstraitPlanRequest, ActionEndSavepointRequest,
ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference,
CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt,
Searchable, SqlInfo, TicketStatementQuery, XdbcDataType,
CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt, Searchable,
SqlInfo, TicketStatementQuery, XdbcDataType,
};
use arrow_flight::utils::batches_to_flight_data;
use arrow_flight::{
flight_service_server::FlightService, flight_service_server::FlightServiceServer,
Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
HandshakeResponse, IpcMessage, Location, SchemaAsIpc, Ticket,
flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse,
IpcMessage, Location, SchemaAsIpc, Ticket,
};
use arrow_ipc::writer::IpcWriteOptions;
use arrow_schema::{ArrowError, DataType, Field, Schema};
@@ -167,8 +165,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
let bytes = BASE64_STANDARD
.decode(base64)
.map_err(|e| status!("authorization not decodable", e))?;
let str = String::from_utf8(bytes)
.map_err(|e| status!("authorization not parsable", e))?;
let str = String::from_utf8(bytes).map_err(|e| status!("authorization not parsable", e))?;
let parts: Vec<_> = str.split(':').collect();
let (user, pass) = match parts.as_slice() {
[user, pass] => (user, pass),
@@ -195,8 +192,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
_message: Any,
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
self.check_token(&request)?;
let batch =
Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
let schema = batch.schema();
let batches = vec![batch];
let flight_data = batches_to_flight_data(schema.as_ref(), batches)
@@ -238,8 +234,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
self.check_token(&request)?;
let handle = std::str::from_utf8(&cmd.prepared_statement_handle)
.map_err(|e| status!("Unable to parse handle", e))?;
let batch =
Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
let schema = (*batch.schema()).clone();
let num_rows = batch.num_rows();
let num_bytes = batch.get_array_memory_size();
@@ -736,8 +731,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
if std::env::var("USE_TLS").ok().is_some() {
let cert = std::fs::read_to_string("arrow-flight/examples/data/server.pem")?;
let key = std::fs::read_to_string("arrow-flight/examples/data/server.key")?;
let client_ca =
std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?;
let client_ca = std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?;
let tls_config = ServerTlsConfig::new()
.identity(Identity::from_pem(&cert, &key))
+3 -3
View File
@@ -20,9 +20,9 @@ use tonic::transport::Server;
use tonic::{Request, Response, Status, Streaming};
use arrow_flight::{
flight_service_server::FlightService, flight_service_server::FlightServiceServer,
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest,
HandshakeResponse, PutResult, SchemaResult, Ticket,
};
#[derive(Clone)]
+4 -15
View File
@@ -249,10 +249,7 @@ impl FlightClient {
/// .expect("error fetching data");
/// # }
/// ```
pub async fn get_flight_info(
&mut self,
descriptor: FlightDescriptor,
) -> Result<FlightInfo> {
pub async fn get_flight_info(&mut self, descriptor: FlightDescriptor) -> Result<FlightInfo> {
let request = self.make_request(descriptor);
let response = self.inner.get_flight_info(request).await?.into_inner();
@@ -452,10 +449,7 @@ impl FlightClient {
/// .expect("error making request");
/// # }
/// ```
pub async fn get_schema(
&mut self,
flight_descriptor: FlightDescriptor,
) -> Result<Schema> {
pub async fn get_schema(&mut self, flight_descriptor: FlightDescriptor) -> Result<Schema> {
let request = self.make_request(flight_descriptor);
let schema_result = self.inner.get_schema(request).await?.into_inner();
@@ -488,9 +482,7 @@ impl FlightClient {
/// .expect("error gathering actions");
/// # }
/// ```
pub async fn list_actions(
&mut self,
) -> Result<BoxStream<'static, Result<ActionType>>> {
pub async fn list_actions(&mut self) -> Result<BoxStream<'static, Result<ActionType>>> {
let request = self.make_request(Empty {});
let action_stream = self
@@ -528,10 +520,7 @@ impl FlightClient {
/// .expect("error gathering action results");
/// # }
/// ```
pub async fn do_action(
&mut self,
action: Action,
) -> Result<BoxStream<'static, Result<Bytes>>> {
pub async fn do_action(&mut self, action: Action) -> Result<BoxStream<'static, Result<Bytes>>> {
let request = self.make_request(action);
let result_stream = self
+12 -21
View File
@@ -21,9 +21,7 @@ use arrow_buffer::Buffer;
use arrow_schema::{Schema, SchemaRef};
use bytes::Bytes;
use futures::{ready, stream::BoxStream, Stream, StreamExt};
use std::{
collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll,
};
use std::{collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll};
use tonic::metadata::MetadataMap;
use crate::error::{FlightError, Result};
@@ -270,16 +268,14 @@ impl FlightDataDecoder {
/// state as necessary.
fn extract_message(&mut self, data: FlightData) -> Result<Option<DecodedFlightData>> {
use arrow_ipc::MessageHeader;
let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|e| {
FlightError::DecodeError(format!("Error decoding root message: {e}"))
})?;
let message = arrow_ipc::root_as_message(&data.data_header[..])
.map_err(|e| FlightError::DecodeError(format!("Error decoding root message: {e}")))?;
match message.header_type() {
MessageHeader::NONE => Ok(Some(DecodedFlightData::new_none(data))),
MessageHeader::Schema => {
let schema = Schema::try_from(&data).map_err(|e| {
FlightError::DecodeError(format!("Error decoding schema: {e}"))
})?;
let schema = Schema::try_from(&data)
.map_err(|e| FlightError::DecodeError(format!("Error decoding schema: {e}")))?;
let schema = Arc::new(schema);
let dictionaries_by_field = HashMap::new();
@@ -300,12 +296,11 @@ impl FlightDataDecoder {
};
let buffer = Buffer::from_bytes(data.data_body.into());
let dictionary_batch =
message.header_as_dictionary_batch().ok_or_else(|| {
FlightError::protocol(
"Could not get dictionary batch from DictionaryBatch message",
)
})?;
let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| {
FlightError::protocol(
"Could not get dictionary batch from DictionaryBatch message",
)
})?;
arrow_ipc::reader::read_dictionary(
&buffer,
@@ -315,9 +310,7 @@ impl FlightDataDecoder {
&message.version(),
)
.map_err(|e| {
FlightError::DecodeError(format!(
"Error decoding ipc dictionary: {e}"
))
FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}"))
})?;
// Updated internal state, but no decoded message
@@ -338,9 +331,7 @@ impl FlightDataDecoder {
&state.dictionaries_by_field,
)
.map_err(|e| {
FlightError::DecodeError(format!(
"Error decoding ipc RecordBatch: {e}"
))
FlightError::DecodeError(format!("Error decoding ipc RecordBatch: {e}"))
})?;
Ok(Some(DecodedFlightData::new_record_batch(data, batch)))
+42 -84
View File
@@ -159,10 +159,7 @@ impl FlightDataEncoderBuilder {
}
/// Set [`DictionaryHandling`] for encoder
pub fn with_dictionary_handling(
mut self,
dictionary_handling: DictionaryHandling,
) -> Self {
pub fn with_dictionary_handling(mut self, dictionary_handling: DictionaryHandling) -> Self {
self.dictionary_handling = dictionary_handling;
self
}
@@ -191,10 +188,7 @@ impl FlightDataEncoderBuilder {
}
/// Specify a flight descriptor in the first FlightData message.
pub fn with_flight_descriptor(
mut self,
descriptor: Option<FlightDescriptor>,
) -> Self {
pub fn with_flight_descriptor(mut self, descriptor: Option<FlightDescriptor>) -> Self {
self.descriptor = descriptor;
self
}
@@ -334,8 +328,7 @@ impl FlightDataEncoder {
let batch = prepare_batch_for_flight(&batch, schema, send_dictionaries)?;
for batch in split_batch_for_grpc_response(batch, self.max_flight_data_size) {
let (flight_dictionaries, flight_batch) =
self.encoder.encode_batch(&batch)?;
let (flight_dictionaries, flight_batch) = self.encoder.encode_batch(&batch)?;
self.queue_messages(flight_dictionaries);
self.queue_message(flight_batch);
@@ -460,9 +453,8 @@ fn split_batch_for_grpc_response(
.map(|col| col.get_buffer_memory_size())
.sum::<usize>();
let n_batches = (size / max_flight_data_size
+ usize::from(size % max_flight_data_size != 0))
.max(1);
let n_batches =
(size / max_flight_data_size + usize::from(size % max_flight_data_size != 0)).max(1);
let rows_per_batch = (batch.num_rows() / n_batches).max(1);
let mut out = Vec::with_capacity(n_batches + 1);
@@ -505,18 +497,12 @@ impl FlightIpcEncoder {
/// Convert a `RecordBatch` to a Vec of `FlightData` representing
/// dictionaries and a `FlightData` representing the batch
fn encode_batch(
&mut self,
batch: &RecordBatch,
) -> Result<(Vec<FlightData>, FlightData)> {
let (encoded_dictionaries, encoded_batch) = self.data_gen.encoded_batch(
batch,
&mut self.dictionary_tracker,
&self.options,
)?;
fn encode_batch(&mut self, batch: &RecordBatch) -> Result<(Vec<FlightData>, FlightData)> {
let (encoded_dictionaries, encoded_batch) =
self.data_gen
.encoded_batch(batch, &mut self.dictionary_tracker, &self.options)?;
let flight_dictionaries =
encoded_dictionaries.into_iter().map(Into::into).collect();
let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
let flight_batch = encoded_batch.into();
Ok((flight_dictionaries, flight_batch))
@@ -553,9 +539,7 @@ fn prepare_batch_for_flight(
/// but does enable sending DictionaryArray's via Flight.
fn hydrate_dictionary(array: &ArrayRef, send_dictionaries: bool) -> Result<ArrayRef> {
let arr = match array.data_type() {
DataType::Dictionary(_, value) if !send_dictionaries => {
arrow_cast::cast(array, value)?
}
DataType::Dictionary(_, value) if !send_dictionaries => arrow_cast::cast(array, value)?,
_ => Arc::clone(array),
};
Ok(arr)
@@ -586,11 +570,9 @@ mod tests {
let (_, baseline_flight_batch) = make_flight_data(&batch, &options);
let big_batch = batch.slice(0, batch.num_rows() - 1);
let optimized_big_batch =
prepare_batch_for_flight(&big_batch, Arc::clone(&schema), false)
.expect("failed to optimize");
let (_, optimized_big_flight_batch) =
make_flight_data(&optimized_big_batch, &options);
let optimized_big_batch = prepare_batch_for_flight(&big_batch, Arc::clone(&schema), false)
.expect("failed to optimize");
let (_, optimized_big_flight_batch) = make_flight_data(&optimized_big_batch, &options);
assert_eq!(
baseline_flight_batch.data_body.len(),
@@ -601,12 +583,10 @@ mod tests {
let optimized_small_batch =
prepare_batch_for_flight(&small_batch, Arc::clone(&schema), false)
.expect("failed to optimize");
let (_, optimized_small_flight_batch) =
make_flight_data(&optimized_small_batch, &options);
let (_, optimized_small_flight_batch) = make_flight_data(&optimized_small_batch, &options);
assert!(
baseline_flight_batch.data_body.len()
> optimized_small_flight_batch.data_body.len()
baseline_flight_batch.data_body.len() > optimized_small_flight_batch.data_body.len()
);
}
@@ -620,11 +600,10 @@ mod tests {
false,
)]));
let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap();
let encoder = FlightDataEncoderBuilder::default()
.build(futures::stream::once(async { Ok(batch) }));
let encoder =
FlightDataEncoderBuilder::default().build(futures::stream::once(async { Ok(batch) }));
let mut decoder = FlightDataDecoder::new(encoder);
let expected_schema =
Schema::new(vec![Field::new("dict", DataType::Utf8, false)]);
let expected_schema = Schema::new(vec![Field::new("dict", DataType::Utf8, false)]);
let expected_schema = Arc::new(expected_schema);
while let Some(decoded) = decoder.next().await {
let decoded = decoded.unwrap();
@@ -656,10 +635,8 @@ mod tests {
Arc::new(vec!["a", "a", "b"].into_iter().collect());
let arr_two: Arc<DictionaryArray<UInt16Type>> =
Arc::new(vec!["b", "a", "c"].into_iter().collect());
let batch_one =
RecordBatch::try_new(schema.clone(), vec![arr_one.clone()]).unwrap();
let batch_two =
RecordBatch::try_new(schema.clone(), vec![arr_two.clone()]).unwrap();
let batch_one = RecordBatch::try_new(schema.clone(), vec![arr_one.clone()]).unwrap();
let batch_two = RecordBatch::try_new(schema.clone(), vec![arr_two.clone()]).unwrap();
let encoder = FlightDataEncoderBuilder::default()
.with_dictionary_handling(DictionaryHandling::Resend)
@@ -675,10 +652,9 @@ mod tests {
DecodedPayload::RecordBatch(b) => {
assert_eq!(b.schema(), schema);
let actual_array =
Arc::new(downcast_array::<DictionaryArray<UInt16Type>>(
b.column_by_name("dict").unwrap(),
));
let actual_array = Arc::new(downcast_array::<DictionaryArray<UInt16Type>>(
b.column_by_name("dict").unwrap(),
));
assert_eq!(actual_array, expected_array);
@@ -690,10 +666,9 @@ mod tests {
#[test]
fn test_schema_metadata_encoded() {
let schema =
Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata(
HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
);
let schema = Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata(
HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
);
let got = prepare_schema_for_flight(&schema, false);
assert!(got.metadata().contains_key("some_key"));
@@ -708,8 +683,7 @@ mod tests {
)
.expect("cannot create record batch");
prepare_batch_for_flight(&batch, batch.schema(), false)
.expect("failed to optimize");
prepare_batch_for_flight(&batch, batch.schema(), false).expect("failed to optimize");
}
pub fn make_flight_data(
@@ -723,8 +697,7 @@ mod tests {
.encoded_batch(batch, &mut dictionary_tracker, options)
.expect("DictionaryTracker configured above to not error on replacement");
let flight_dictionaries =
encoded_dictionaries.into_iter().map(Into::into).collect();
let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
let flight_batch = encoded_batch.into();
(flight_dictionaries, flight_batch)
@@ -745,8 +718,7 @@ mod tests {
// split once
let n_rows = max_flight_data_size + 1;
assert!(n_rows % 2 == 1, "should be an odd number");
let c =
UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>());
let c = UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>());
let batch = RecordBatch::try_from_iter(vec![("a", Arc::new(c) as ArrayRef)])
.expect("cannot create record batch");
let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size);
@@ -793,8 +765,7 @@ mod tests {
let input_rows = batch.num_rows();
let split =
split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes);
let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes);
let sizes: Vec<_> = split.iter().map(|batch| batch.num_rows()).collect();
let output_rows: usize = sizes.iter().sum();
@@ -807,8 +778,7 @@ mod tests {
#[tokio::test]
async fn flight_data_size_even() {
let s1 =
StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
let s1 = StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
let i1 = Int16Array::from_iter_values(0..1024);
let s2 = StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
let i2 = Int64Array::from_iter_values(0..1024);
@@ -828,8 +798,7 @@ mod tests {
async fn flight_data_size_uneven_variable_lengths() {
// each row has a longer string than the last with increasing lengths 0 --> 1024
let array = StringArray::from_iter_values((0..1024).map(|i| "*".repeat(i)));
let batch =
RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap();
let batch = RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap();
// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
@@ -883,8 +852,7 @@ mod tests {
})
.collect();
let batch =
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
verify_encoded_split(batch, 160).await;
}
@@ -894,11 +862,9 @@ mod tests {
// large dictionary (all distinct values ==> 1024 entries in dictionary)
let values: Vec<_> = (1..1024).map(|i| "**".repeat(i)).collect();
let array: DictionaryArray<Int32Type> =
values.iter().map(|s| Some(s.as_str())).collect();
let array: DictionaryArray<Int32Type> = values.iter().map(|s| Some(s.as_str())).collect();
let batch =
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
@@ -912,8 +878,7 @@ mod tests {
let keys = Int32Array::from_iter_values((0..3000).map(|i| (3000 - i) % 1024));
let array = DictionaryArray::new(keys, Arc::new(values));
let batch =
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
// overage is much higher than ideal
// https://github.com/apache/arrow-rs/issues/3478
@@ -929,12 +894,9 @@ mod tests {
// medium cardinality
let values3: Vec<_> = (1..1024).map(|i| "**".repeat(i % 100)).collect();
let array1: DictionaryArray<Int32Type> =
values1.iter().map(|s| Some(s.as_str())).collect();
let array2: DictionaryArray<Int32Type> =
values2.iter().map(|s| Some(s.as_str())).collect();
let array3: DictionaryArray<Int32Type> =
values3.iter().map(|s| Some(s.as_str())).collect();
let array1: DictionaryArray<Int32Type> = values1.iter().map(|s| Some(s.as_str())).collect();
let array2: DictionaryArray<Int32Type> = values2.iter().map(|s| Some(s.as_str())).collect();
let array3: DictionaryArray<Int32Type> = values3.iter().map(|s| Some(s.as_str())).collect();
let batch = RecordBatch::try_from_iter(vec![
("a1", Arc::new(array1) as _),
@@ -954,17 +916,13 @@ mod tests {
.flight_descriptor
.as_ref()
.map(|descriptor| {
let path_len: usize =
descriptor.path.iter().map(|p| p.as_bytes().len()).sum();
let path_len: usize = descriptor.path.iter().map(|p| p.as_bytes().len()).sum();
std::mem::size_of_val(descriptor) + descriptor.cmd.len() + path_len
})
.unwrap_or(0);
flight_descriptor_size
+ d.app_metadata.len()
+ d.data_body.len()
+ d.data_header.len()
flight_descriptor_size + d.app_metadata.len() + d.data_body.len() + d.data_header.len()
}
/// Coverage for <https://github.com/apache/arrow-rs/issues/3478>
+1 -4
View File
@@ -133,10 +133,7 @@ pub struct IpcMessage(pub Bytes);
// Useful conversion functions
fn flight_schema_as_encoded_data(
arrow_schema: &Schema,
options: &IpcWriteOptions,
) -> EncodedData {
fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData {
let data_gen = writer::IpcDataGenerator::default();
data_gen.schema_to_bytes(arrow_schema, options)
}
+29 -48
View File
@@ -31,17 +31,16 @@ use crate::flight_service_client::FlightServiceClient;
use crate::sql::server::{CLOSE_PREPARED_STATEMENT, CREATE_PREPARED_STATEMENT};
use crate::sql::{
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
ActionCreatePreparedStatementResult, Any, CommandGetCatalogs,
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo,
CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
ActionCreatePreparedStatementResult, Any, CommandGetCatalogs, CommandGetCrossReference,
CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
CommandStatementUpdate, DoPutUpdateResult, ProstMessageExt, SqlInfo,
};
use crate::trailers::extract_lazy_trailers;
use crate::{
Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest,
HandshakeResponse, IpcMessage, PutResult, Ticket,
Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
IpcMessage, PutResult, Ticket,
};
use arrow_array::RecordBatch;
use arrow_buffer::Buffer;
@@ -134,11 +133,7 @@ impl FlightSqlServiceClient<Channel> {
/// Perform a `handshake` with the server, passing credentials and establishing a session
/// Returns arbitrary auth/handshake info binary blob
pub async fn handshake(
&mut self,
username: &str,
password: &str,
) -> Result<Bytes, ArrowError> {
pub async fn handshake(&mut self, username: &str, password: &str) -> Result<Bytes, ArrowError> {
let cmd = HandshakeRequest {
protocol_version: 0,
payload: Default::default(),
@@ -156,9 +151,9 @@ impl FlightSqlServiceClient<Channel> {
.await
.map_err(|e| ArrowError::IpcError(format!("Can't handshake {e}")))?;
if let Some(auth) = resp.metadata().get("authorization") {
let auth = auth.to_str().map_err(|_| {
ArrowError::ParseError("Can't read auth header".to_string())
})?;
let auth = auth
.to_str()
.map_err(|_| ArrowError::ParseError("Can't read auth header".to_string()))?;
let bearer = "Bearer ";
if !auth.starts_with(bearer) {
Err(ArrowError::ParseError("Invalid auth header!".to_string()))?;
@@ -166,10 +161,11 @@ impl FlightSqlServiceClient<Channel> {
let auth = auth[bearer.len()..].to_string();
self.token = Some(auth);
}
let responses: Vec<HandshakeResponse> =
resp.into_inner().try_collect().await.map_err(|_| {
ArrowError::ParseError("Can't collect responses".to_string())
})?;
let responses: Vec<HandshakeResponse> = resp
.into_inner()
.try_collect()
.await
.map_err(|_| ArrowError::ParseError("Can't collect responses".to_string()))?;
let resp = match responses.as_slice() {
[resp] => resp.payload.clone(),
[] => Bytes::new(),
@@ -209,8 +205,7 @@ impl FlightSqlServiceClient<Channel> {
.await
.map_err(status_to_arrow_error)?
.unwrap();
let any =
Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
let result: DoPutUpdateResult = any.unpack()?.unwrap();
Ok(result.record_count)
}
@@ -405,17 +400,13 @@ impl FlightSqlServiceClient<Channel> {
ArrowError::ParseError(format!("Cannot convert header key \"{k}\": {e}"))
})?;
let v = v.parse().map_err(|e| {
ArrowError::ParseError(format!(
"Cannot convert header value \"{v}\": {e}"
))
ArrowError::ParseError(format!("Cannot convert header value \"{v}\": {e}"))
})?;
req.metadata_mut().insert(k, v);
}
if let Some(token) = &self.token {
let val = format!("Bearer {token}").parse().map_err(|e| {
ArrowError::ParseError(format!(
"Cannot convert token to header value: {e}"
))
ArrowError::ParseError(format!("Cannot convert token to header value: {e}"))
})?;
req.metadata_mut().insert("authorization", val);
}
@@ -484,8 +475,7 @@ impl PreparedStatement<Channel> {
.await
.map_err(status_to_arrow_error)?
.unwrap();
let any =
Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
let result: DoPutUpdateResult = any.unpack()?.unwrap();
Ok(result.record_count)
}
@@ -501,10 +491,7 @@ impl PreparedStatement<Channel> {
}
/// Set a RecordBatch that contains the parameters that will be bind.
pub fn set_parameters(
&mut self,
parameter_binding: RecordBatch,
) -> Result<(), ArrowError> {
pub fn set_parameters(&mut self, parameter_binding: RecordBatch) -> Result<(), ArrowError> {
self.parameter_binding = Some(parameter_binding);
Ok(())
}
@@ -580,19 +567,16 @@ pub fn arrow_data_from_flight_data(
flight_data: FlightData,
arrow_schema_ref: &SchemaRef,
) -> Result<ArrowFlightData, ArrowError> {
let ipc_message = root_as_message(&flight_data.data_header[..]).map_err(|err| {
ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
})?;
let ipc_message = root_as_message(&flight_data.data_header[..])
.map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
match ipc_message.header_type() {
MessageHeader::RecordBatch => {
let ipc_record_batch =
ipc_message.header_as_record_batch().ok_or_else(|| {
ArrowError::ComputeError(
"Unable to convert flight data header to a record batch"
.to_string(),
)
})?;
let ipc_record_batch = ipc_message.header_as_record_batch().ok_or_else(|| {
ArrowError::ComputeError(
"Unable to convert flight data header to a record batch".to_string(),
)
})?;
let dictionaries_by_field = HashMap::new();
let record_batch = read_record_batch(
@@ -618,13 +602,11 @@ pub fn arrow_data_from_flight_data(
MessageHeader::DictionaryBatch => {
let _ = ipc_message.header_as_dictionary_batch().ok_or_else(|| {
ArrowError::ComputeError(
"Unable to convert flight data header to a dictionary batch"
.to_string(),
"Unable to convert flight data header to a dictionary batch".to_string(),
)
})?;
Err(ArrowError::NotYetImplemented(
"no idea on how to convert an ipc dictionary batch to an arrow type"
.to_string(),
"no idea on how to convert an ipc dictionary batch to an arrow type".to_string(),
))
}
MessageHeader::Tensor => {
@@ -644,8 +626,7 @@ pub fn arrow_data_from_flight_data(
)
})?;
Err(ArrowError::NotYetImplemented(
"no idea on how to convert an ipc sparse tensor to an arrow type"
.to_string(),
"no idea on how to convert an ipc sparse tensor to an arrow type".to_string(),
))
}
_ => Err(ArrowError::ComputeError(format!(
+1 -5
View File
@@ -95,11 +95,7 @@ impl GetDbSchemasBuilder {
/// Append a row
///
/// In case the catalog should be considered as empty, pass in an empty string '""'.
pub fn append(
&mut self,
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) {
pub fn append(&mut self, catalog_name: impl AsRef<str>, schema_name: impl AsRef<str>) {
self.catalog_name.append_value(catalog_name);
self.db_schema_name.append_value(schema_name);
}
+5 -14
View File
@@ -30,8 +30,8 @@ use std::sync::Arc;
use arrow_arith::boolean::or;
use arrow_array::array::{Array, UInt32Array, UnionArray};
use arrow_array::builder::{
ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
MapBuilder, StringBuilder, UInt32Builder,
ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, MapBuilder,
StringBuilder, UInt32Builder,
};
use arrow_array::{RecordBatch, Scalar};
use arrow_data::ArrayData;
@@ -184,11 +184,7 @@ static UNION_TYPE: Lazy<DataType> = Lazy::new(|| {
Field::new("keys", DataType::Int32, false),
Field::new(
"values",
DataType::List(Arc::new(Field::new(
"item",
DataType::Int32,
true,
))),
DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
true,
),
])),
@@ -420,10 +416,7 @@ pub struct SqlInfoData {
impl SqlInfoData {
/// Return a [`RecordBatch`] containing only the requested `u32`, if any
/// from [`CommandGetSqlInfo`]
pub fn record_batch(
&self,
info: impl IntoIterator<Item = u32>,
) -> Result<RecordBatch> {
pub fn record_batch(&self, info: impl IntoIterator<Item = u32>) -> Result<RecordBatch> {
let arr = self.batch.column(0);
let type_filter = info
.into_iter()
@@ -493,9 +486,7 @@ mod tests {
use super::SqlInfoDataBuilder;
use crate::sql::metadata::tests::assert_batches_eq;
use crate::sql::{
SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert,
};
use crate::sql::{SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert};
#[test]
fn test_sql_infos() {
+4 -4
View File
@@ -329,12 +329,12 @@ mod tests {
"b_catalog",
])) as ArrayRef,
Arc::new(StringArray::from(vec![
"a_schema", "a_schema", "b_schema", "b_schema", "a_schema",
"a_schema", "b_schema", "b_schema",
"a_schema", "a_schema", "b_schema", "b_schema", "a_schema", "a_schema",
"b_schema", "b_schema",
])) as ArrayRef,
Arc::new(StringArray::from(vec![
"a_table", "b_table", "a_table", "b_table", "a_table", "a_table",
"b_table", "b_table",
"a_table", "b_table", "a_table", "b_table", "a_table", "a_table", "b_table",
"b_table",
])) as ArrayRef,
Arc::new(StringArray::from(vec![
"TABLE", "TABLE", "TABLE", "TABLE", "TABLE", "VIEW", "TABLE", "VIEW",
+3 -7
View File
@@ -36,9 +36,7 @@ use once_cell::sync::Lazy;
use super::lexsort_to_indices;
use crate::error::*;
use crate::sql::{
CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode,
};
use crate::sql::{CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode};
/// Data structure representing type information for xdbc types.
#[derive(Debug, Clone, Default)]
@@ -201,8 +199,7 @@ impl XdbcTypeInfoDataBuilder {
minimum_scale_builder.append_option(info.minimum_scale);
maximum_scale_builder.append_option(info.maximum_scale);
sql_data_type_builder.append_value(info.sql_data_type as i32);
datetime_subcode_builder
.append_option(info.datetime_subcode.map(|code| code as i32));
datetime_subcode_builder.append_option(info.datetime_subcode.map(|code| code as i32));
num_prec_radix_builder.append_option(info.num_prec_radix);
interval_precision_builder.append_option(info.interval_precision);
});
@@ -215,8 +212,7 @@ impl XdbcTypeInfoDataBuilder {
let (field, offsets, values, nulls) = create_params_builder.finish().into_parts();
// Re-defined the field to be non-nullable
let new_field = Arc::new(field.as_ref().clone().with_nullable(false));
let create_params =
Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef;
let create_params = Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef;
let nullable = Arc::new(nullable_builder.finish());
let case_sensitive = Arc::new(case_sensitive_builder.finish());
let searchable = Arc::new(searchable_builder.finish());
+2 -3
View File
@@ -295,9 +295,8 @@ impl Any {
if !self.is::<M>() {
return Ok(None);
}
let m = Message::decode(&*self.value).map_err(|err| {
ArrowError::ParseError(format!("Unable to decode Any value: {err}"))
})?;
let m = Message::decode(&*self.value)
.map_err(|err| ArrowError::ParseError(format!("Unable to decode Any value: {err}")))?;
Ok(Some(m))
}
+41 -76
View File
@@ -24,23 +24,21 @@ use prost::Message;
use tonic::{Request, Response, Status, Streaming};
use super::{
ActionBeginSavepointRequest, ActionBeginSavepointResult,
ActionBeginTransactionRequest, ActionBeginTransactionResult,
ActionCancelQueryRequest, ActionCancelQueryResult,
ActionBeginSavepointRequest, ActionBeginSavepointResult, ActionBeginTransactionRequest,
ActionBeginTransactionResult, ActionCancelQueryRequest, ActionCancelQueryResult,
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest,
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, Command,
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas,
CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
CommandStatementSubstraitPlan, CommandStatementUpdate, DoPutUpdateResult,
ProstMessageExt, SqlInfo, TicketStatementQuery,
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, Command, CommandGetCatalogs,
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys,
CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables,
CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate,
CommandStatementQuery, CommandStatementSubstraitPlan, CommandStatementUpdate,
DoPutUpdateResult, ProstMessageExt, SqlInfo, TicketStatementQuery,
};
use crate::{
flight_service_server::FlightService, Action, ActionType, Criteria, Empty,
FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
PutResult, SchemaResult, Ticket,
flight_service_server::FlightService, Action, ActionType, Criteria, Empty, FlightData,
FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult,
Ticket,
};
pub(crate) static CREATE_PREPARED_STATEMENT: &str = "CreatePreparedStatement";
@@ -549,13 +547,10 @@ where
Pin<Box<dyn Stream<Item = Result<HandshakeResponse, Status>> + Send + 'static>>;
type ListFlightsStream =
Pin<Box<dyn Stream<Item = Result<FlightInfo, Status>> + Send + 'static>>;
type DoGetStream =
Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + 'static>>;
type DoPutStream =
Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + 'static>>;
type DoActionStream = Pin<
Box<dyn Stream<Item = Result<super::super::Result, Status>> + Send + 'static>,
>;
type DoGetStream = Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + 'static>>;
type DoPutStream = Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + 'static>>;
type DoActionStream =
Pin<Box<dyn Stream<Item = Result<super::super::Result, Status>> + Send + 'static>>;
type ListActionsStream =
Pin<Box<dyn Stream<Item = Result<ActionType, Status>> + Send + 'static>>;
type DoExchangeStream =
@@ -580,8 +575,7 @@ where
&self,
request: Request<FlightDescriptor>,
) -> Result<Response<FlightInfo>, Status> {
let message =
Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?;
let message = Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?;
match Command::try_from(message).map_err(arrow_error_to_status)? {
Command::CommandStatementQuery(token) => {
@@ -600,9 +594,7 @@ where
Command::CommandGetDbSchemas(token) => {
return self.get_flight_info_schemas(token, request).await
}
Command::CommandGetTables(token) => {
self.get_flight_info_tables(token, request).await
}
Command::CommandGetTables(token) => self.get_flight_info_tables(token, request).await,
Command::CommandGetTableTypes(token) => {
self.get_flight_info_table_types(token, request).await
}
@@ -642,31 +634,21 @@ where
&self,
request: Request<Ticket>,
) -> Result<Response<Self::DoGetStream>, Status> {
let msg: Any = Message::decode(&*request.get_ref().ticket)
.map_err(decode_error_to_status)?;
let msg: Any =
Message::decode(&*request.get_ref().ticket).map_err(decode_error_to_status)?;
match Command::try_from(msg).map_err(arrow_error_to_status)? {
Command::TicketStatementQuery(command) => {
self.do_get_statement(command, request).await
}
Command::TicketStatementQuery(command) => self.do_get_statement(command, request).await,
Command::CommandPreparedStatementQuery(command) => {
self.do_get_prepared_statement(command, request).await
}
Command::CommandGetCatalogs(command) => {
self.do_get_catalogs(command, request).await
}
Command::CommandGetDbSchemas(command) => {
self.do_get_schemas(command, request).await
}
Command::CommandGetTables(command) => {
self.do_get_tables(command, request).await
}
Command::CommandGetCatalogs(command) => self.do_get_catalogs(command, request).await,
Command::CommandGetDbSchemas(command) => self.do_get_schemas(command, request).await,
Command::CommandGetTables(command) => self.do_get_tables(command, request).await,
Command::CommandGetTableTypes(command) => {
self.do_get_table_types(command, request).await
}
Command::CommandGetSqlInfo(command) => {
self.do_get_sql_info(command, request).await
}
Command::CommandGetSqlInfo(command) => self.do_get_sql_info(command, request).await,
Command::CommandGetPrimaryKeys(command) => {
self.do_get_primary_keys(command, request).await
}
@@ -699,8 +681,8 @@ where
let mut request = request.map(PeekableFlightDataStream::new);
let cmd = Pin::new(request.get_mut()).peek().await.unwrap().clone()?;
let message = Any::decode(&*cmd.flight_descriptor.unwrap().cmd)
.map_err(decode_error_to_status)?;
let message =
Any::decode(&*cmd.flight_descriptor.unwrap().cmd).map_err(decode_error_to_status)?;
match Command::try_from(message).map_err(arrow_error_to_status)? {
Command::CommandStatementUpdate(command) => {
let record_count = self.do_put_statement_update(command, request).await?;
@@ -755,11 +737,10 @@ where
};
let create_prepared_substrait_plan_action_type = ActionType {
r#type: CREATE_PREPARED_SUBSTRAIT_PLAN.to_string(),
description:
"Creates a reusable prepared substrait plan resource on the server.\n
description: "Creates a reusable prepared substrait plan resource on the server.\n
Request Message: ActionCreatePreparedSubstraitPlanRequest\n
Response Message: ActionCreatePreparedStatementResult"
.into(),
.into(),
};
let begin_transaction_action_type = ActionType {
r#type: BEGIN_TRANSACTION.to_string(),
@@ -820,8 +801,7 @@ where
request: Request<Action>,
) -> Result<Response<Self::DoActionStream>, Status> {
if request.get_ref().r#type == CREATE_PREPARED_STATEMENT {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionCreatePreparedStatementRequest = any
.unpack()
@@ -839,8 +819,7 @@ where
})]);
return Ok(Response::new(Box::pin(output)));
} else if request.get_ref().r#type == CLOSE_PREPARED_STATEMENT {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionClosePreparedStatementRequest = any
.unpack()
@@ -854,8 +833,7 @@ where
.await?;
return Ok(Response::new(Box::pin(futures::stream::empty())));
} else if request.get_ref().r#type == CREATE_PREPARED_SUBSTRAIT_PLAN {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionCreatePreparedSubstraitPlanRequest = any
.unpack()
@@ -869,47 +847,38 @@ where
.await?;
return Ok(Response::new(Box::pin(futures::stream::empty())));
} else if request.get_ref().r#type == BEGIN_TRANSACTION {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionBeginTransactionRequest = any
.unpack()
.map_err(arrow_error_to_status)?
.ok_or_else(|| {
Status::invalid_argument(
"Unable to unpack ActionBeginTransactionRequest.",
)
})?;
Status::invalid_argument("Unable to unpack ActionBeginTransactionRequest.")
})?;
let stmt = self.do_action_begin_transaction(cmd, request).await?;
let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
body: stmt.as_any().encode_to_vec().into(),
})]);
return Ok(Response::new(Box::pin(output)));
} else if request.get_ref().r#type == END_TRANSACTION {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionEndTransactionRequest = any
.unpack()
.map_err(arrow_error_to_status)?
.ok_or_else(|| {
Status::invalid_argument(
"Unable to unpack ActionEndTransactionRequest.",
)
Status::invalid_argument("Unable to unpack ActionEndTransactionRequest.")
})?;
self.do_action_end_transaction(cmd, request).await?;
return Ok(Response::new(Box::pin(futures::stream::empty())));
} else if request.get_ref().r#type == BEGIN_SAVEPOINT {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionBeginSavepointRequest = any
.unpack()
.map_err(arrow_error_to_status)?
.ok_or_else(|| {
Status::invalid_argument(
"Unable to unpack ActionBeginSavepointRequest.",
)
Status::invalid_argument("Unable to unpack ActionBeginSavepointRequest.")
})?;
let stmt = self.do_action_begin_savepoint(cmd, request).await?;
let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
@@ -917,22 +886,18 @@ where
})]);
return Ok(Response::new(Box::pin(output)));
} else if request.get_ref().r#type == END_SAVEPOINT {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionEndSavepointRequest = any
.unpack()
.map_err(arrow_error_to_status)?
.ok_or_else(|| {
Status::invalid_argument(
"Unable to unpack ActionEndSavepointRequest.",
)
Status::invalid_argument("Unable to unpack ActionEndSavepointRequest.")
})?;
self.do_action_end_savepoint(cmd, request).await?;
return Ok(Response::new(Box::pin(futures::stream::empty())));
} else if request.get_ref().r#type == CANCEL_QUERY {
let any =
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
let cmd: ActionCancelQueryRequest = any
.unpack()
+2 -7
View File
@@ -28,9 +28,7 @@ use tonic::{metadata::MetadataMap, Status, Streaming};
///
/// Note that [`LazyTrailers`] has inner mutability and will only hold actual data after [`ExtractTrailersStream`] is
/// fully consumed (dropping it is not required though).
pub fn extract_lazy_trailers<T>(
s: Streaming<T>,
) -> (ExtractTrailersStream<T>, LazyTrailers) {
pub fn extract_lazy_trailers<T>(s: Streaming<T>) -> (ExtractTrailersStream<T>, LazyTrailers) {
let trailers: SharedTrailers = Default::default();
let stream = ExtractTrailersStream {
inner: s,
@@ -54,10 +52,7 @@ pub struct ExtractTrailersStream<T> {
impl<T> Stream for ExtractTrailersStream<T> {
type Item = Result<T, Status>;
fn poll_next(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<Self::Item>> {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let res = ready!(self.inner.poll_next_unpin(cx));
if res.is_none() {
+8 -15
View File
@@ -52,26 +52,23 @@ pub fn flight_data_from_arrow_batch(
}
/// Convert a slice of wire protocol `FlightData`s into a vector of `RecordBatch`es
pub fn flight_data_to_batches(
flight_data: &[FlightData],
) -> Result<Vec<RecordBatch>, ArrowError> {
pub fn flight_data_to_batches(flight_data: &[FlightData]) -> Result<Vec<RecordBatch>, ArrowError> {
let schema = flight_data.get(0).ok_or_else(|| {
ArrowError::CastError("Need at least one FlightData for schema".to_string())
})?;
let message = root_as_message(&schema.data_header[..])
.map_err(|_| ArrowError::CastError("Cannot get root as message".to_string()))?;
let ipc_schema: arrow_ipc::Schema = message.header_as_schema().ok_or_else(|| {
ArrowError::CastError("Cannot get header as Schema".to_string())
})?;
let ipc_schema: arrow_ipc::Schema = message
.header_as_schema()
.ok_or_else(|| ArrowError::CastError("Cannot get header as Schema".to_string()))?;
let schema = fb_to_schema(ipc_schema);
let schema = Arc::new(schema);
let mut batches = vec![];
let dictionaries_by_id = HashMap::new();
for datum in flight_data[1..].iter() {
let batch =
flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
let batch = flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
batches.push(batch);
}
Ok(batches)
@@ -84,9 +81,8 @@ pub fn flight_data_to_arrow_batch(
dictionaries_by_id: &HashMap<i64, ArrayRef>,
) -> Result<RecordBatch, ArrowError> {
// check that the data_header is a record batch message
let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|err| {
ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
})?;
let message = arrow_ipc::root_as_message(&data.data_header[..])
.map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
message
.header_as_record_batch()
@@ -124,10 +120,7 @@ pub fn flight_schema_from_arrow_schema(
since = "4.4.0",
note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).into()"
)]
pub fn flight_data_from_arrow_schema(
schema: &Schema,
options: &IpcWriteOptions,
) -> FlightData {
pub fn flight_data_from_arrow_schema(schema: &Schema, options: &IpcWriteOptions) -> FlightData {
SchemaAsIpc::new(schema, options).into()
}
+6 -8
View File
@@ -23,9 +23,9 @@ mod common {
}
use arrow_array::{RecordBatch, UInt64Array};
use arrow_flight::{
decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder,
error::FlightError, Action, ActionType, Criteria, Empty, FlightClient, FlightData,
FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, Ticket,
decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, error::FlightError, Action,
ActionType, Criteria, Empty, FlightClient, FlightData, FlightDescriptor, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, Ticket,
};
use arrow_schema::{DataType, Field, Schema};
use bytes::Bytes;
@@ -271,8 +271,7 @@ async fn test_do_put() {
},
];
test_server
.set_do_put_response(expected_response.clone().into_iter().map(Ok).collect());
test_server.set_do_put_response(expected_response.clone().into_iter().map(Ok).collect());
let input_stream = futures::stream::iter(input_flight_data.clone()).map(Ok);
@@ -446,9 +445,8 @@ async fn test_do_exchange() {
let input_flight_data = test_flight_data().await;
let output_flight_data = test_flight_data2().await;
test_server.set_do_exchange_response(
output_flight_data.clone().into_iter().map(Ok).collect(),
);
test_server
.set_do_exchange_response(output_flight_data.clone().into_iter().map(Ok).collect());
let response_stream = client
.do_exchange(futures::stream::iter(input_flight_data.clone()))
+13 -13
View File
@@ -174,10 +174,7 @@ impl TestFlightServer {
}
/// Specify the response returned from the next call to `do_action`
pub fn set_do_action_response(
&self,
response: Vec<Result<arrow_flight::Result, Status>>,
) {
pub fn set_do_action_response(&self, response: Vec<Result<arrow_flight::Result, Status>>) {
let mut state = self.state.lock().expect("mutex not poisoned");
state.do_action_response.replace(response);
}
@@ -278,9 +275,10 @@ impl FlightService for TestFlightServer {
let mut state = self.state.lock().expect("mutex not poisoned");
state.handshake_request = Some(handshake_request);
let response = state.handshake_response.take().unwrap_or_else(|| {
Err(Status::internal("No handshake response configured"))
})?;
let response = state
.handshake_response
.take()
.unwrap_or_else(|| Err(Status::internal("No handshake response configured")))?;
// turn into a streaming response
let output = futures::stream::iter(std::iter::once(Ok(response)));
@@ -313,9 +311,10 @@ impl FlightService for TestFlightServer {
self.save_metadata(&request);
let mut state = self.state.lock().expect("mutex not poisoned");
state.get_flight_info_request = Some(request.into_inner());
let response = state.get_flight_info_response.take().unwrap_or_else(|| {
Err(Status::internal("No get_flight_info response configured"))
})?;
let response = state
.get_flight_info_response
.take()
.unwrap_or_else(|| Err(Status::internal("No get_flight_info response configured")))?;
Ok(Response::new(response))
}
@@ -326,9 +325,10 @@ impl FlightService for TestFlightServer {
self.save_metadata(&request);
let mut state = self.state.lock().expect("mutex not poisoned");
state.get_schema_request = Some(request.into_inner());
let schema = state.get_schema_response.take().unwrap_or_else(|| {
Err(Status::internal("No get_schema response configured"))
})?;
let schema = state
.get_schema_response
.take()
.unwrap_or_else(|| Err(Status::internal("No get_schema response configured")))?;
// encode the schema
let options = arrow_ipc::writer::IpcWriteOptions::default();
+1 -3
View File
@@ -81,9 +81,7 @@ where
ready!(self.as_mut().project().inner.poll(cx));
match result {
Ok(response) => {
Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body })))
}
Ok(response) => Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body }))),
Err(e) => Poll::Ready(Err(e)),
}
}
+17 -24
View File
@@ -195,8 +195,7 @@ async fn test_app_metadata() {
let encode_stream = encoder.build(input_batch_stream);
// use lower level stream to get access to app metadata
let decode_stream =
FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
let mut messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails");
@@ -225,8 +224,7 @@ async fn test_max_message_size() {
let encode_stream = encoder.build(input_batch_stream);
// use lower level stream to get access to app metadata
let decode_stream =
FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
let messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails");
@@ -254,8 +252,8 @@ async fn test_max_message_size_fuzz() {
];
for max_message_size_bytes in [10, 1024, 2048, 6400, 3211212] {
let encoder = FlightDataEncoderBuilder::default()
.with_max_flight_data_size(max_message_size_bytes);
let encoder =
FlightDataEncoderBuilder::default().with_max_flight_data_size(max_message_size_bytes);
let input_batch_stream = futures::stream::iter(input.clone()).map(Ok);
@@ -299,10 +297,10 @@ async fn test_chained_streams_batch_decoder() {
let batch2 = make_dictionary_batch(3);
// Model sending two flight streams back to back, with different schemas
let encode_stream1 = FlightDataEncoderBuilder::default()
.build(futures::stream::iter(vec![Ok(batch1.clone())]));
let encode_stream2 = FlightDataEncoderBuilder::default()
.build(futures::stream::iter(vec![Ok(batch2.clone())]));
let encode_stream1 =
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())]));
let encode_stream2 =
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())]));
// append the two streams (so they will have two different schema messages)
let encode_stream = encode_stream1.chain(encode_stream2);
@@ -324,10 +322,10 @@ async fn test_chained_streams_data_decoder() {
let batch2 = make_dictionary_batch(3);
// Model sending two flight streams back to back, with different schemas
let encode_stream1 = FlightDataEncoderBuilder::default()
.build(futures::stream::iter(vec![Ok(batch1.clone())]));
let encode_stream2 = FlightDataEncoderBuilder::default()
.build(futures::stream::iter(vec![Ok(batch2.clone())]));
let encode_stream1 =
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())]));
let encode_stream2 =
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())]));
// append the two streams (so they will have two different schema messages)
let encode_stream = encode_stream1.chain(encode_stream2);
@@ -335,8 +333,7 @@ async fn test_chained_streams_data_decoder() {
// lower level decode stream can handle multiple schema messages
let decode_stream = FlightDataDecoder::new(encode_stream);
let decoded_data: Vec<_> =
decode_stream.try_collect().await.expect("encode / decode");
let decoded_data: Vec<_> = decode_stream.try_collect().await.expect("encode / decode");
println!("decoded data: {decoded_data:#?}");
@@ -425,8 +422,7 @@ fn make_primitive_batch(num_rows: usize) -> RecordBatch {
})
.collect();
RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))])
.unwrap()
RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))]).unwrap()
}
/// Make a dictionary batch for testing
@@ -459,8 +455,7 @@ fn make_dictionary_batch(num_rows: usize) -> RecordBatch {
/// match the input.
async fn roundtrip(input: Vec<RecordBatch>) {
let expected_output = input.clone();
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output)
.await
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await
}
/// Encodes input as a FlightData stream, and then decodes it using
@@ -475,8 +470,7 @@ async fn roundtrip_dictionary(input: Vec<RecordBatch>) {
.iter()
.map(|batch| prepare_batch_for_flight(batch, schema.clone()).unwrap())
.collect();
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output)
.await
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await
}
async fn roundtrip_with_encoder(
@@ -491,8 +485,7 @@ async fn roundtrip_with_encoder(
let encode_stream = encoder.build(input_batch_stream);
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream);
let output_batches: Vec<_> =
decode_stream.try_collect().await.expect("encode / decode");
let output_batches: Vec<_> = decode_stream.try_collect().await.expect("encode / decode");
// remove any empty batches from input as they are not transmitted
let expected_batches: Vec<_> = expected_batches

Some files were not shown because too many files have changed in this diff Show More