mirror of
https://github.com/langchain-ai/arrow-rs.git
synced 2026-07-01 21:34:01 -04:00
Use rustfmt default line width (#4960)
* Use rustfmt default line width * Further format
This commit is contained in:
committed by
GitHub
parent
f597d3a687
commit
7e134f4d27
@@ -207,15 +207,15 @@ where
|
||||
}
|
||||
|
||||
let iter = ArrayIter::new(array);
|
||||
let sum =
|
||||
iter.into_iter()
|
||||
.try_fold(T::default_value(), |accumulator, value| {
|
||||
if let Some(value) = value {
|
||||
accumulator.add_checked(value)
|
||||
} else {
|
||||
Ok(accumulator)
|
||||
}
|
||||
})?;
|
||||
let sum = iter
|
||||
.into_iter()
|
||||
.try_fold(T::default_value(), |accumulator, value| {
|
||||
if let Some(value) = value {
|
||||
accumulator.add_checked(value)
|
||||
} else {
|
||||
Ok(accumulator)
|
||||
}
|
||||
})?;
|
||||
|
||||
Ok(Some(sum))
|
||||
}
|
||||
@@ -230,11 +230,7 @@ where
|
||||
T: ArrowNumericType,
|
||||
T::Native: ArrowNativeType,
|
||||
{
|
||||
min_max_array_helper::<T, A, _, _>(
|
||||
array,
|
||||
|a, b| (is_nan(*a) & !is_nan(*b)) || a > b,
|
||||
min,
|
||||
)
|
||||
min_max_array_helper::<T, A, _, _>(array, |a, b| (is_nan(*a) & !is_nan(*b)) || a > b, min)
|
||||
}
|
||||
|
||||
/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
|
||||
@@ -244,11 +240,7 @@ where
|
||||
T: ArrowNumericType,
|
||||
T::Native: ArrowNativeType,
|
||||
{
|
||||
min_max_array_helper::<T, A, _, _>(
|
||||
array,
|
||||
|a, b| (!is_nan(*a) & is_nan(*b)) || a < b,
|
||||
max,
|
||||
)
|
||||
min_max_array_helper::<T, A, _, _>(array, |a, b| (!is_nan(*a) & is_nan(*b)) || a < b, max)
|
||||
}
|
||||
|
||||
fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
|
||||
@@ -501,10 +493,7 @@ mod simd {
|
||||
fn init_accumulator_chunk() -> Self::SimdAccumulator;
|
||||
|
||||
/// Updates the accumulator with the values of one chunk
|
||||
fn accumulate_chunk_non_null(
|
||||
accumulator: &mut Self::SimdAccumulator,
|
||||
chunk: T::Simd,
|
||||
);
|
||||
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd);
|
||||
|
||||
/// Updates the accumulator with the values of one chunk according to the given vector mask
|
||||
fn accumulate_chunk_nullable(
|
||||
@@ -602,10 +591,7 @@ mod simd {
|
||||
(T::init(T::default_value()), T::mask_init(false))
|
||||
}
|
||||
|
||||
fn accumulate_chunk_non_null(
|
||||
accumulator: &mut Self::SimdAccumulator,
|
||||
chunk: T::Simd,
|
||||
) {
|
||||
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) {
|
||||
let acc_is_nan = !T::eq(accumulator.0, accumulator.0);
|
||||
let is_lt = acc_is_nan | T::lt(chunk, accumulator.0);
|
||||
let first_or_lt = !accumulator.1 | is_lt;
|
||||
@@ -627,10 +613,7 @@ mod simd {
|
||||
accumulator.1 |= vecmask;
|
||||
}
|
||||
|
||||
fn accumulate_scalar(
|
||||
accumulator: &mut Self::ScalarAccumulator,
|
||||
value: T::Native,
|
||||
) {
|
||||
fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) {
|
||||
if !accumulator.1 {
|
||||
accumulator.0 = value;
|
||||
} else {
|
||||
@@ -690,10 +673,7 @@ mod simd {
|
||||
(T::init(T::default_value()), T::mask_init(false))
|
||||
}
|
||||
|
||||
fn accumulate_chunk_non_null(
|
||||
accumulator: &mut Self::SimdAccumulator,
|
||||
chunk: T::Simd,
|
||||
) {
|
||||
fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) {
|
||||
let chunk_is_nan = !T::eq(chunk, chunk);
|
||||
let is_gt = chunk_is_nan | T::gt(chunk, accumulator.0);
|
||||
let first_or_gt = !accumulator.1 | is_gt;
|
||||
@@ -715,10 +695,7 @@ mod simd {
|
||||
accumulator.1 |= vecmask;
|
||||
}
|
||||
|
||||
fn accumulate_scalar(
|
||||
accumulator: &mut Self::ScalarAccumulator,
|
||||
value: T::Native,
|
||||
) {
|
||||
fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) {
|
||||
if !accumulator.1 {
|
||||
accumulator.0 = value;
|
||||
} else {
|
||||
@@ -1009,8 +986,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_primitive_array_bool_or_with_nulls() {
|
||||
let a =
|
||||
BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
|
||||
let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
|
||||
assert!(!bool_or(&a).unwrap());
|
||||
}
|
||||
|
||||
@@ -1297,8 +1273,7 @@ mod tests {
|
||||
assert_eq!(Some(false), min_boolean(&a));
|
||||
assert_eq!(Some(true), max_boolean(&a));
|
||||
|
||||
let a =
|
||||
BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
|
||||
let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
|
||||
assert_eq!(Some(false), min_boolean(&a));
|
||||
assert_eq!(Some(true), max_boolean(&a));
|
||||
}
|
||||
|
||||
@@ -48,8 +48,7 @@ fn get_fixed_point_info(
|
||||
)));
|
||||
}
|
||||
|
||||
let divisor =
|
||||
i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32);
|
||||
let divisor = i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32);
|
||||
|
||||
Ok((precision, product_scale, divisor))
|
||||
}
|
||||
@@ -78,8 +77,7 @@ pub fn multiply_fixed_point_dyn(
|
||||
let left = left.as_any().downcast_ref::<Decimal128Array>().unwrap();
|
||||
let right = right.as_any().downcast_ref::<Decimal128Array>().unwrap();
|
||||
|
||||
multiply_fixed_point(left, right, required_scale)
|
||||
.map(|a| Arc::new(a) as ArrayRef)
|
||||
multiply_fixed_point(left, right, required_scale).map(|a| Arc::new(a) as ArrayRef)
|
||||
}
|
||||
(_, _) => Err(ArrowError::CastError(format!(
|
||||
"Unsupported data type {}, {}",
|
||||
@@ -113,10 +111,8 @@ pub fn multiply_fixed_point_checked(
|
||||
)?;
|
||||
|
||||
if required_scale == product_scale {
|
||||
return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| {
|
||||
a.mul_checked(b)
|
||||
})?
|
||||
.with_precision_and_scale(precision, required_scale);
|
||||
return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| a.mul_checked(b))?
|
||||
.with_precision_and_scale(precision, required_scale);
|
||||
}
|
||||
|
||||
try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| {
|
||||
@@ -213,17 +209,16 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let err = mul(&a, &b).unwrap_err();
|
||||
assert!(err.to_string().contains(
|
||||
"Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
|
||||
));
|
||||
assert!(err
|
||||
.to_string()
|
||||
.contains("Overflow happened on: 123456789000000000000000000 * 10000000000000000000"));
|
||||
|
||||
// Allow precision loss.
|
||||
let result = multiply_fixed_point_checked(&a, &b, 28).unwrap();
|
||||
// [1234567890]
|
||||
let expected =
|
||||
Decimal128Array::from(vec![12345678900000000000000000000000000000])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(&expected, &result);
|
||||
assert_eq!(
|
||||
@@ -233,13 +228,9 @@ mod tests {
|
||||
|
||||
// Rounding case
|
||||
// [0.000000000000000001, 123456789.555555555555555555, 1.555555555555555555]
|
||||
let a = Decimal128Array::from(vec![
|
||||
1,
|
||||
123456789555555555555555555,
|
||||
1555555555555555555,
|
||||
])
|
||||
.with_precision_and_scale(38, 18)
|
||||
.unwrap();
|
||||
let a = Decimal128Array::from(vec![1, 123456789555555555555555555, 1555555555555555555])
|
||||
.with_precision_and_scale(38, 18)
|
||||
.unwrap();
|
||||
|
||||
// [1.555555555555555555, 11.222222222222222222, 0.000000000000000001]
|
||||
let b = Decimal128Array::from(vec![1555555555555555555, 11222222222222222222, 1])
|
||||
@@ -311,10 +302,9 @@ mod tests {
|
||||
));
|
||||
|
||||
let result = multiply_fixed_point(&a, &b, 28).unwrap();
|
||||
let expected =
|
||||
Decimal128Array::from(vec![62946009661555981610246871926660136960])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
let expected = Decimal128Array::from(vec![62946009661555981610246871926660136960])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(&expected, &result);
|
||||
}
|
||||
@@ -338,10 +328,9 @@ mod tests {
|
||||
// Avoid overflow by reducing the scale.
|
||||
let result = multiply_fixed_point(&a, &b, 28).unwrap();
|
||||
// [1234567890]
|
||||
let expected =
|
||||
Decimal128Array::from(vec![12345678900000000000000000000000000000])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000])
|
||||
.with_precision_and_scale(38, 28)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(&expected, &result);
|
||||
assert_eq!(
|
||||
|
||||
@@ -49,10 +49,7 @@ where
|
||||
}
|
||||
|
||||
/// See [`PrimitiveArray::try_unary`]
|
||||
pub fn try_unary<I, F, O>(
|
||||
array: &PrimitiveArray<I>,
|
||||
op: F,
|
||||
) -> Result<PrimitiveArray<O>, ArrowError>
|
||||
pub fn try_unary<I, F, O>(array: &PrimitiveArray<I>, op: F) -> Result<PrimitiveArray<O>, ArrowError>
|
||||
where
|
||||
I: ArrowPrimitiveType,
|
||||
O: ArrowPrimitiveType,
|
||||
@@ -86,10 +83,7 @@ where
|
||||
}
|
||||
|
||||
/// A helper function that applies a fallible unary function to a dictionary array with primitive value type.
|
||||
fn try_unary_dict<K, F, T>(
|
||||
array: &DictionaryArray<K>,
|
||||
op: F,
|
||||
) -> Result<ArrayRef, ArrowError>
|
||||
fn try_unary_dict<K, F, T>(array: &DictionaryArray<K>, op: F) -> Result<ArrayRef, ArrowError>
|
||||
where
|
||||
K: ArrowDictionaryKeyType + ArrowNumericType,
|
||||
T: ArrowPrimitiveType,
|
||||
@@ -299,8 +293,7 @@ where
|
||||
try_binary_no_nulls(len, a, b, op)
|
||||
} else {
|
||||
let nulls =
|
||||
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref())
|
||||
.unwrap();
|
||||
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap();
|
||||
|
||||
let mut buffer = BufferBuilder::<O::Native>::new(len);
|
||||
buffer.append_n_zeroed(len);
|
||||
@@ -308,8 +301,7 @@ where
|
||||
|
||||
nulls.try_for_each_valid_idx(|idx| {
|
||||
unsafe {
|
||||
*slice.get_unchecked_mut(idx) =
|
||||
op(a.value_unchecked(idx), b.value_unchecked(idx))?
|
||||
*slice.get_unchecked_mut(idx) = op(a.value_unchecked(idx), b.value_unchecked(idx))?
|
||||
};
|
||||
Ok::<_, ArrowError>(())
|
||||
})?;
|
||||
@@ -360,8 +352,7 @@ where
|
||||
try_binary_no_nulls_mut(len, a, b, op)
|
||||
} else {
|
||||
let nulls =
|
||||
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref())
|
||||
.unwrap();
|
||||
NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap();
|
||||
|
||||
let mut builder = a.into_builder()?;
|
||||
|
||||
@@ -440,8 +431,7 @@ mod tests {
|
||||
#[test]
|
||||
#[allow(deprecated)]
|
||||
fn test_unary_f64_slice() {
|
||||
let input =
|
||||
Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]);
|
||||
let input = Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]);
|
||||
let input_slice = input.slice(1, 4);
|
||||
let result = unary(&input_slice, |n| n.round());
|
||||
assert_eq!(
|
||||
|
||||
@@ -212,10 +212,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bitwise_shift_left() {
|
||||
let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]);
|
||||
let right =
|
||||
UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]);
|
||||
let expected =
|
||||
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]);
|
||||
let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]);
|
||||
let expected = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]);
|
||||
let result = bitwise_shift_left(&left, &right).unwrap();
|
||||
assert_eq!(expected, result);
|
||||
}
|
||||
@@ -224,18 +222,15 @@ mod tests {
|
||||
fn test_bitwise_shift_left_scalar() {
|
||||
let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]);
|
||||
let scalar = 2;
|
||||
let expected =
|
||||
UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]);
|
||||
let expected = UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]);
|
||||
let result = bitwise_shift_left_scalar(&left, scalar).unwrap();
|
||||
assert_eq!(expected, result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bitwise_shift_right() {
|
||||
let left =
|
||||
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
|
||||
let right =
|
||||
UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]);
|
||||
let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
|
||||
let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]);
|
||||
let expected = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(1)]);
|
||||
let result = bitwise_shift_right(&left, &right).unwrap();
|
||||
assert_eq!(expected, result);
|
||||
@@ -243,11 +238,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_bitwise_shift_right_scalar() {
|
||||
let left =
|
||||
UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
|
||||
let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]);
|
||||
let scalar = 2;
|
||||
let expected =
|
||||
UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]);
|
||||
let expected = UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]);
|
||||
let result = bitwise_shift_right_scalar(&left, scalar).unwrap();
|
||||
assert_eq!(expected, result);
|
||||
}
|
||||
|
||||
+11
-28
@@ -57,10 +57,7 @@ use arrow_schema::ArrowError;
|
||||
/// # Fails
|
||||
///
|
||||
/// If the operands have different lengths
|
||||
pub fn and_kleene(
|
||||
left: &BooleanArray,
|
||||
right: &BooleanArray,
|
||||
) -> Result<BooleanArray, ArrowError> {
|
||||
pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
|
||||
if left.len() != right.len() {
|
||||
return Err(ArrowError::ComputeError(
|
||||
"Cannot perform bitwise operation on arrays of different length".to_string(),
|
||||
@@ -155,10 +152,7 @@ pub fn and_kleene(
|
||||
/// # Fails
|
||||
///
|
||||
/// If the operands have different lengths
|
||||
pub fn or_kleene(
|
||||
left: &BooleanArray,
|
||||
right: &BooleanArray,
|
||||
) -> Result<BooleanArray, ArrowError> {
|
||||
pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
|
||||
if left.len() != right.len() {
|
||||
return Err(ArrowError::ComputeError(
|
||||
"Cannot perform bitwise operation on arrays of different length".to_string(),
|
||||
@@ -257,10 +251,7 @@ where
|
||||
/// let and_ab = and(&a, &b).unwrap();
|
||||
/// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None]));
|
||||
/// ```
|
||||
pub fn and(
|
||||
left: &BooleanArray,
|
||||
right: &BooleanArray,
|
||||
) -> Result<BooleanArray, ArrowError> {
|
||||
pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> {
|
||||
binary_boolean_kernel(left, right, |a, b| a & b)
|
||||
}
|
||||
|
||||
@@ -581,8 +572,7 @@ mod tests {
|
||||
let a = a.as_any().downcast_ref::<BooleanArray>().unwrap();
|
||||
let c = not(a).unwrap();
|
||||
|
||||
let expected =
|
||||
BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
|
||||
let expected = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]);
|
||||
|
||||
assert_eq!(c, expected);
|
||||
}
|
||||
@@ -631,12 +621,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bool_array_and_sliced_same_offset() {
|
||||
let a = BooleanArray::from(vec![
|
||||
false, false, false, false, false, false, false, false, false, false, true,
|
||||
true,
|
||||
false, false, false, false, false, false, false, false, false, false, true, true,
|
||||
]);
|
||||
let b = BooleanArray::from(vec![
|
||||
false, false, false, false, false, false, false, false, false, true, false,
|
||||
true,
|
||||
false, false, false, false, false, false, false, false, false, true, false, true,
|
||||
]);
|
||||
|
||||
let a = a.slice(8, 4);
|
||||
@@ -654,12 +642,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bool_array_and_sliced_same_offset_mod8() {
|
||||
let a = BooleanArray::from(vec![
|
||||
false, false, true, true, false, false, false, false, false, false, false,
|
||||
false,
|
||||
false, false, true, true, false, false, false, false, false, false, false, false,
|
||||
]);
|
||||
let b = BooleanArray::from(vec![
|
||||
false, false, false, false, false, false, false, false, false, true, false,
|
||||
true,
|
||||
false, false, false, false, false, false, false, false, false, true, false, true,
|
||||
]);
|
||||
|
||||
let a = a.slice(0, 4);
|
||||
@@ -677,8 +663,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bool_array_and_sliced_offset1() {
|
||||
let a = BooleanArray::from(vec![
|
||||
false, false, false, false, false, false, false, false, false, false, true,
|
||||
true,
|
||||
false, false, false, false, false, false, false, false, false, false, true, true,
|
||||
]);
|
||||
let b = BooleanArray::from(vec![false, true, false, true]);
|
||||
|
||||
@@ -696,8 +681,7 @@ mod tests {
|
||||
fn test_bool_array_and_sliced_offset2() {
|
||||
let a = BooleanArray::from(vec![false, false, true, true]);
|
||||
let b = BooleanArray::from(vec![
|
||||
false, false, false, false, false, false, false, false, false, true, false,
|
||||
true,
|
||||
false, false, false, false, false, false, false, false, false, true, false, true,
|
||||
]);
|
||||
|
||||
let b = b.slice(8, 4);
|
||||
@@ -730,8 +714,7 @@ mod tests {
|
||||
|
||||
let c = and(a, b).unwrap();
|
||||
|
||||
let expected =
|
||||
BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]);
|
||||
let expected = BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]);
|
||||
|
||||
assert_eq!(expected, c);
|
||||
}
|
||||
|
||||
+15
-20
@@ -144,13 +144,13 @@ pub fn neg(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
|
||||
let a = array
|
||||
.as_primitive::<IntervalMonthDayNanoType>()
|
||||
.try_unary::<_, IntervalMonthDayNanoType, ArrowError>(|x| {
|
||||
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x);
|
||||
Ok(IntervalMonthDayNanoType::make_value(
|
||||
months.neg_checked()?,
|
||||
days.neg_checked()?,
|
||||
nanos.neg_checked()?,
|
||||
))
|
||||
})?;
|
||||
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x);
|
||||
Ok(IntervalMonthDayNanoType::make_value(
|
||||
months.neg_checked()?,
|
||||
days.neg_checked()?,
|
||||
nanos.neg_checked()?,
|
||||
))
|
||||
})?;
|
||||
Ok(Arc::new(a))
|
||||
}
|
||||
t => Err(ArrowError::InvalidArgumentError(format!(
|
||||
@@ -201,11 +201,7 @@ impl Op {
|
||||
}
|
||||
|
||||
/// Dispatch the given `op` to the appropriate specialized kernel
|
||||
fn arithmetic_op(
|
||||
op: Op,
|
||||
lhs: &dyn Datum,
|
||||
rhs: &dyn Datum,
|
||||
) -> Result<ArrayRef, ArrowError> {
|
||||
fn arithmetic_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<ArrayRef, ArrowError> {
|
||||
use DataType::*;
|
||||
use IntervalUnit::*;
|
||||
use TimeUnit::*;
|
||||
@@ -675,8 +671,7 @@ fn date_op<T: DateOp>(
|
||||
(Date64, Op::Sub | Op::SubWrapping, Date64) => {
|
||||
let l = l.as_primitive::<Date64Type>();
|
||||
let r = r.as_primitive::<Date64Type>();
|
||||
let result =
|
||||
try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r));
|
||||
let result = try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r));
|
||||
return Ok(result);
|
||||
}
|
||||
_ => {}
|
||||
@@ -800,8 +795,7 @@ fn decimal_op<T: DecimalType>(
|
||||
let mul_pow = result_scale - s1 + s2;
|
||||
|
||||
// p1 - s1 + s2 + result_scale
|
||||
let result_precision =
|
||||
(mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION);
|
||||
let result_precision = (mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION);
|
||||
|
||||
let (l_mul, r_mul) = match mul_pow.cmp(&0) {
|
||||
Ordering::Greater => (
|
||||
@@ -1158,7 +1152,10 @@ mod tests {
|
||||
.with_precision_and_scale(3, -1)
|
||||
.unwrap();
|
||||
let err = add(&a, &b).unwrap_err().to_string();
|
||||
assert_eq!(err, "Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000");
|
||||
assert_eq!(
|
||||
err,
|
||||
"Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000"
|
||||
);
|
||||
|
||||
let b = Decimal128Array::from(vec![0])
|
||||
.with_precision_and_scale(1, 1)
|
||||
@@ -1199,9 +1196,7 @@ mod tests {
|
||||
"1960-01-30T04:23:20Z",
|
||||
]
|
||||
.into_iter()
|
||||
.map(|x| {
|
||||
T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap()
|
||||
})
|
||||
.map(|x| T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap())
|
||||
.collect();
|
||||
|
||||
let a = PrimitiveArray::<T>::new(values, None);
|
||||
|
||||
+33
-75
@@ -23,9 +23,7 @@ use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike};
|
||||
|
||||
use arrow_array::builder::*;
|
||||
use arrow_array::iterator::ArrayIter;
|
||||
use arrow_array::temporal_conversions::{
|
||||
as_datetime, as_datetime_with_timezone, as_time,
|
||||
};
|
||||
use arrow_array::temporal_conversions::{as_datetime, as_datetime_with_timezone, as_time};
|
||||
use arrow_array::timezone::Tz;
|
||||
use arrow_array::types::*;
|
||||
use arrow_array::*;
|
||||
@@ -209,12 +207,9 @@ where
|
||||
}
|
||||
DataType::Timestamp(_, Some(tz)) => {
|
||||
let iter = ArrayIter::new(array);
|
||||
extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>(
|
||||
iter,
|
||||
b,
|
||||
tz,
|
||||
|t| t.hour() as i32,
|
||||
)
|
||||
extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>(iter, b, tz, |t| {
|
||||
t.hour() as i32
|
||||
})
|
||||
}
|
||||
_ => return_compute_error_with!("hour does not support", array.data_type()),
|
||||
}
|
||||
@@ -289,9 +284,7 @@ pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowErro
|
||||
/// Monday is encoded as `0`, Tuesday as `1`, etc.
|
||||
///
|
||||
/// See also [`num_days_from_sunday`] which starts at Sunday.
|
||||
pub fn num_days_from_monday<T>(
|
||||
array: &PrimitiveArray<T>,
|
||||
) -> Result<Int32Array, ArrowError>
|
||||
pub fn num_days_from_monday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
|
||||
where
|
||||
T: ArrowTemporalType + ArrowNumericType,
|
||||
i64: From<T::Native>,
|
||||
@@ -318,9 +311,7 @@ pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowErro
|
||||
/// Sunday is encoded as `0`, Monday as `1`, etc.
|
||||
///
|
||||
/// See also [`num_days_from_monday`] which starts at Monday.
|
||||
pub fn num_days_from_sunday<T>(
|
||||
array: &PrimitiveArray<T>,
|
||||
) -> Result<Int32Array, ArrowError>
|
||||
pub fn num_days_from_sunday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
|
||||
where
|
||||
T: ArrowTemporalType + ArrowNumericType,
|
||||
i64: From<T::Native>,
|
||||
@@ -449,11 +440,7 @@ pub fn millisecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
|
||||
}
|
||||
|
||||
/// Extracts the time fraction of a given temporal array as an array of integers
|
||||
fn time_fraction_dyn<F>(
|
||||
array: &dyn Array,
|
||||
name: &str,
|
||||
op: F,
|
||||
) -> Result<ArrayRef, ArrowError>
|
||||
fn time_fraction_dyn<F>(array: &dyn Array, name: &str, op: F) -> Result<ArrayRef, ArrowError>
|
||||
where
|
||||
F: Fn(NaiveDateTime) -> i32,
|
||||
{
|
||||
@@ -498,14 +485,9 @@ where
|
||||
}
|
||||
DataType::Timestamp(_, Some(tz)) => {
|
||||
let iter = ArrayIter::new(array);
|
||||
extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| {
|
||||
op(t.naive_local())
|
||||
})
|
||||
extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| op(t.naive_local()))
|
||||
}
|
||||
_ => return_compute_error_with!(
|
||||
format!("{name} does not support"),
|
||||
array.data_type()
|
||||
),
|
||||
_ => return_compute_error_with!(format!("{name} does not support"), array.data_type()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -559,8 +541,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_time64_micro_hour() {
|
||||
let a: PrimitiveArray<Time64MicrosecondType> =
|
||||
vec![37800000000, 86339000000].into();
|
||||
let a: PrimitiveArray<Time64MicrosecondType> = vec![37800000000, 86339000000].into();
|
||||
|
||||
let b = hour(&a).unwrap();
|
||||
assert_eq!(10, b.value(0));
|
||||
@@ -623,12 +604,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_quarter_with_timezone() {
|
||||
// 24 * 60 * 60 = 86400
|
||||
let a = TimestampSecondArray::from(vec![86400 * 90])
|
||||
.with_timezone("+00:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("+00:00".to_string());
|
||||
let b = quarter(&a).unwrap();
|
||||
assert_eq!(2, b.value(0));
|
||||
let a = TimestampSecondArray::from(vec![86400 * 90])
|
||||
.with_timezone("-10:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("-10:00".to_string());
|
||||
let b = quarter(&a).unwrap();
|
||||
assert_eq!(1, b.value(0));
|
||||
}
|
||||
@@ -659,12 +638,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_month_with_timezone() {
|
||||
// 24 * 60 * 60 = 86400
|
||||
let a = TimestampSecondArray::from(vec![86400 * 31])
|
||||
.with_timezone("+00:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("+00:00".to_string());
|
||||
let b = month(&a).unwrap();
|
||||
assert_eq!(2, b.value(0));
|
||||
let a = TimestampSecondArray::from(vec![86400 * 31])
|
||||
.with_timezone("-10:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("-10:00".to_string());
|
||||
let b = month(&a).unwrap();
|
||||
assert_eq!(1, b.value(0));
|
||||
}
|
||||
@@ -672,12 +649,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_day_with_timezone() {
|
||||
// 24 * 60 * 60 = 86400
|
||||
let a =
|
||||
TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string());
|
||||
let b = day(&a).unwrap();
|
||||
assert_eq!(2, b.value(0));
|
||||
let a =
|
||||
TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string());
|
||||
let b = day(&a).unwrap();
|
||||
assert_eq!(1, b.value(0));
|
||||
}
|
||||
@@ -857,8 +832,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_second_with_timezone() {
|
||||
let a =
|
||||
TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string());
|
||||
let b = second(&a).unwrap();
|
||||
assert_eq!(10, b.value(0));
|
||||
assert_eq!(20, b.value(1));
|
||||
@@ -866,8 +840,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_minute_with_timezone() {
|
||||
let a =
|
||||
TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string());
|
||||
let a = TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string());
|
||||
let b = minute(&a).unwrap();
|
||||
assert_eq!(50, b.value(0));
|
||||
assert_eq!(51, b.value(1));
|
||||
@@ -875,48 +848,42 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_minute_with_negative_timezone() {
|
||||
let a =
|
||||
TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string());
|
||||
let b = minute(&a).unwrap();
|
||||
assert_eq!(5, b.value(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_hour_with_timezone() {
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
|
||||
.with_timezone("+01:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01:00".to_string());
|
||||
let b = hour(&a).unwrap();
|
||||
assert_eq!(11, b.value(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_hour_with_timezone_without_colon() {
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
|
||||
.with_timezone("+0100".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+0100".to_string());
|
||||
let b = hour(&a).unwrap();
|
||||
assert_eq!(11, b.value(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_hour_with_timezone_without_minutes() {
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
|
||||
.with_timezone("+01".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01".to_string());
|
||||
let b = hour(&a).unwrap();
|
||||
assert_eq!(11, b.value(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign() {
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
|
||||
.with_timezone("0100".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("0100".to_string());
|
||||
let err = hour(&a).unwrap_err().to_string();
|
||||
assert!(err.contains("Invalid timezone"), "{}", err);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() {
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10])
|
||||
.with_timezone("01:00".to_string());
|
||||
let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("01:00".to_string());
|
||||
let err = hour(&a).unwrap_err().to_string();
|
||||
assert!(err.contains("Invalid timezone"), "{}", err);
|
||||
}
|
||||
@@ -960,10 +927,8 @@ mod tests {
|
||||
|
||||
let b = hour_dyn(&dict).unwrap();
|
||||
|
||||
let expected_dict = DictionaryArray::new(
|
||||
keys.clone(),
|
||||
Arc::new(Int32Array::from(vec![11, 21, 7])),
|
||||
);
|
||||
let expected_dict =
|
||||
DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![11, 21, 7])));
|
||||
let expected = Arc::new(expected_dict) as ArrayRef;
|
||||
assert_eq!(&expected, &b);
|
||||
|
||||
@@ -987,8 +952,7 @@ mod tests {
|
||||
assert_eq!(&expected, &b);
|
||||
assert_eq!(&expected, &b_old);
|
||||
|
||||
let b =
|
||||
time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap();
|
||||
let b = time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap();
|
||||
|
||||
let expected_dict =
|
||||
DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![0, 0, 0, 0, 0])));
|
||||
@@ -998,8 +962,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_year_dictionary_array() {
|
||||
let a: PrimitiveArray<Date64Type> =
|
||||
vec![Some(1514764800000), Some(1550636625000)].into();
|
||||
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into();
|
||||
|
||||
let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
|
||||
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
|
||||
@@ -1018,24 +981,20 @@ mod tests {
|
||||
fn test_quarter_month_dictionary_array() {
|
||||
//1514764800000 -> 2018-01-01
|
||||
//1566275025000 -> 2019-08-20
|
||||
let a: PrimitiveArray<Date64Type> =
|
||||
vec![Some(1514764800000), Some(1566275025000)].into();
|
||||
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1566275025000)].into();
|
||||
|
||||
let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
|
||||
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
|
||||
|
||||
let b = quarter_dyn(&dict).unwrap();
|
||||
|
||||
let expected = DictionaryArray::new(
|
||||
keys.clone(),
|
||||
Arc::new(Int32Array::from(vec![1, 3, 3, 1])),
|
||||
);
|
||||
let expected =
|
||||
DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 3, 3, 1])));
|
||||
assert_eq!(b.as_ref(), &expected);
|
||||
|
||||
let b = month_dyn(&dict).unwrap();
|
||||
|
||||
let expected =
|
||||
DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1])));
|
||||
let expected = DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1])));
|
||||
assert_eq!(b.as_ref(), &expected);
|
||||
}
|
||||
|
||||
@@ -1043,8 +1002,7 @@ mod tests {
|
||||
fn test_num_days_from_monday_sunday_day_doy_week_dictionary_array() {
|
||||
//1514764800000 -> 2018-01-01 (Monday)
|
||||
//1550636625000 -> 2019-02-20 (Wednesday)
|
||||
let a: PrimitiveArray<Date64Type> =
|
||||
vec![Some(1514764800000), Some(1550636625000)].into();
|
||||
let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into();
|
||||
|
||||
let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), Some(0), None]);
|
||||
let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
|
||||
|
||||
@@ -229,10 +229,7 @@ macro_rules! native_type_op {
|
||||
#[inline]
|
||||
fn pow_checked(self, exp: u32) -> Result<Self, ArrowError> {
|
||||
self.checked_pow(exp).ok_or_else(|| {
|
||||
ArrowError::ComputeError(format!(
|
||||
"Overflow happened on: {:?} ^ {exp:?}",
|
||||
self
|
||||
))
|
||||
ArrowError::ComputeError(format!("Overflow happened on: {:?} ^ {exp:?}", self))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -16,9 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
use crate::types::{ByteArrayType, GenericBinaryType};
|
||||
use crate::{
|
||||
Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait,
|
||||
};
|
||||
use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait};
|
||||
use arrow_data::ArrayData;
|
||||
use arrow_schema::DataType;
|
||||
|
||||
@@ -102,9 +100,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>>
|
||||
for GenericBinaryArray<OffsetSize>
|
||||
{
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>> for GenericBinaryArray<OffsetSize> {
|
||||
fn from(v: Vec<Option<&[u8]>>) -> Self {
|
||||
Self::from_opt_vec(v)
|
||||
}
|
||||
@@ -376,9 +372,11 @@ mod tests {
|
||||
.unwrap();
|
||||
let binary_array1 = GenericBinaryArray::<O>::from(array_data1);
|
||||
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt8, false),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt8,
|
||||
false,
|
||||
)));
|
||||
|
||||
let array_data2 = ArrayData::builder(data_type)
|
||||
.len(3)
|
||||
@@ -423,9 +421,11 @@ mod tests {
|
||||
|
||||
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
|
||||
let null_buffer = Buffer::from_slice_ref([0b101]);
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt8, false),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt8,
|
||||
false,
|
||||
)));
|
||||
|
||||
// [None, Some(b"Parquet")]
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
@@ -456,9 +456,7 @@ mod tests {
|
||||
_test_generic_binary_array_from_list_array_with_offset::<i64>();
|
||||
}
|
||||
|
||||
fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<
|
||||
O: OffsetSizeTrait,
|
||||
>() {
|
||||
fn _test_generic_binary_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
|
||||
let values = b"HelloArrow";
|
||||
let child_data = ArrayData::builder(DataType::UInt8)
|
||||
.len(10)
|
||||
@@ -468,9 +466,11 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt8, true),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt8,
|
||||
true,
|
||||
)));
|
||||
|
||||
// [None, Some(b"Parquet")]
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
@@ -558,8 +558,7 @@ mod tests {
|
||||
.unwrap();
|
||||
let offsets: [i32; 4] = [0, 5, 5, 12];
|
||||
|
||||
let data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::UInt32, false)));
|
||||
let data_type = DataType::List(Arc::new(Field::new("item", DataType::UInt32, false)));
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
.len(3)
|
||||
.add_buffer(Buffer::from_slice_ref(offsets))
|
||||
@@ -575,8 +574,7 @@ mod tests {
|
||||
expected = "Trying to access an element at index 4 from a BinaryArray of length 3"
|
||||
)]
|
||||
fn test_binary_array_get_value_index_out_of_bound() {
|
||||
let values: [u8; 12] =
|
||||
[104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116];
|
||||
let values: [u8; 12] = [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116];
|
||||
let offsets: [i32; 4] = [0, 5, 5, 12];
|
||||
let array_data = ArrayData::builder(DataType::Binary)
|
||||
.len(3)
|
||||
|
||||
@@ -238,11 +238,7 @@ impl BooleanArray {
|
||||
///
|
||||
/// This function panics if left and right are not the same length
|
||||
///
|
||||
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(
|
||||
left: T,
|
||||
right: S,
|
||||
mut op: F,
|
||||
) -> Self
|
||||
pub fn from_binary<T: ArrayAccessor, S: ArrayAccessor, F>(left: T, right: S, mut op: F) -> Self
|
||||
where
|
||||
F: FnMut(T::Item, S::Item) -> bool,
|
||||
{
|
||||
@@ -362,8 +358,7 @@ impl From<ArrayData> for BooleanArray {
|
||||
1,
|
||||
"BooleanArray data should contain a single buffer only (values buffer)"
|
||||
);
|
||||
let values =
|
||||
BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
|
||||
let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
|
||||
|
||||
Self {
|
||||
values,
|
||||
@@ -591,9 +586,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "BooleanArray expected ArrayData with type Boolean got Int32"
|
||||
)]
|
||||
#[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")]
|
||||
fn test_from_array_data_validation() {
|
||||
let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32));
|
||||
}
|
||||
|
||||
@@ -197,8 +197,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {
|
||||
let (_, data_len) = iter.size_hint();
|
||||
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
|
||||
|
||||
let mut offsets =
|
||||
MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
|
||||
let mut offsets = MutableBuffer::new((data_len + 1) * std::mem::size_of::<T::Offset>());
|
||||
offsets.push(T::Offset::usize_as(0));
|
||||
|
||||
let mut values = MutableBuffer::new(0);
|
||||
@@ -335,8 +334,7 @@ impl<T: ByteArrayType> GenericByteArray<T> {
|
||||
/// offset and data buffers are not shared by others.
|
||||
pub fn into_builder(self) -> Result<GenericByteBuilder<T>, Self> {
|
||||
let len = self.len();
|
||||
let value_len =
|
||||
T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
|
||||
let value_len = T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]);
|
||||
|
||||
let data = self.into_data();
|
||||
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
|
||||
@@ -578,17 +576,14 @@ mod tests {
|
||||
|
||||
let nulls = NullBuffer::new_null(3);
|
||||
let err =
|
||||
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone()))
|
||||
.unwrap_err();
|
||||
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
|
||||
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3");
|
||||
|
||||
let err =
|
||||
BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
|
||||
let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
|
||||
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3");
|
||||
|
||||
let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
|
||||
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None)
|
||||
.unwrap_err();
|
||||
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
|
||||
assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2");
|
||||
|
||||
BinaryArray::new(offsets, non_utf8_data, None);
|
||||
@@ -611,8 +606,7 @@ mod tests {
|
||||
BinaryArray::new(offsets, non_ascii_data.clone(), None);
|
||||
|
||||
let offsets = OffsetBuffer::new(vec![0, 3, 10].into());
|
||||
let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None)
|
||||
.unwrap_err();
|
||||
let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None).unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"Invalid argument error: Split UTF-8 codepoint at offset 3"
|
||||
|
||||
@@ -286,10 +286,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if any `keys[i] >= values.len() || keys[i] < 0`
|
||||
pub fn try_new(
|
||||
keys: PrimitiveArray<K>,
|
||||
values: ArrayRef,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn try_new(keys: PrimitiveArray<K>, values: ArrayRef) -> Result<Self, ArrowError> {
|
||||
let data_type = DataType::Dictionary(
|
||||
Box::new(keys.data_type().clone()),
|
||||
Box::new(values.data_type().clone()),
|
||||
@@ -298,9 +295,11 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
let zero = K::Native::usize_as(0);
|
||||
let values_len = values.len();
|
||||
|
||||
if let Some((idx, v)) = keys.values().iter().enumerate().find(|(idx, v)| {
|
||||
(v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
|
||||
}) {
|
||||
if let Some((idx, v)) =
|
||||
keys.values().iter().enumerate().find(|(idx, v)| {
|
||||
(v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx)
|
||||
})
|
||||
{
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}",
|
||||
)));
|
||||
@@ -349,8 +348,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
///
|
||||
/// Panics if `values` is not a [`StringArray`].
|
||||
pub fn lookup_key(&self, value: &str) -> Option<K::Native> {
|
||||
let rd_buf: &StringArray =
|
||||
self.values.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
let rd_buf: &StringArray = self.values.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
|
||||
(0..rd_buf.len())
|
||||
.position(|i| rd_buf.value(i) == value)
|
||||
@@ -463,10 +461,8 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
///
|
||||
pub fn with_values(&self, values: ArrayRef) -> Self {
|
||||
assert!(values.len() >= self.values.len());
|
||||
let data_type = DataType::Dictionary(
|
||||
Box::new(K::DATA_TYPE),
|
||||
Box::new(values.data_type().clone()),
|
||||
);
|
||||
let data_type =
|
||||
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
|
||||
Self {
|
||||
data_type,
|
||||
keys: self.keys.clone(),
|
||||
@@ -477,9 +473,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
|
||||
/// Returns `PrimitiveDictionaryBuilder` of this dictionary array for mutating
|
||||
/// its keys and values if the underlying data buffer is not shared by others.
|
||||
pub fn into_primitive_dict_builder<V>(
|
||||
self,
|
||||
) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
|
||||
pub fn into_primitive_dict_builder<V>(self) -> Result<PrimitiveDictionaryBuilder<K, V>, Self>
|
||||
where
|
||||
V: ArrowPrimitiveType,
|
||||
{
|
||||
@@ -540,8 +534,7 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
|
||||
V: ArrowPrimitiveType,
|
||||
F: Fn(V::Native) -> V::Native,
|
||||
{
|
||||
let mut builder: PrimitiveDictionaryBuilder<K, V> =
|
||||
self.into_primitive_dict_builder()?;
|
||||
let mut builder: PrimitiveDictionaryBuilder<K, V> = self.into_primitive_dict_builder()?;
|
||||
builder
|
||||
.values_slice_mut()
|
||||
.iter_mut()
|
||||
@@ -806,9 +799,7 @@ impl<'a, K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'a, K, V>
|
||||
|
||||
impl<'a, K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'a, K, V> {}
|
||||
|
||||
impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug
|
||||
for TypedDictionaryArray<'a, K, V>
|
||||
{
|
||||
impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'a, K, V> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
writeln!(f, "TypedDictionaryArray({:?})", self.dictionary)
|
||||
}
|
||||
@@ -1040,8 +1031,7 @@ mod tests {
|
||||
// Construct a dictionary array from the above two
|
||||
let key_type = DataType::Int16;
|
||||
let value_type = DataType::Int8;
|
||||
let dict_data_type =
|
||||
DataType::Dictionary(Box::new(key_type), Box::new(value_type));
|
||||
let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type));
|
||||
let dict_data = ArrayData::builder(dict_data_type.clone())
|
||||
.len(3)
|
||||
.add_buffer(keys.clone())
|
||||
@@ -1079,8 +1069,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_dictionary_array_fmt_debug() {
|
||||
let mut builder =
|
||||
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
|
||||
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
|
||||
builder.append(12345678).unwrap();
|
||||
builder.append_null();
|
||||
builder.append(22345678).unwrap();
|
||||
@@ -1090,8 +1079,7 @@ mod tests {
|
||||
format!("{array:?}")
|
||||
);
|
||||
|
||||
let mut builder =
|
||||
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
|
||||
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(20, 2);
|
||||
for _ in 0..20 {
|
||||
builder.append(1).unwrap();
|
||||
}
|
||||
@@ -1267,9 +1255,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2"
|
||||
)]
|
||||
#[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")]
|
||||
fn test_try_new_index_too_large() {
|
||||
let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
|
||||
// dictionary only has 2 values, so offset 3 is out of bounds
|
||||
@@ -1278,9 +1264,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2"
|
||||
)]
|
||||
#[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")]
|
||||
fn test_try_new_index_too_small() {
|
||||
let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect();
|
||||
let keys: Int32Array = [Some(-100)].into_iter().collect();
|
||||
@@ -1288,9 +1272,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "DictionaryArray's data type must match, expected Int64 got Int32"
|
||||
)]
|
||||
#[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")]
|
||||
fn test_from_array_data_validation() {
|
||||
let a = DictionaryArray::<Int32Type>::from_iter(["32"]);
|
||||
let _ = DictionaryArray::<Int64Type>::from(a.into_data());
|
||||
@@ -1335,8 +1317,7 @@ mod tests {
|
||||
|
||||
let boxed: ArrayRef = Arc::new(dict_array);
|
||||
|
||||
let col: DictionaryArray<Int8Type> =
|
||||
DictionaryArray::<Int8Type>::from(boxed.to_data());
|
||||
let col: DictionaryArray<Int8Type> = DictionaryArray::<Int8Type>::from(boxed.to_data());
|
||||
let err = col.into_primitive_dict_builder::<Int32Type>();
|
||||
|
||||
let returned = err.unwrap_err();
|
||||
|
||||
@@ -81,10 +81,7 @@ impl FixedSizeBinaryArray {
|
||||
) -> Result<Self, ArrowError> {
|
||||
let data_type = DataType::FixedSizeBinary(size);
|
||||
let s = size.to_usize().ok_or_else(|| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"Size cannot be negative, got {}",
|
||||
size
|
||||
))
|
||||
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
|
||||
})?;
|
||||
|
||||
let len = values.len() / s;
|
||||
@@ -333,10 +330,7 @@ impl FixedSizeBinaryArray {
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns error if argument has length zero, or sizes of nested slices don't match.
|
||||
pub fn try_from_sparse_iter_with_size<T, U>(
|
||||
mut iter: T,
|
||||
size: i32,
|
||||
) -> Result<Self, ArrowError>
|
||||
pub fn try_from_sparse_iter_with_size<T, U>(mut iter: T, size: i32) -> Result<Self, ArrowError>
|
||||
where
|
||||
T: Iterator<Item = Option<U>>,
|
||||
U: AsRef<[u8]>,
|
||||
@@ -812,8 +806,7 @@ mod tests {
|
||||
let none_option: Option<[u8; 32]> = None;
|
||||
let input_arg = vec![none_option, none_option, none_option];
|
||||
#[allow(deprecated)]
|
||||
let arr =
|
||||
FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
|
||||
assert_eq!(0, arr.value_length());
|
||||
assert_eq!(3, arr.len())
|
||||
}
|
||||
@@ -828,16 +821,12 @@ mod tests {
|
||||
Some(vec![13, 14]),
|
||||
];
|
||||
#[allow(deprecated)]
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned())
|
||||
.unwrap();
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
|
||||
assert_eq!(2, arr.value_length());
|
||||
assert_eq!(5, arr.len());
|
||||
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
|
||||
input_arg.into_iter(),
|
||||
2,
|
||||
)
|
||||
.unwrap();
|
||||
let arr =
|
||||
FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
|
||||
assert_eq!(2, arr.value_length());
|
||||
assert_eq!(5, arr.len());
|
||||
}
|
||||
@@ -846,11 +835,8 @@ mod tests {
|
||||
fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
|
||||
let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;
|
||||
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
|
||||
input_arg.into_iter(),
|
||||
16,
|
||||
)
|
||||
.unwrap();
|
||||
let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
|
||||
.unwrap();
|
||||
assert_eq!(16, arr.value_length());
|
||||
assert_eq!(5, arr.len())
|
||||
}
|
||||
@@ -917,8 +903,7 @@ mod tests {
|
||||
fn fixed_size_binary_array_all_null() {
|
||||
let data = vec![None] as Vec<Option<String>>;
|
||||
let array =
|
||||
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0)
|
||||
.unwrap();
|
||||
FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap();
|
||||
array
|
||||
.into_data()
|
||||
.validate_full()
|
||||
@@ -928,8 +913,7 @@ mod tests {
|
||||
#[test]
|
||||
// Test for https://github.com/apache/arrow-rs/issues/1390
|
||||
fn fixed_size_binary_array_all_null_in_batch_with_schema() {
|
||||
let schema =
|
||||
Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
|
||||
let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
|
||||
|
||||
let none_option: Option<[u8; 2]> = None;
|
||||
let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
|
||||
|
||||
@@ -130,12 +130,7 @@ impl FixedSizeListArray {
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if [`Self::try_new`] returns an error
|
||||
pub fn new(
|
||||
field: FieldRef,
|
||||
size: i32,
|
||||
values: ArrayRef,
|
||||
nulls: Option<NullBuffer>,
|
||||
) -> Self {
|
||||
pub fn new(field: FieldRef, size: i32, values: ArrayRef, nulls: Option<NullBuffer>) -> Self {
|
||||
Self::try_new(field, size, values, nulls).unwrap()
|
||||
}
|
||||
|
||||
@@ -154,10 +149,7 @@ impl FixedSizeListArray {
|
||||
nulls: Option<NullBuffer>,
|
||||
) -> Result<Self, ArrowError> {
|
||||
let s = size.to_usize().ok_or_else(|| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"Size cannot be negative, got {}",
|
||||
size
|
||||
))
|
||||
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
|
||||
})?;
|
||||
|
||||
let len = values.len() / s.max(1);
|
||||
@@ -350,9 +342,8 @@ impl From<ArrayData> for FixedSizeListArray {
|
||||
};
|
||||
|
||||
let size = value_length as usize;
|
||||
let values = make_array(
|
||||
data.child_data()[0].slice(data.offset() * size, data.len() * size),
|
||||
);
|
||||
let values =
|
||||
make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size));
|
||||
Self {
|
||||
data_type: data.data_type().clone(),
|
||||
values,
|
||||
@@ -483,10 +474,8 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// Construct a list array from the above two
|
||||
let list_data_type = DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Int32, false)),
|
||||
3,
|
||||
);
|
||||
let list_data_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3);
|
||||
let list_data = ArrayData::builder(list_data_type.clone())
|
||||
.len(3)
|
||||
.add_child_data(value_data.clone())
|
||||
@@ -538,10 +527,8 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
// Construct a list array from the above two
|
||||
let list_data_type = DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Int32, false)),
|
||||
3,
|
||||
);
|
||||
let list_data_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3);
|
||||
let list_data = unsafe {
|
||||
ArrayData::builder(list_data_type)
|
||||
.len(3)
|
||||
@@ -569,10 +556,8 @@ mod tests {
|
||||
bit_util::set_bit(&mut null_bits, 4);
|
||||
|
||||
// Construct a fixed size list array from the above two
|
||||
let list_data_type = DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Int32, false)),
|
||||
2,
|
||||
);
|
||||
let list_data_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2);
|
||||
let list_data = ArrayData::builder(list_data_type)
|
||||
.len(5)
|
||||
.add_child_data(value_data.clone())
|
||||
@@ -611,9 +596,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "the offset of the new Buffer cannot exceed the existing length"
|
||||
)]
|
||||
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
|
||||
fn test_fixed_size_list_array_index_out_of_bound() {
|
||||
// Construct a value array
|
||||
let value_data = ArrayData::builder(DataType::Int32)
|
||||
@@ -631,10 +614,8 @@ mod tests {
|
||||
bit_util::set_bit(&mut null_bits, 4);
|
||||
|
||||
// Construct a fixed size list array from the above two
|
||||
let list_data_type = DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Int32, false)),
|
||||
2,
|
||||
);
|
||||
let list_data_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2);
|
||||
let list_data = ArrayData::builder(list_data_type)
|
||||
.len(5)
|
||||
.add_child_data(value_data)
|
||||
@@ -668,8 +649,7 @@ mod tests {
|
||||
let list = FixedSizeListArray::new(field.clone(), 4, values.clone(), None);
|
||||
assert_eq!(list.len(), 1);
|
||||
|
||||
let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None)
|
||||
.unwrap_err();
|
||||
let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"Invalid argument error: Size cannot be negative, got -1"
|
||||
@@ -679,13 +659,11 @@ mod tests {
|
||||
assert_eq!(list.len(), 6);
|
||||
|
||||
let nulls = NullBuffer::new_null(2);
|
||||
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls))
|
||||
.unwrap_err();
|
||||
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
|
||||
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2");
|
||||
|
||||
let field = Arc::new(Field::new("item", DataType::Int32, false));
|
||||
let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None)
|
||||
.unwrap_err();
|
||||
let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err();
|
||||
assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"");
|
||||
|
||||
// Valid as nulls in child masked by parent
|
||||
|
||||
@@ -372,9 +372,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
|
||||
|
||||
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> {
|
||||
fn from(data: ArrayData) -> Self {
|
||||
Self::try_new_from_array_data(data).expect(
|
||||
"Expected infallible creation of GenericListArray from ArrayDataRef failed",
|
||||
)
|
||||
Self::try_new_from_array_data(data)
|
||||
.expect("Expected infallible creation of GenericListArray from ArrayDataRef failed")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -391,17 +390,14 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayDa
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
|
||||
for GenericListArray<OffsetSize>
|
||||
{
|
||||
impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> {
|
||||
fn from(value: FixedSizeListArray) -> Self {
|
||||
let (field, size) = match value.data_type() {
|
||||
DataType::FixedSizeList(f, size) => (f, *size as usize),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let offsets =
|
||||
OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
|
||||
let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
|
||||
|
||||
Self {
|
||||
data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
|
||||
@@ -415,9 +411,10 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray>
|
||||
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
|
||||
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
|
||||
if data.buffers().len() != 1 {
|
||||
return Err(ArrowError::InvalidArgumentError(
|
||||
format!("ListArray data should contain a single buffer only (value offsets), had {}",
|
||||
data.buffers().len())));
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"ListArray data should contain a single buffer only (value offsets), had {}",
|
||||
data.buffers().len()
|
||||
)));
|
||||
}
|
||||
|
||||
if data.child_data().len() != 1 {
|
||||
@@ -593,8 +590,7 @@ mod tests {
|
||||
let value_offsets = Buffer::from([]);
|
||||
|
||||
// Construct a list array from the above two
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = ArrayData::builder(list_data_type)
|
||||
.len(0)
|
||||
.add_buffer(value_offsets)
|
||||
@@ -620,8 +616,7 @@ mod tests {
|
||||
let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
|
||||
|
||||
// Construct a list array from the above two
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = ArrayData::builder(list_data_type.clone())
|
||||
.len(3)
|
||||
.add_buffer(value_offsets.clone())
|
||||
@@ -807,8 +802,7 @@ mod tests {
|
||||
bit_util::set_bit(&mut null_bits, 8);
|
||||
|
||||
// Construct a list array from the above two
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = ArrayData::builder(list_data_type)
|
||||
.len(9)
|
||||
.add_buffer(value_offsets)
|
||||
@@ -839,8 +833,7 @@ mod tests {
|
||||
}
|
||||
|
||||
// Check offset and length for each non-null value.
|
||||
let sliced_list_array =
|
||||
sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
assert_eq!(2, sliced_list_array.value_offsets()[2]);
|
||||
assert_eq!(2, sliced_list_array.value_length(2));
|
||||
assert_eq!(4, sliced_list_array.value_offsets()[3]);
|
||||
@@ -951,9 +944,7 @@ mod tests {
|
||||
list_array.value(10);
|
||||
}
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "ListArray data should contain a single buffer only (value offsets)"
|
||||
)]
|
||||
#[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")]
|
||||
// Different error messages, so skip for now
|
||||
// https://github.com/apache/arrow-rs/issues/1545
|
||||
#[cfg(not(feature = "force_validate"))]
|
||||
@@ -964,8 +955,7 @@ mod tests {
|
||||
.add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
|
||||
.build_unchecked()
|
||||
};
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = unsafe {
|
||||
ArrayData::builder(list_data_type)
|
||||
.len(3)
|
||||
@@ -976,16 +966,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "ListArray should contain a single child array (values array)"
|
||||
)]
|
||||
#[should_panic(expected = "ListArray should contain a single child array (values array)")]
|
||||
// Different error messages, so skip for now
|
||||
// https://github.com/apache/arrow-rs/issues/1545
|
||||
#[cfg(not(feature = "force_validate"))]
|
||||
fn test_list_array_invalid_child_array_len() {
|
||||
let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]);
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = unsafe {
|
||||
ArrayData::builder(list_data_type)
|
||||
.len(3)
|
||||
@@ -996,9 +983,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List"
|
||||
)]
|
||||
#[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")]
|
||||
fn test_from_array_data_validation() {
|
||||
let mut builder = ListBuilder::new(Int32Builder::new());
|
||||
builder.values().append_value(1);
|
||||
@@ -1017,8 +1002,7 @@ mod tests {
|
||||
|
||||
let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]);
|
||||
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = ArrayData::builder(list_data_type)
|
||||
.len(3)
|
||||
.add_buffer(value_offsets)
|
||||
@@ -1033,9 +1017,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Memory pointer is not aligned with the specified scalar type"
|
||||
)]
|
||||
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
|
||||
// Different error messages, so skip for now
|
||||
// https://github.com/apache/arrow-rs/issues/1545
|
||||
#[cfg(not(feature = "force_validate"))]
|
||||
@@ -1051,9 +1033,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Memory pointer is not aligned with the specified scalar type"
|
||||
)]
|
||||
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
|
||||
// Different error messages, so skip for now
|
||||
// https://github.com/apache/arrow-rs/issues/1545
|
||||
#[cfg(not(feature = "force_validate"))]
|
||||
@@ -1068,8 +1048,7 @@ mod tests {
|
||||
.build_unchecked()
|
||||
};
|
||||
|
||||
let list_data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let list_data = unsafe {
|
||||
ArrayData::builder(list_data_type)
|
||||
.add_buffer(buf2)
|
||||
@@ -1187,9 +1166,8 @@ mod tests {
|
||||
|
||||
let nulls = NullBuffer::new_null(3);
|
||||
let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
|
||||
let err =
|
||||
LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
|
||||
.unwrap_err();
|
||||
let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls))
|
||||
.unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
@@ -1197,9 +1175,8 @@ mod tests {
|
||||
);
|
||||
|
||||
let field = Arc::new(Field::new("element", DataType::Int64, false));
|
||||
let err =
|
||||
LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
|
||||
.unwrap_err();
|
||||
let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None)
|
||||
.unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
@@ -1210,8 +1187,8 @@ mod tests {
|
||||
let values = Int64Array::new(vec![0; 7].into(), Some(nulls));
|
||||
let values = Arc::new(values);
|
||||
|
||||
let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), None)
|
||||
.unwrap_err();
|
||||
let err =
|
||||
LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
@@ -1222,8 +1199,7 @@ mod tests {
|
||||
LargeListArray::new(field.clone(), offsets.clone(), values, None);
|
||||
|
||||
let values = Int64Array::new(vec![0; 2].into(), None);
|
||||
let err =
|
||||
LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
|
||||
let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
|
||||
@@ -17,9 +17,7 @@
|
||||
|
||||
use crate::array::{get_offsets, print_long_array};
|
||||
use crate::iterator::MapArrayIter;
|
||||
use crate::{
|
||||
make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray,
|
||||
};
|
||||
use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
|
||||
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
|
||||
use arrow_data::{ArrayData, ArrayDataBuilder};
|
||||
use arrow_schema::{ArrowError, DataType, Field, FieldRef};
|
||||
@@ -264,9 +262,10 @@ impl MapArray {
|
||||
}
|
||||
|
||||
if data.buffers().len() != 1 {
|
||||
return Err(ArrowError::InvalidArgumentError(
|
||||
format!("MapArray data should contain a single buffer only (value offsets), had {}",
|
||||
data.len())));
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"MapArray data should contain a single buffer only (value offsets), had {}",
|
||||
data.len()
|
||||
)));
|
||||
}
|
||||
|
||||
if data.child_data().len() != 1 {
|
||||
@@ -281,9 +280,9 @@ impl MapArray {
|
||||
if let DataType::Struct(fields) = entries.data_type() {
|
||||
if fields.len() != 2 {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"MapArray should contain a struct array with 2 fields, have {} fields",
|
||||
fields.len()
|
||||
)));
|
||||
"MapArray should contain a struct array with 2 fields, have {} fields",
|
||||
fields.len()
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
@@ -576,8 +575,7 @@ mod tests {
|
||||
assert_eq!(2, map_array.value_length(1));
|
||||
|
||||
let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
|
||||
let value_array =
|
||||
Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
|
||||
let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
|
||||
let struct_array =
|
||||
StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
|
||||
assert_eq!(
|
||||
@@ -669,9 +667,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "MapArray expected ArrayData with DataType::Map got Dictionary"
|
||||
)]
|
||||
#[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
|
||||
fn test_from_array_data_validation() {
|
||||
// A DictionaryArray has similar buffer layout to a MapArray
|
||||
// but the meaning of the values differs
|
||||
@@ -692,12 +688,9 @@ mod tests {
|
||||
// [[a, b, c], [d, e, f], [g, h]]
|
||||
let entry_offsets = [0, 3, 6, 8];
|
||||
|
||||
let map_array = MapArray::new_from_strings(
|
||||
keys.clone().into_iter(),
|
||||
&values_data,
|
||||
&entry_offsets,
|
||||
)
|
||||
.unwrap();
|
||||
let map_array =
|
||||
MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
&values_data,
|
||||
@@ -768,9 +761,8 @@ mod tests {
|
||||
"Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
|
||||
);
|
||||
|
||||
let err =
|
||||
MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
|
||||
.unwrap_err();
|
||||
let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
|
||||
.unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
@@ -783,9 +775,7 @@ mod tests {
|
||||
.to_string();
|
||||
|
||||
assert!(
|
||||
err.starts_with(
|
||||
"Invalid argument error: MapArray expected data type Int64 got Struct"
|
||||
),
|
||||
err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
|
||||
"{err}"
|
||||
);
|
||||
|
||||
|
||||
@@ -536,9 +536,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
|
||||
DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
|
||||
DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
|
||||
DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
|
||||
DataType::Time32(TimeUnit::Second) => {
|
||||
Arc::new(Time32SecondArray::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
|
||||
DataType::Time32(TimeUnit::Millisecond) => {
|
||||
Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
|
||||
}
|
||||
@@ -583,9 +581,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
|
||||
}
|
||||
DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
|
||||
DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
|
||||
DataType::FixedSizeBinary(_) => {
|
||||
Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
|
||||
DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
|
||||
DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
|
||||
DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
|
||||
@@ -593,50 +589,24 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
|
||||
DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
|
||||
DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
|
||||
DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
|
||||
DataType::FixedSizeList(_, _) => {
|
||||
Arc::new(FixedSizeListArray::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
|
||||
DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
|
||||
DataType::Int8 => {
|
||||
Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int16 => {
|
||||
Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int32 => {
|
||||
Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int64 => {
|
||||
Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::UInt8 => {
|
||||
Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::UInt16 => {
|
||||
Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::UInt32 => {
|
||||
Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::UInt64 => {
|
||||
Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
|
||||
DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
|
||||
DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
|
||||
DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
|
||||
DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
|
||||
DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
|
||||
DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
|
||||
DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
|
||||
dt => panic!("Unexpected dictionary key type {dt:?}"),
|
||||
},
|
||||
DataType::RunEndEncoded(ref run_ends_type, _) => {
|
||||
match run_ends_type.data_type() {
|
||||
DataType::Int16 => {
|
||||
Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int32 => {
|
||||
Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef
|
||||
}
|
||||
DataType::Int64 => {
|
||||
Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef
|
||||
}
|
||||
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
|
||||
}
|
||||
}
|
||||
DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
|
||||
DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
|
||||
DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
|
||||
DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
|
||||
dt => panic!("Unexpected data type for run_ends array {dt:?}"),
|
||||
},
|
||||
DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
|
||||
DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
|
||||
DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
|
||||
@@ -687,11 +657,8 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
|
||||
match data.is_empty() && data.buffers()[0].is_empty() {
|
||||
true => OffsetBuffer::new_empty(),
|
||||
false => {
|
||||
let buffer = ScalarBuffer::new(
|
||||
data.buffers()[0].clone(),
|
||||
data.offset(),
|
||||
data.len() + 1,
|
||||
);
|
||||
let buffer =
|
||||
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
|
||||
// Safety:
|
||||
// ArrayData is valid
|
||||
unsafe { OffsetBuffer::new_unchecked(buffer) }
|
||||
@@ -700,11 +667,7 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
|
||||
}
|
||||
|
||||
/// Helper function for printing potentially long arrays.
|
||||
fn print_long_array<A, F>(
|
||||
array: &A,
|
||||
f: &mut std::fmt::Formatter,
|
||||
print_item: F,
|
||||
) -> std::fmt::Result
|
||||
fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
|
||||
where
|
||||
A: Array,
|
||||
F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
|
||||
@@ -767,8 +730,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_empty_list_primitive() {
|
||||
let data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
|
||||
let array = new_empty_array(&data_type);
|
||||
let a = array.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
assert_eq!(a.len(), 0);
|
||||
@@ -799,8 +761,7 @@ mod tests {
|
||||
fn test_null_struct() {
|
||||
// It is possible to create a null struct containing a non-nullable child
|
||||
// see https://github.com/apache/arrow-rs/pull/3244 for details
|
||||
let struct_type =
|
||||
DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
|
||||
let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
|
||||
let array = new_null_array(&struct_type, 9);
|
||||
|
||||
let a = array.as_any().downcast_ref::<StructArray>().unwrap();
|
||||
@@ -827,8 +788,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_null_list_primitive() {
|
||||
let data_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
|
||||
let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
|
||||
let array = new_null_array(&data_type, 9);
|
||||
let a = array.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
assert_eq!(a.len(), 9);
|
||||
@@ -862,8 +822,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_null_dictionary() {
|
||||
let values = vec![None, None, None, None, None, None, None, None, None]
|
||||
as Vec<Option<&str>>;
|
||||
let values =
|
||||
vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;
|
||||
|
||||
let array: DictionaryArray<Int8Type> = values.into_iter().collect();
|
||||
let array = Arc::new(array) as ArrayRef;
|
||||
@@ -965,8 +925,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_memory_size_primitive() {
|
||||
let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
|
||||
let empty =
|
||||
PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
|
||||
let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
|
||||
|
||||
// subtract empty array to avoid magic numbers for the size of additional fields
|
||||
assert_eq!(
|
||||
|
||||
@@ -917,8 +917,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
|
||||
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced());
|
||||
|
||||
let element_len = std::mem::size_of::<T::Native>();
|
||||
let buffer = data.buffers()[0]
|
||||
.slice_with_length(data.offset() * element_len, len * element_len);
|
||||
let buffer =
|
||||
data.buffers()[0].slice_with_length(data.offset() * element_len, len * element_len);
|
||||
|
||||
drop(data);
|
||||
|
||||
@@ -1116,10 +1116,9 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
|
||||
},
|
||||
// if the time zone is invalid, shows NaiveDateTime with an error message
|
||||
Err(_) => match as_datetime::<T>(v) {
|
||||
Some(datetime) => write!(
|
||||
f,
|
||||
"{datetime:?} (Unknown Time Zone '{tz_string}')"
|
||||
),
|
||||
Some(datetime) => {
|
||||
write!(f, "{datetime:?} (Unknown Time Zone '{tz_string}')")
|
||||
}
|
||||
None => write!(f, "null"),
|
||||
},
|
||||
}
|
||||
@@ -1191,25 +1190,19 @@ def_from_for_primitive!(Float64Type, f64);
|
||||
def_from_for_primitive!(Decimal128Type, i128);
|
||||
def_from_for_primitive!(Decimal256Type, i256);
|
||||
|
||||
impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>>
|
||||
for NativeAdapter<T>
|
||||
{
|
||||
impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> {
|
||||
fn from(value: Option<<T as ArrowPrimitiveType>::Native>) -> Self {
|
||||
NativeAdapter { native: value }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>>
|
||||
for NativeAdapter<T>
|
||||
{
|
||||
impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> {
|
||||
fn from(value: &Option<<T as ArrowPrimitiveType>::Native>) -> Self {
|
||||
NativeAdapter { native: *value }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr>
|
||||
for PrimitiveArray<T>
|
||||
{
|
||||
impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr> for PrimitiveArray<T> {
|
||||
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
|
||||
let iter = iter.into_iter();
|
||||
let (lower, _) = iter.size_hint();
|
||||
@@ -1265,15 +1258,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
|
||||
|
||||
let (null, buffer) = trusted_len_unzip(iterator);
|
||||
|
||||
let data = ArrayData::new_unchecked(
|
||||
T::DATA_TYPE,
|
||||
len,
|
||||
None,
|
||||
Some(null),
|
||||
0,
|
||||
vec![buffer],
|
||||
vec![],
|
||||
);
|
||||
let data =
|
||||
ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]);
|
||||
PrimitiveArray::from(data)
|
||||
}
|
||||
}
|
||||
@@ -1294,9 +1280,7 @@ macro_rules! def_numeric_from_vec {
|
||||
}
|
||||
|
||||
// Constructs a primitive array from a vector. Should only be used for testing.
|
||||
impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>>
|
||||
for PrimitiveArray<$ty>
|
||||
{
|
||||
impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>> for PrimitiveArray<$ty> {
|
||||
fn from(data: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self {
|
||||
PrimitiveArray::from_iter(data.iter())
|
||||
}
|
||||
@@ -1392,8 +1376,7 @@ impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> {
|
||||
"PrimitiveArray data should contain a single buffer only (values buffer)"
|
||||
);
|
||||
|
||||
let values =
|
||||
ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
|
||||
let values = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
|
||||
Self {
|
||||
data_type: data.data_type().clone(),
|
||||
values,
|
||||
@@ -1407,11 +1390,7 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
|
||||
/// specified precision and scale.
|
||||
///
|
||||
/// See [`validate_decimal_precision_and_scale`]
|
||||
pub fn with_precision_and_scale(
|
||||
self,
|
||||
precision: u8,
|
||||
scale: i8,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
|
||||
validate_decimal_precision_and_scale::<T>(precision, scale)?;
|
||||
Ok(Self {
|
||||
data_type: T::TYPE_CONSTRUCTOR(precision, scale),
|
||||
@@ -1575,8 +1554,7 @@ mod tests {
|
||||
// 1: 00:00:00.001
|
||||
// 37800005: 10:30:00.005
|
||||
// 86399210: 23:59:59.210
|
||||
let arr: PrimitiveArray<Time32MillisecondType> =
|
||||
vec![1, 37_800_005, 86_399_210].into();
|
||||
let arr: PrimitiveArray<Time32MillisecondType> = vec![1, 37_800_005, 86_399_210].into();
|
||||
assert_eq!(3, arr.len());
|
||||
assert_eq!(0, arr.offset());
|
||||
assert_eq!(0, arr.null_count());
|
||||
@@ -1858,11 +1836,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_timestamp_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
]);
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]);
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]",
|
||||
format!("{arr:?}")
|
||||
@@ -1872,12 +1846,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_timestamp_utc_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
])
|
||||
.with_timezone_utc();
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
|
||||
.with_timezone_utc();
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n 2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n 1921-01-02T00:00:00+00:00,\n]",
|
||||
format!("{arr:?}")
|
||||
@@ -1888,12 +1858,8 @@ mod tests {
|
||||
#[cfg(feature = "chrono-tz")]
|
||||
fn test_timestamp_with_named_tz_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
])
|
||||
.with_timezone("Asia/Taipei".to_string());
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
|
||||
.with_timezone("Asia/Taipei".to_string());
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]",
|
||||
format!("{:?}", arr)
|
||||
@@ -1904,12 +1870,8 @@ mod tests {
|
||||
#[cfg(not(feature = "chrono-tz"))]
|
||||
fn test_timestamp_with_named_tz_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
])
|
||||
.with_timezone("Asia/Taipei".to_string());
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
|
||||
.with_timezone("Asia/Taipei".to_string());
|
||||
|
||||
println!("{arr:?}");
|
||||
|
||||
@@ -1922,12 +1884,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_timestamp_with_fixed_offset_tz_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
])
|
||||
.with_timezone("+08:00".to_string());
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
|
||||
.with_timezone("+08:00".to_string());
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]",
|
||||
format!("{arr:?}")
|
||||
@@ -1937,12 +1895,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_timestamp_with_incorrect_tz_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1546214400000,
|
||||
1546214400000,
|
||||
-1546214400000,
|
||||
])
|
||||
.with_timezone("xxx".to_string());
|
||||
TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
|
||||
.with_timezone("xxx".to_string());
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
|
||||
format!("{arr:?}")
|
||||
@@ -1952,14 +1906,13 @@ mod tests {
|
||||
#[test]
|
||||
#[cfg(feature = "chrono-tz")]
|
||||
fn test_timestamp_with_tz_with_daylight_saving_fmt_debug() {
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> =
|
||||
TimestampMillisecondArray::from(vec![
|
||||
1647161999000,
|
||||
1647162000000,
|
||||
1667717999000,
|
||||
1667718000000,
|
||||
])
|
||||
.with_timezone("America/Denver".to_string());
|
||||
let arr: PrimitiveArray<TimestampMillisecondType> = TimestampMillisecondArray::from(vec![
|
||||
1647161999000,
|
||||
1647162000000,
|
||||
1667717999000,
|
||||
1667718000000,
|
||||
])
|
||||
.with_timezone("America/Denver".to_string());
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Millisecond, Some(\"America/Denver\"))>\n[\n 2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]",
|
||||
format!("{:?}", arr)
|
||||
@@ -1997,8 +1950,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_timestamp_micros_out_of_range() {
|
||||
// replicate the issue from https://github.com/apache/arrow-datafusion/issues/3832
|
||||
let arr: PrimitiveArray<TimestampMicrosecondType> =
|
||||
vec![9065525203050843594].into();
|
||||
let arr: PrimitiveArray<TimestampMicrosecondType> = vec![9065525203050843594].into();
|
||||
assert_eq!(
|
||||
"PrimitiveArray<Timestamp(Microsecond, None)>\n[\n null,\n]",
|
||||
format!("{arr:?}")
|
||||
@@ -2143,8 +2095,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_decimal256() {
|
||||
let values: Vec<_> =
|
||||
vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX];
|
||||
let values: Vec<_> = vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX];
|
||||
|
||||
let array: PrimitiveArray<Decimal256Type> =
|
||||
PrimitiveArray::from_iter(values.iter().copied());
|
||||
@@ -2166,8 +2117,8 @@ mod tests {
|
||||
// let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
// let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255];
|
||||
let values: [u8; 32] = [
|
||||
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
];
|
||||
let array_data = ArrayData::builder(DataType::Decimal128(38, 6))
|
||||
.len(2)
|
||||
@@ -2232,8 +2183,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_decimal_from_iter() {
|
||||
let array: Decimal128Array =
|
||||
vec![Some(-100), None, Some(101)].into_iter().collect();
|
||||
let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
|
||||
assert_eq!(array.len(), 3);
|
||||
assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));
|
||||
assert_eq!(-100_i128, array.value(0));
|
||||
@@ -2343,8 +2293,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_decimal_array_set_null_if_overflow_with_precision() {
|
||||
let array =
|
||||
Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
|
||||
let array = Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
|
||||
let result = array.null_if_overflow_precision(5);
|
||||
let expected = Decimal128Array::from(vec![None, Some(123), None, None]);
|
||||
assert_eq!(result, expected);
|
||||
@@ -2361,8 +2310,7 @@ mod tests {
|
||||
let decimal2 = i256::from_i128(56789);
|
||||
builder.append_value(decimal2);
|
||||
|
||||
let array: Decimal256Array =
|
||||
builder.finish().with_precision_and_scale(76, 6).unwrap();
|
||||
let array: Decimal256Array = builder.finish().with_precision_and_scale(76, 6).unwrap();
|
||||
|
||||
let collected: Vec<_> = array.iter().collect();
|
||||
assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
|
||||
@@ -2387,8 +2335,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_iter_decimal128array() {
|
||||
let mut array: Decimal128Array =
|
||||
vec![Some(-100), None, Some(101)].into_iter().collect();
|
||||
let mut array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
|
||||
array = array.with_precision_and_scale(38, 10).unwrap();
|
||||
assert_eq!(array.len(), 3);
|
||||
assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));
|
||||
@@ -2404,13 +2351,11 @@ mod tests {
|
||||
let array = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]);
|
||||
let r = array.unary_opt::<_, Int32Type>(|x| (x % 2 != 0).then_some(x));
|
||||
|
||||
let expected =
|
||||
Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
|
||||
let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
|
||||
assert_eq!(r, expected);
|
||||
|
||||
let r = expected.unary_opt::<_, Int32Type>(|x| (x % 3 != 0).then_some(x));
|
||||
let expected =
|
||||
Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
|
||||
let expected = Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
@@ -2513,9 +2458,8 @@ mod tests {
|
||||
Int32Array::new(vec![1, 2, 3, 4].into(), None);
|
||||
Int32Array::new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(4)));
|
||||
|
||||
let err =
|
||||
Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3)))
|
||||
.unwrap_err();
|
||||
let err = Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3)))
|
||||
.unwrap_err();
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
|
||||
@@ -91,10 +91,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
/// Attempts to create RunArray using given run_ends (index where a run ends)
|
||||
/// and the values (value of the run). Returns an error if the given data is not compatible
|
||||
/// with RunEndEncoded specification.
|
||||
pub fn try_new(
|
||||
run_ends: &PrimitiveArray<R>,
|
||||
values: &dyn Array,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
|
||||
let run_ends_type = run_ends.data_type().clone();
|
||||
let values_type = values.data_type().clone();
|
||||
let ree_array_type = DataType::RunEndEncoded(
|
||||
@@ -182,10 +179,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
/// scaled well for larger inputs.
|
||||
/// See <https://github.com/apache/arrow-rs/pull/3622#issuecomment-1407753727> for more details.
|
||||
#[inline]
|
||||
pub fn get_physical_indices<I>(
|
||||
&self,
|
||||
logical_indices: &[I],
|
||||
) -> Result<Vec<usize>, ArrowError>
|
||||
pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
|
||||
where
|
||||
I: ArrowNativeType,
|
||||
{
|
||||
@@ -211,8 +205,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
});
|
||||
|
||||
// Return early if all the logical indices cannot be converted to physical indices.
|
||||
let largest_logical_index =
|
||||
logical_indices[*ordered_indices.last().unwrap()].as_usize();
|
||||
let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
|
||||
if largest_logical_index >= len {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
|
||||
@@ -225,8 +218,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
let mut physical_indices = vec![0; indices_len];
|
||||
|
||||
let mut ordered_index = 0_usize;
|
||||
for (physical_index, run_end) in
|
||||
self.run_ends.values().iter().enumerate().skip(skip_value)
|
||||
for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
|
||||
{
|
||||
// Get the run end index (relative to offset) of current physical index
|
||||
let run_end_value = run_end.as_usize() - offset;
|
||||
@@ -234,8 +226,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
// All the `logical_indices` that are less than current run end index
|
||||
// belongs to current physical index.
|
||||
while ordered_index < indices_len
|
||||
&& logical_indices[ordered_indices[ordered_index]].as_usize()
|
||||
< run_end_value
|
||||
&& logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
|
||||
{
|
||||
physical_indices[ordered_indices[ordered_index]] = physical_index;
|
||||
ordered_index += 1;
|
||||
@@ -245,8 +236,7 @@ impl<R: RunEndIndexType> RunArray<R> {
|
||||
// If there are input values >= run_ends.last_value then we'll not be able to convert
|
||||
// all logical indices to physical indices.
|
||||
if ordered_index < logical_indices.len() {
|
||||
let logical_index =
|
||||
logical_indices[ordered_indices[ordered_index]].as_usize();
|
||||
let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
|
||||
)));
|
||||
@@ -704,8 +694,7 @@ mod tests {
|
||||
seed.shuffle(&mut rng);
|
||||
}
|
||||
// repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
|
||||
let num =
|
||||
max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
|
||||
let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
|
||||
for _ in 0..num {
|
||||
result.push(seed[ix]);
|
||||
}
|
||||
@@ -749,19 +738,16 @@ mod tests {
|
||||
#[test]
|
||||
fn test_run_array() {
|
||||
// Construct a value array
|
||||
let value_data = PrimitiveArray::<Int8Type>::from_iter_values([
|
||||
10_i8, 11, 12, 13, 14, 15, 16, 17,
|
||||
]);
|
||||
let value_data =
|
||||
PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
|
||||
|
||||
// Construct a run_ends array:
|
||||
let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
|
||||
let run_ends_data = PrimitiveArray::<Int16Type>::from_iter_values(
|
||||
run_ends_values.iter().copied(),
|
||||
);
|
||||
let run_ends_data =
|
||||
PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
|
||||
|
||||
// Construct a run ends encoded array from the above two
|
||||
let ree_array =
|
||||
RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
|
||||
let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
|
||||
|
||||
assert_eq!(ree_array.len(), 22);
|
||||
assert_eq!(ree_array.null_count(), 0);
|
||||
@@ -872,8 +858,7 @@ mod tests {
|
||||
let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let run_ends: Int32Array =
|
||||
[Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
|
||||
let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
|
||||
|
||||
let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
|
||||
assert_eq!(array.values().data_type(), &DataType::Utf8);
|
||||
@@ -924,7 +909,10 @@ mod tests {
|
||||
let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
|
||||
|
||||
let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
|
||||
let expected = ArrowError::InvalidArgumentError("Found null values in run_ends array. The run_ends array should not have null values.".to_string());
|
||||
let expected = ArrowError::InvalidArgumentError(
|
||||
"Found null values in run_ends array. The run_ends array should not have null values."
|
||||
.to_string(),
|
||||
);
|
||||
assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
|
||||
}
|
||||
|
||||
@@ -1003,8 +991,7 @@ mod tests {
|
||||
let mut rng = thread_rng();
|
||||
logical_indices.shuffle(&mut rng);
|
||||
|
||||
let physical_indices =
|
||||
run_array.get_physical_indices(&logical_indices).unwrap();
|
||||
let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
|
||||
|
||||
assert_eq!(logical_indices.len(), physical_indices.len());
|
||||
|
||||
|
||||
@@ -59,9 +59,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
|
||||
|
||||
/// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning
|
||||
/// an error if [`GenericBinaryArray`] contains invalid UTF-8 data
|
||||
pub fn try_from_binary(
|
||||
v: GenericBinaryArray<OffsetSize>,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn try_from_binary(v: GenericBinaryArray<OffsetSize>) -> Result<Self, ArrowError> {
|
||||
let (offsets, values, nulls) = v.into_parts();
|
||||
Self::try_new(offsets, values, nulls)
|
||||
}
|
||||
@@ -83,9 +81,7 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericBinaryArray<OffsetSize>>
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>>
|
||||
for GenericStringArray<OffsetSize>
|
||||
{
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>> for GenericStringArray<OffsetSize> {
|
||||
fn from(v: Vec<Option<&str>>) -> Self {
|
||||
v.into_iter().collect()
|
||||
}
|
||||
@@ -97,9 +93,7 @@ impl<OffsetSize: OffsetSizeTrait> From<Vec<&str>> for GenericStringArray<OffsetS
|
||||
}
|
||||
}
|
||||
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>>
|
||||
for GenericStringArray<OffsetSize>
|
||||
{
|
||||
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<String>>> for GenericStringArray<OffsetSize> {
|
||||
fn from(v: Vec<Option<String>>) -> Self {
|
||||
v.into_iter().collect()
|
||||
}
|
||||
@@ -438,13 +432,11 @@ mod tests {
|
||||
let expected: LargeStringArray = data.clone().into_iter().map(Some).collect();
|
||||
|
||||
// Iterator reports too many items
|
||||
let arr =
|
||||
LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
|
||||
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
|
||||
assert_eq!(expected, arr);
|
||||
|
||||
// Iterator reports too few items
|
||||
let arr =
|
||||
LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
|
||||
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
|
||||
assert_eq!(expected, arr);
|
||||
}
|
||||
|
||||
@@ -460,9 +452,11 @@ mod tests {
|
||||
|
||||
let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
|
||||
let null_buffer = Buffer::from_slice_ref([0b101]);
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt8, false),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt8,
|
||||
false,
|
||||
)));
|
||||
|
||||
// [None, Some("Parquet")]
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
@@ -493,9 +487,7 @@ mod tests {
|
||||
_test_generic_string_array_from_list_array::<i64>();
|
||||
}
|
||||
|
||||
fn _test_generic_string_array_from_list_array_with_child_nulls_failed<
|
||||
O: OffsetSizeTrait,
|
||||
>() {
|
||||
fn _test_generic_string_array_from_list_array_with_child_nulls_failed<O: OffsetSizeTrait>() {
|
||||
let values = b"HelloArrow";
|
||||
let child_data = ArrayData::builder(DataType::UInt8)
|
||||
.len(10)
|
||||
@@ -508,9 +500,11 @@ mod tests {
|
||||
|
||||
// It is possible to create a null struct containing a non-nullable child
|
||||
// see https://github.com/apache/arrow-rs/pull/3244 for details
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt8, true),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt8,
|
||||
true,
|
||||
)));
|
||||
|
||||
// [None, Some(b"Parquet")]
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
@@ -544,9 +538,11 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap());
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(
|
||||
Field::new("item", DataType::UInt16, false),
|
||||
));
|
||||
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::UInt16,
|
||||
false,
|
||||
)));
|
||||
|
||||
let array_data = ArrayData::builder(data_type)
|
||||
.len(2)
|
||||
|
||||
@@ -462,9 +462,7 @@ impl Index<&str> for StructArray {
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::{
|
||||
BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray,
|
||||
};
|
||||
use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
|
||||
use arrow_buffer::ToByteSlice;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -540,12 +538,10 @@ mod tests {
|
||||
None,
|
||||
Some("mark"),
|
||||
]));
|
||||
let ints: ArrayRef =
|
||||
Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
|
||||
let arr =
|
||||
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
|
||||
.unwrap();
|
||||
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();
|
||||
|
||||
let struct_data = arr.into_data();
|
||||
assert_eq!(4, struct_data.len());
|
||||
@@ -578,13 +574,11 @@ mod tests {
|
||||
None,
|
||||
// 3 elements, not 4
|
||||
]));
|
||||
let ints: ArrayRef =
|
||||
Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
|
||||
let err =
|
||||
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
|
||||
assert_eq!(
|
||||
err,
|
||||
@@ -599,8 +593,7 @@ mod tests {
|
||||
fn test_struct_array_from_mismatched_types_single() {
|
||||
drop(StructArray::from(vec![(
|
||||
Arc::new(Field::new("b", DataType::Int16, false)),
|
||||
Arc::new(BooleanArray::from(vec![false, false, true, true]))
|
||||
as Arc<dyn Array>,
|
||||
Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
|
||||
)]));
|
||||
}
|
||||
|
||||
@@ -612,8 +605,7 @@ mod tests {
|
||||
drop(StructArray::from(vec![
|
||||
(
|
||||
Arc::new(Field::new("b", DataType::Int16, false)),
|
||||
Arc::new(BooleanArray::from(vec![false, false, true, true]))
|
||||
as Arc<dyn Array>,
|
||||
Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
|
||||
),
|
||||
(
|
||||
Arc::new(Field::new("c", DataType::Utf8, false)),
|
||||
@@ -733,9 +725,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\""
|
||||
)]
|
||||
#[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
|
||||
fn test_struct_array_from_mismatched_nullability() {
|
||||
drop(StructArray::from(vec![(
|
||||
Arc::new(Field::new("c", DataType::Int32, false)),
|
||||
|
||||
@@ -179,8 +179,7 @@ impl UnionArray {
|
||||
if let Some(b) = &value_offsets {
|
||||
if ((type_ids.len()) * 4) != b.len() {
|
||||
return Err(ArrowError::InvalidArgumentError(
|
||||
"Type Ids and Offsets represent a different number of array slots."
|
||||
.to_string(),
|
||||
"Type Ids and Offsets represent a different number of array slots.".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -216,9 +215,8 @@ impl UnionArray {
|
||||
|
||||
// Unsafe Justification: arguments were validated above (and
|
||||
// re-revalidated as part of data().validate() below)
|
||||
let new_self = unsafe {
|
||||
Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays)
|
||||
};
|
||||
let new_self =
|
||||
unsafe { Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays) };
|
||||
new_self.to_data().validate()?;
|
||||
|
||||
Ok(new_self)
|
||||
@@ -1059,7 +1057,13 @@ mod tests {
|
||||
let mut builder = UnionBuilder::new_sparse();
|
||||
builder.append::<Float32Type>("a", 1.0).unwrap();
|
||||
let err = builder.append::<Int32Type>("a", 1).unwrap_err().to_string();
|
||||
assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err);
|
||||
assert!(
|
||||
err.contains(
|
||||
"Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"
|
||||
),
|
||||
"{}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -127,11 +127,7 @@ impl BooleanBuilder {
|
||||
///
|
||||
/// Returns an error if the slices are of different lengths
|
||||
#[inline]
|
||||
pub fn append_values(
|
||||
&mut self,
|
||||
values: &[bool],
|
||||
is_valid: &[bool],
|
||||
) -> Result<(), ArrowError> {
|
||||
pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<(), ArrowError> {
|
||||
if values.len() != is_valid.len() {
|
||||
Err(ArrowError::InvalidArgumentError(
|
||||
"Value and validity lengths must be equal".to_string(),
|
||||
@@ -250,8 +246,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_boolean_array_builder_append_slice() {
|
||||
let arr1 =
|
||||
BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
|
||||
let arr1 = BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]);
|
||||
|
||||
let mut builder = BooleanArray::builder(0);
|
||||
builder.append_slice(&[true, false]);
|
||||
|
||||
@@ -45,11 +45,9 @@ pub type Float32BufferBuilder = BufferBuilder<f32>;
|
||||
pub type Float64BufferBuilder = BufferBuilder<f64>;
|
||||
|
||||
/// Buffer builder for 128-bit decimal type.
|
||||
pub type Decimal128BufferBuilder =
|
||||
BufferBuilder<<Decimal128Type as ArrowPrimitiveType>::Native>;
|
||||
pub type Decimal128BufferBuilder = BufferBuilder<<Decimal128Type as ArrowPrimitiveType>::Native>;
|
||||
/// Buffer builder for 256-bit decimal type.
|
||||
pub type Decimal256BufferBuilder =
|
||||
BufferBuilder<<Decimal256Type as ArrowPrimitiveType>::Native>;
|
||||
pub type Decimal256BufferBuilder = BufferBuilder<<Decimal256Type as ArrowPrimitiveType>::Native>;
|
||||
|
||||
/// Buffer builder for timestamp type of second unit.
|
||||
pub type TimestampSecondBufferBuilder =
|
||||
@@ -107,9 +105,7 @@ pub type DurationNanosecondBufferBuilder =
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::builder::{
|
||||
ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder,
|
||||
};
|
||||
use crate::builder::{ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder};
|
||||
use crate::Array;
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -75,7 +75,8 @@ impl FixedSizeBinaryBuilder {
|
||||
pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
|
||||
if self.value_length != value.as_ref().len() as i32 {
|
||||
Err(ArrowError::InvalidArgumentError(
|
||||
"Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string()
|
||||
"Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
|
||||
.to_string(),
|
||||
))
|
||||
} else {
|
||||
self.values_builder.append_slice(value.as_ref());
|
||||
@@ -95,11 +96,10 @@ impl FixedSizeBinaryBuilder {
|
||||
/// Builds the [`FixedSizeBinaryArray`] and reset this builder.
|
||||
pub fn finish(&mut self) -> FixedSizeBinaryArray {
|
||||
let array_length = self.len();
|
||||
let array_data_builder =
|
||||
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
|
||||
.add_buffer(self.values_builder.finish())
|
||||
.nulls(self.null_buffer_builder.finish())
|
||||
.len(array_length);
|
||||
let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
|
||||
.add_buffer(self.values_builder.finish())
|
||||
.nulls(self.null_buffer_builder.finish())
|
||||
.len(array_length);
|
||||
let array_data = unsafe { array_data_builder.build_unchecked() };
|
||||
FixedSizeBinaryArray::from(array_data)
|
||||
}
|
||||
@@ -108,11 +108,10 @@ impl FixedSizeBinaryBuilder {
|
||||
pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
|
||||
let array_length = self.len();
|
||||
let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
|
||||
let array_data_builder =
|
||||
ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
|
||||
.add_buffer(values_buffer)
|
||||
.nulls(self.null_buffer_builder.finish_cloned())
|
||||
.len(array_length);
|
||||
let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
|
||||
.add_buffer(values_buffer)
|
||||
.nulls(self.null_buffer_builder.finish_cloned())
|
||||
.len(array_length);
|
||||
let array_data = unsafe { array_data_builder.build_unchecked() };
|
||||
FixedSizeBinaryArray::from(array_data)
|
||||
}
|
||||
|
||||
@@ -19,10 +19,7 @@ use crate::types::bytes::ByteArrayNativeType;
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
types::{
|
||||
BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType,
|
||||
Utf8Type,
|
||||
},
|
||||
types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
|
||||
ArrayRef, ArrowPrimitiveType, RunArray,
|
||||
};
|
||||
|
||||
@@ -112,10 +109,7 @@ where
|
||||
pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
|
||||
Self {
|
||||
run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
|
||||
values_builder: GenericByteBuilder::<V>::with_capacity(
|
||||
capacity,
|
||||
data_capacity,
|
||||
),
|
||||
values_builder: GenericByteBuilder::<V>::with_capacity(capacity, data_capacity),
|
||||
current_value: Vec::new(),
|
||||
has_current_value: false,
|
||||
current_run_end_index: 0,
|
||||
@@ -282,12 +276,13 @@ where
|
||||
}
|
||||
|
||||
fn run_end_index_as_native(&self) -> R::Native {
|
||||
R::Native::from_usize(self.current_run_end_index)
|
||||
.unwrap_or_else(|| panic!(
|
||||
R::Native::from_usize(self.current_run_end_index).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"Cannot convert the value {} from `usize` to native form of arrow datatype {}",
|
||||
self.current_run_end_index,
|
||||
R::DATA_TYPE
|
||||
))
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,8 +408,7 @@ mod tests {
|
||||
|
||||
// Values are polymorphic and so require a downcast.
|
||||
let av = array.values();
|
||||
let ava: &GenericByteArray<T> =
|
||||
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
|
||||
assert_eq!(*ava.value(0), *values[0]);
|
||||
assert!(ava.is_null(1));
|
||||
@@ -459,8 +453,7 @@ mod tests {
|
||||
|
||||
// Values are polymorphic and so require a downcast.
|
||||
let av = array.values();
|
||||
let ava: &GenericByteArray<T> =
|
||||
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
|
||||
assert_eq!(ava.value(0), values[0]);
|
||||
assert!(ava.is_null(1));
|
||||
|
||||
@@ -68,12 +68,8 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
|
||||
let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
|
||||
|
||||
let null_buffer_builder = null_buffer
|
||||
.map(|buffer| {
|
||||
NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1)
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
NullBufferBuilder::new_with_len(offsets_builder.len() - 1)
|
||||
});
|
||||
.map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
|
||||
.unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
|
||||
|
||||
Self {
|
||||
offsets_builder,
|
||||
@@ -84,8 +80,7 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
|
||||
|
||||
#[inline]
|
||||
fn next_offset(&self) -> T::Offset {
|
||||
T::Offset::from_usize(self.value_builder.len())
|
||||
.expect("byte array offset overflow")
|
||||
T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
|
||||
}
|
||||
|
||||
/// Appends a value into the builder.
|
||||
|
||||
@@ -16,9 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
|
||||
use crate::types::{
|
||||
ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType,
|
||||
};
|
||||
use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
|
||||
use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray};
|
||||
use arrow_buffer::ArrowNativeType;
|
||||
use arrow_schema::{ArrowError, DataType};
|
||||
@@ -91,10 +89,7 @@ where
|
||||
state: Default::default(),
|
||||
dedup: Default::default(),
|
||||
keys_builder: PrimitiveBuilder::with_capacity(keys_capacity),
|
||||
values_builder: GenericByteBuilder::<T>::with_capacity(
|
||||
value_capacity,
|
||||
data_capacity,
|
||||
),
|
||||
values_builder: GenericByteBuilder::<T>::with_capacity(value_capacity, data_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -131,8 +126,7 @@ where
|
||||
let mut dedup = HashMap::with_capacity_and_hasher(dict_len, ());
|
||||
|
||||
let values_len = dictionary_values.value_data().len();
|
||||
let mut values_builder =
|
||||
GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
|
||||
let mut values_builder = GenericByteBuilder::<T>::with_capacity(dict_len, values_len);
|
||||
|
||||
K::Native::from_usize(dictionary_values.len())
|
||||
.ok_or(ArrowError::DictionaryKeyOverflowError)?;
|
||||
@@ -214,10 +208,7 @@ where
|
||||
/// value is appended to the values array.
|
||||
///
|
||||
/// Returns an error if the new index would overflow the key type.
|
||||
pub fn append(
|
||||
&mut self,
|
||||
value: impl AsRef<T::Native>,
|
||||
) -> Result<K::Native, ArrowError> {
|
||||
pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
|
||||
let value_native: &T::Native = value.as_ref();
|
||||
let value_bytes: &[u8] = value_native.as_ref();
|
||||
|
||||
@@ -240,8 +231,7 @@ where
|
||||
state.hash_one(get_bytes(storage, *idx))
|
||||
});
|
||||
|
||||
K::Native::from_usize(idx)
|
||||
.ok_or(ArrowError::DictionaryKeyOverflowError)?
|
||||
K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)?
|
||||
}
|
||||
};
|
||||
self.keys_builder.append_value(key);
|
||||
@@ -283,8 +273,7 @@ where
|
||||
let values = self.values_builder.finish();
|
||||
let keys = self.keys_builder.finish();
|
||||
|
||||
let data_type =
|
||||
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
|
||||
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
|
||||
|
||||
let builder = keys
|
||||
.into_data()
|
||||
@@ -300,8 +289,7 @@ where
|
||||
let values = self.values_builder.finish_cloned();
|
||||
let keys = self.keys_builder.finish_cloned();
|
||||
|
||||
let data_type =
|
||||
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
|
||||
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
|
||||
|
||||
let builder = keys
|
||||
.into_data()
|
||||
@@ -367,12 +355,10 @@ fn get_bytes<T: ByteArrayType>(values: &GenericByteBuilder<T>, idx: usize) -> &[
|
||||
/// assert_eq!(ava.value(1), "def");
|
||||
///
|
||||
/// ```
|
||||
pub type StringDictionaryBuilder<K> =
|
||||
GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
|
||||
pub type StringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i32>>;
|
||||
|
||||
/// Builder for [`DictionaryArray`] of [`LargeStringArray`](crate::array::LargeStringArray)
|
||||
pub type LargeStringDictionaryBuilder<K> =
|
||||
GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
|
||||
pub type LargeStringDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericStringType<i64>>;
|
||||
|
||||
/// Builder for [`DictionaryArray`] of [`BinaryArray`](crate::array::BinaryArray)
|
||||
///
|
||||
@@ -407,12 +393,10 @@ pub type LargeStringDictionaryBuilder<K> =
|
||||
/// assert_eq!(ava.value(1), b"def");
|
||||
///
|
||||
/// ```
|
||||
pub type BinaryDictionaryBuilder<K> =
|
||||
GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
|
||||
pub type BinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i32>>;
|
||||
|
||||
/// Builder for [`DictionaryArray`] of [`LargeBinaryArray`](crate::array::LargeBinaryArray)
|
||||
pub type LargeBinaryDictionaryBuilder<K> =
|
||||
GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
|
||||
pub type LargeBinaryDictionaryBuilder<K> = GenericByteDictionaryBuilder<K, GenericBinaryType<i64>>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
@@ -444,8 +428,7 @@ mod tests {
|
||||
|
||||
// Values are polymorphic and so require a downcast.
|
||||
let av = array.values();
|
||||
let ava: &GenericByteArray<T> =
|
||||
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
|
||||
assert_eq!(*ava.value(0), *values[0]);
|
||||
assert_eq!(*ava.value(1), *values[1]);
|
||||
@@ -483,8 +466,7 @@ mod tests {
|
||||
|
||||
// Values are polymorphic and so require a downcast.
|
||||
let av = array.values();
|
||||
let ava: &GenericByteArray<T> =
|
||||
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
|
||||
assert_eq!(ava.value(0), values[0]);
|
||||
assert_eq!(ava.value(1), values[1]);
|
||||
@@ -542,11 +524,8 @@ mod tests {
|
||||
<T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
|
||||
{
|
||||
let mut builder =
|
||||
GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(
|
||||
6,
|
||||
&dictionary,
|
||||
)
|
||||
.unwrap();
|
||||
GenericByteDictionaryBuilder::<Int8Type, T>::new_with_dictionary(6, &dictionary)
|
||||
.unwrap();
|
||||
builder.append(values[0]).unwrap();
|
||||
builder.append_null();
|
||||
builder.append(values[1]).unwrap();
|
||||
@@ -562,8 +541,7 @@ mod tests {
|
||||
|
||||
// Values are polymorphic and so require a downcast.
|
||||
let av = array.values();
|
||||
let ava: &GenericByteArray<T> =
|
||||
av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
let ava: &GenericByteArray<T> = av.as_any().downcast_ref::<GenericByteArray<T>>().unwrap();
|
||||
|
||||
assert!(!ava.is_valid(0));
|
||||
assert_eq!(ava.value(1), values[1]);
|
||||
@@ -597,11 +575,8 @@ mod tests {
|
||||
<T as ByteArrayType>::Native: AsRef<<T as ByteArrayType>::Native>,
|
||||
{
|
||||
let mut builder =
|
||||
GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(
|
||||
4,
|
||||
&dictionary,
|
||||
)
|
||||
.unwrap();
|
||||
GenericByteDictionaryBuilder::<Int16Type, T>::new_with_dictionary(4, &dictionary)
|
||||
.unwrap();
|
||||
builder.append(values[0]).unwrap();
|
||||
builder.append_null();
|
||||
builder.append(values[1]).unwrap();
|
||||
|
||||
@@ -86,11 +86,7 @@ impl Default for MapFieldNames {
|
||||
|
||||
impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
|
||||
/// Creates a new `MapBuilder`
|
||||
pub fn new(
|
||||
field_names: Option<MapFieldNames>,
|
||||
key_builder: K,
|
||||
value_builder: V,
|
||||
) -> Self {
|
||||
pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
|
||||
let capacity = key_builder.len();
|
||||
Self::with_capacity(field_names, key_builder, value_builder, capacity)
|
||||
}
|
||||
@@ -243,12 +239,9 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Keys array must have no null values, found 1 null value(s)"
|
||||
)]
|
||||
#[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
|
||||
fn test_map_builder_with_null_keys_panics() {
|
||||
let mut builder =
|
||||
MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
|
||||
let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
|
||||
builder.keys().append_null();
|
||||
builder.values().append_value(42);
|
||||
builder.append(true).unwrap();
|
||||
|
||||
@@ -161,9 +161,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
|
||||
let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
|
||||
|
||||
let null_buffer_builder = null_buffer
|
||||
.map(|buffer| {
|
||||
NullBufferBuilder::new_from_buffer(buffer, values_builder.len())
|
||||
})
|
||||
.map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
|
||||
.unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
|
||||
|
||||
Self {
|
||||
@@ -256,10 +254,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
|
||||
/// This requires the iterator be a trusted length. This could instead require
|
||||
/// the iterator implement `TrustedLen` once that is stabilized.
|
||||
#[inline]
|
||||
pub unsafe fn append_trusted_len_iter(
|
||||
&mut self,
|
||||
iter: impl IntoIterator<Item = T::Native>,
|
||||
) {
|
||||
pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
|
||||
let iter = iter.into_iter();
|
||||
let len = iter
|
||||
.size_hint()
|
||||
@@ -328,11 +323,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
|
||||
|
||||
impl<P: DecimalType> PrimitiveBuilder<P> {
|
||||
/// Sets the precision and scale
|
||||
pub fn with_precision_and_scale(
|
||||
self,
|
||||
precision: u8,
|
||||
scale: i8,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
|
||||
validate_decimal_precision_and_scale::<P>(precision, scale)?;
|
||||
Ok(Self {
|
||||
data_type: P::TYPE_CONSTRUCTOR(precision, scale),
|
||||
@@ -592,25 +583,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_primitive_array_builder_with_data_type() {
|
||||
let mut builder =
|
||||
Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
|
||||
let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
|
||||
builder.append_value(1);
|
||||
let array = builder.finish();
|
||||
assert_eq!(array.precision(), 1);
|
||||
assert_eq!(array.scale(), 2);
|
||||
|
||||
let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
|
||||
let mut builder =
|
||||
TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
|
||||
let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
|
||||
builder.append_value(1);
|
||||
let array = builder.finish();
|
||||
assert_eq!(array.data_type(), &data_type);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "incompatible data type for builder, expected Int32 got Int64"
|
||||
)]
|
||||
#[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
|
||||
fn test_invalid_with_data_type() {
|
||||
Int32Builder::new().with_data_type(DataType::Int64);
|
||||
}
|
||||
|
||||
@@ -221,8 +221,7 @@ where
|
||||
let key = self.values_builder.len();
|
||||
self.values_builder.append_value(value);
|
||||
vacant.insert(key);
|
||||
K::Native::from_usize(key)
|
||||
.ok_or(ArrowError::DictionaryKeyOverflowError)?
|
||||
K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?
|
||||
}
|
||||
Entry::Occupied(o) => K::Native::usize_as(*o.get()),
|
||||
};
|
||||
@@ -266,10 +265,8 @@ where
|
||||
let values = self.values_builder.finish();
|
||||
let keys = self.keys_builder.finish();
|
||||
|
||||
let data_type = DataType::Dictionary(
|
||||
Box::new(K::DATA_TYPE),
|
||||
Box::new(values.data_type().clone()),
|
||||
);
|
||||
let data_type =
|
||||
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
|
||||
|
||||
let builder = keys
|
||||
.into_data()
|
||||
@@ -285,8 +282,7 @@ where
|
||||
let values = self.values_builder.finish_cloned();
|
||||
let keys = self.keys_builder.finish_cloned();
|
||||
|
||||
let data_type =
|
||||
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
|
||||
let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
|
||||
|
||||
let builder = keys
|
||||
.into_data()
|
||||
@@ -331,8 +327,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_primitive_dictionary_builder() {
|
||||
let mut builder =
|
||||
PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
|
||||
let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::with_capacity(3, 2);
|
||||
builder.append(12345678).unwrap();
|
||||
builder.append_null();
|
||||
builder.append(22345678).unwrap();
|
||||
@@ -384,8 +379,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_primitive_dictionary_with_builders() {
|
||||
let keys_builder = PrimitiveBuilder::<Int32Type>::new();
|
||||
let values_builder =
|
||||
Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
|
||||
let values_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
|
||||
let mut builder =
|
||||
PrimitiveDictionaryBuilder::<Int32Type, Decimal128Type>::new_from_empty_builders(
|
||||
keys_builder,
|
||||
|
||||
@@ -106,24 +106,18 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
|
||||
DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
|
||||
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
|
||||
DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
|
||||
DataType::LargeBinary => {
|
||||
Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024))
|
||||
}
|
||||
DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
|
||||
DataType::FixedSizeBinary(len) => {
|
||||
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
|
||||
}
|
||||
DataType::Decimal128(p, s) => Box::new(
|
||||
Decimal128Builder::with_capacity(capacity)
|
||||
.with_data_type(DataType::Decimal128(*p, *s)),
|
||||
Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
|
||||
),
|
||||
DataType::Decimal256(p, s) => Box::new(
|
||||
Decimal256Builder::with_capacity(capacity)
|
||||
.with_data_type(DataType::Decimal256(*p, *s)),
|
||||
Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
|
||||
),
|
||||
DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
|
||||
DataType::LargeUtf8 => {
|
||||
Box::new(LargeStringBuilder::with_capacity(capacity, 1024))
|
||||
}
|
||||
DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
|
||||
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
|
||||
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
|
||||
DataType::Time32(TimeUnit::Second) => {
|
||||
@@ -175,19 +169,14 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
|
||||
DataType::Duration(TimeUnit::Nanosecond) => {
|
||||
Box::new(DurationNanosecondBuilder::with_capacity(capacity))
|
||||
}
|
||||
DataType::Struct(fields) => {
|
||||
Box::new(StructBuilder::from_fields(fields.clone(), capacity))
|
||||
}
|
||||
DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
|
||||
t => panic!("Data type {t:?} is not currently supported"),
|
||||
}
|
||||
}
|
||||
|
||||
impl StructBuilder {
|
||||
/// Creates a new `StructBuilder`
|
||||
pub fn new(
|
||||
fields: impl Into<Fields>,
|
||||
field_builders: Vec<Box<dyn ArrayBuilder>>,
|
||||
) -> Self {
|
||||
pub fn new(fields: impl Into<Fields>, field_builders: Vec<Box<dyn ArrayBuilder>>) -> Self {
|
||||
Self {
|
||||
field_builders,
|
||||
fields: fields.into(),
|
||||
@@ -234,10 +223,7 @@ impl StructBuilder {
|
||||
pub fn finish(&mut self) -> StructArray {
|
||||
self.validate_content();
|
||||
if self.fields.is_empty() {
|
||||
return StructArray::new_empty_fields(
|
||||
self.len(),
|
||||
self.null_buffer_builder.finish(),
|
||||
);
|
||||
return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish());
|
||||
}
|
||||
|
||||
let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect();
|
||||
@@ -524,8 +510,7 @@ mod tests {
|
||||
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
|
||||
)]
|
||||
fn test_struct_array_builder_from_schema_unsupported_type() {
|
||||
let list_type =
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
|
||||
let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
|
||||
let fields = vec![
|
||||
Field::new("f1", DataType::Int16, false),
|
||||
Field::new("f2", list_type, false),
|
||||
@@ -571,9 +556,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Number of fields is not equal to the number of field_builders."
|
||||
)]
|
||||
#[should_panic(expected = "Number of fields is not equal to the number of field_builders.")]
|
||||
fn test_struct_array_builder_unequal_field_field_builders() {
|
||||
let int_builder = Int32Builder::with_capacity(10);
|
||||
|
||||
|
||||
@@ -65,11 +65,7 @@ impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
|
||||
|
||||
impl FieldData {
|
||||
/// Creates a new `FieldData`.
|
||||
fn new<T: ArrowPrimitiveType>(
|
||||
type_id: i8,
|
||||
data_type: DataType,
|
||||
capacity: usize,
|
||||
) -> Self {
|
||||
fn new<T: ArrowPrimitiveType>(type_id: i8, data_type: DataType, capacity: usize) -> Self {
|
||||
Self {
|
||||
type_id,
|
||||
data_type,
|
||||
@@ -222,7 +218,12 @@ impl UnionBuilder {
|
||||
let mut field_data = match self.fields.remove(&type_name) {
|
||||
Some(data) => {
|
||||
if data.data_type != T::DATA_TYPE {
|
||||
return Err(ArrowError::InvalidArgumentError(format!("Attempt to write col \"{}\" with type {} doesn't match existing type {}", type_name, T::DATA_TYPE, data.data_type)));
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"Attempt to write col \"{}\" with type {} doesn't match existing type {}",
|
||||
type_name,
|
||||
T::DATA_TYPE,
|
||||
data.data_type
|
||||
)));
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
+6
-17
@@ -578,9 +578,7 @@ macro_rules! downcast_run_array {
|
||||
|
||||
/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
|
||||
/// [`GenericListArray<T>`], panicking on failure.
|
||||
pub fn as_generic_list_array<S: OffsetSizeTrait>(
|
||||
arr: &dyn Array,
|
||||
) -> &GenericListArray<S> {
|
||||
pub fn as_generic_list_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericListArray<S> {
|
||||
arr.as_any()
|
||||
.downcast_ref::<GenericListArray<S>>()
|
||||
.expect("Unable to downcast to list array")
|
||||
@@ -612,9 +610,7 @@ pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray {
|
||||
/// Force downcast of an [`Array`], such as an [`ArrayRef`] to
|
||||
/// [`GenericBinaryArray<S>`], panicking on failure.
|
||||
#[inline]
|
||||
pub fn as_generic_binary_array<S: OffsetSizeTrait>(
|
||||
arr: &dyn Array,
|
||||
) -> &GenericBinaryArray<S> {
|
||||
pub fn as_generic_binary_array<S: OffsetSizeTrait>(arr: &dyn Array) -> &GenericBinaryArray<S> {
|
||||
arr.as_any()
|
||||
.downcast_ref::<GenericBinaryArray<S>>()
|
||||
.expect("Unable to downcast to binary array")
|
||||
@@ -826,8 +822,7 @@ pub trait AsArray: private::Sealed {
|
||||
}
|
||||
|
||||
/// Downcast this to a [`DictionaryArray`] returning `None` if not possible
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self)
|
||||
-> Option<&DictionaryArray<K>>;
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>>;
|
||||
|
||||
/// Downcast this to a [`DictionaryArray`] panicking if not possible
|
||||
fn as_dictionary<K: ArrowDictionaryKeyType>(&self) -> &DictionaryArray<K> {
|
||||
@@ -877,9 +872,7 @@ impl AsArray for dyn Array + '_ {
|
||||
self.as_any().downcast_ref()
|
||||
}
|
||||
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(
|
||||
&self,
|
||||
) -> Option<&DictionaryArray<K>> {
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
|
||||
self.as_any().downcast_ref()
|
||||
}
|
||||
|
||||
@@ -926,9 +919,7 @@ impl AsArray for ArrayRef {
|
||||
self.as_any().downcast_ref()
|
||||
}
|
||||
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(
|
||||
&self,
|
||||
) -> Option<&DictionaryArray<K>> {
|
||||
fn as_dictionary_opt<K: ArrowDictionaryKeyType>(&self) -> Option<&DictionaryArray<K>> {
|
||||
self.as_ref().as_dictionary_opt()
|
||||
}
|
||||
|
||||
@@ -972,9 +963,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_decimal256array() {
|
||||
let a = Decimal256Array::from_iter_values(
|
||||
[1, 2, 4, 5].into_iter().map(i256::from_i128),
|
||||
);
|
||||
let a = Decimal256Array::from_iter_values([1, 2, 4, 5].into_iter().map(i256::from_i128));
|
||||
assert!(!as_primitive_array::<Decimal256Type>(&a).is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,10 +55,7 @@ pub(crate) fn add_months_datetime<Tz: TimeZone>(
|
||||
/// Add the given number of days to the given datetime.
|
||||
///
|
||||
/// Returns `None` when it will result in overflow.
|
||||
pub(crate) fn add_days_datetime<Tz: TimeZone>(
|
||||
dt: DateTime<Tz>,
|
||||
days: i32,
|
||||
) -> Option<DateTime<Tz>> {
|
||||
pub(crate) fn add_days_datetime<Tz: TimeZone>(dt: DateTime<Tz>, days: i32) -> Option<DateTime<Tz>> {
|
||||
match days.cmp(&0) {
|
||||
Ordering::Equal => Some(dt),
|
||||
Ordering::Greater => dt.checked_add_days(Days::new(days as u64)),
|
||||
@@ -83,10 +80,7 @@ pub(crate) fn sub_months_datetime<Tz: TimeZone>(
|
||||
/// Substract the given number of days to the given datetime.
|
||||
///
|
||||
/// Returns `None` when it will result in overflow.
|
||||
pub(crate) fn sub_days_datetime<Tz: TimeZone>(
|
||||
dt: DateTime<Tz>,
|
||||
days: i32,
|
||||
) -> Option<DateTime<Tz>> {
|
||||
pub(crate) fn sub_days_datetime<Tz: TimeZone>(dt: DateTime<Tz>, days: i32) -> Option<DateTime<Tz>> {
|
||||
match days.cmp(&0) {
|
||||
Ordering::Equal => Some(dt),
|
||||
Ordering::Greater => dt.checked_sub_days(Days::new(days as u64)),
|
||||
|
||||
@@ -18,8 +18,8 @@
|
||||
//! Idiomatic iterators for [`Array`](crate::Array)
|
||||
|
||||
use crate::array::{
|
||||
ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray,
|
||||
GenericListArray, GenericStringArray, PrimitiveArray,
|
||||
ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray,
|
||||
GenericStringArray, PrimitiveArray,
|
||||
};
|
||||
use crate::{FixedSizeListArray, MapArray};
|
||||
use arrow_buffer::NullBuffer;
|
||||
@@ -187,8 +187,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_string_array_iter_round_trip() {
|
||||
let array =
|
||||
StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
|
||||
let array = StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]);
|
||||
let array = Arc::new(array) as ArrayRef;
|
||||
|
||||
let array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
@@ -211,8 +210,7 @@ mod tests {
|
||||
|
||||
// check if DoubleEndedIterator is implemented
|
||||
let result: StringArray = array.iter().rev().collect();
|
||||
let rev_array =
|
||||
StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
|
||||
let rev_array = StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]);
|
||||
assert_eq!(result, rev_array);
|
||||
// check if ExactSizeIterator is implemented
|
||||
let _ = array.iter().rposition(|opt_b| opt_b == Some("a"));
|
||||
|
||||
@@ -182,8 +182,7 @@ pub use array::*;
|
||||
|
||||
mod record_batch;
|
||||
pub use record_batch::{
|
||||
RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader,
|
||||
RecordBatchWriter,
|
||||
RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, RecordBatchWriter,
|
||||
};
|
||||
|
||||
mod arithmetic;
|
||||
|
||||
+19
-37
@@ -179,8 +179,8 @@ macro_rules! make_numeric_type {
|
||||
16 => {
|
||||
// same general logic as for 8 lanes, extended to 16 bits
|
||||
let vecidx = i32x16::new(
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
|
||||
8192, 16384, 32768,
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
|
||||
32768,
|
||||
);
|
||||
|
||||
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
|
||||
@@ -194,21 +194,19 @@ macro_rules! make_numeric_type {
|
||||
let tmp = &mut [0_i16; 32];
|
||||
|
||||
let vecidx = i32x16::new(
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
|
||||
8192, 16384, 32768,
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
|
||||
32768,
|
||||
);
|
||||
|
||||
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i16x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[0..16]);
|
||||
i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]);
|
||||
|
||||
let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i16x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[16..32]);
|
||||
i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]);
|
||||
|
||||
unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) }
|
||||
}
|
||||
@@ -218,33 +216,29 @@ macro_rules! make_numeric_type {
|
||||
let tmp = &mut [0_i8; 64];
|
||||
|
||||
let vecidx = i32x16::new(
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
|
||||
8192, 16384, 32768,
|
||||
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384,
|
||||
32768,
|
||||
);
|
||||
|
||||
let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i8x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[0..16]);
|
||||
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]);
|
||||
|
||||
let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i8x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[16..32]);
|
||||
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]);
|
||||
|
||||
let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i8x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[32..48]);
|
||||
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[32..48]);
|
||||
|
||||
let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32);
|
||||
let vecmask = (vecidx & vecmask).eq(vecidx);
|
||||
|
||||
i8x16::from_cast(vecmask)
|
||||
.write_to_slice_unaligned(&mut tmp[48..64]);
|
||||
i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[48..64]);
|
||||
|
||||
unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) }
|
||||
}
|
||||
@@ -269,11 +263,7 @@ macro_rules! make_numeric_type {
|
||||
|
||||
/// Selects elements of `a` and `b` using `mask`
|
||||
#[inline]
|
||||
fn mask_select(
|
||||
mask: Self::SimdMask,
|
||||
a: Self::Simd,
|
||||
b: Self::Simd,
|
||||
) -> Self::Simd {
|
||||
fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd {
|
||||
mask.select(a, b)
|
||||
}
|
||||
|
||||
@@ -327,10 +317,7 @@ macro_rules! make_numeric_type {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(
|
||||
a: Self::Simd,
|
||||
op: F,
|
||||
) -> Self::Simd {
|
||||
fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(a: Self::Simd, op: F) -> Self::Simd {
|
||||
op(a)
|
||||
}
|
||||
}
|
||||
@@ -581,8 +568,7 @@ mod tests {
|
||||
let mask = 0b1101;
|
||||
let actual = IntervalMonthDayNanoType::mask_from_u64(mask);
|
||||
let expected = expected_mask!(i128, mask);
|
||||
let expected =
|
||||
m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice()));
|
||||
let expected = m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice()));
|
||||
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
@@ -612,8 +598,7 @@ mod tests {
|
||||
let mask = 0b10101010_10101010;
|
||||
let actual = Float32Type::mask_from_u64(mask);
|
||||
let expected = expected_mask!(i32, mask);
|
||||
let expected =
|
||||
m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
|
||||
let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
|
||||
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
@@ -623,8 +608,7 @@ mod tests {
|
||||
let mask = 0b01010101_01010101;
|
||||
let actual = Int32Type::mask_from_u64(mask);
|
||||
let expected = expected_mask!(i32, mask);
|
||||
let expected =
|
||||
m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
|
||||
let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));
|
||||
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
@@ -635,16 +619,14 @@ mod tests {
|
||||
let actual = UInt16Type::mask_from_u64(mask);
|
||||
let expected = expected_mask!(i16, mask);
|
||||
dbg!(&expected);
|
||||
let expected =
|
||||
m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
|
||||
let expected = m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));
|
||||
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mask_i8() {
|
||||
let mask =
|
||||
0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
|
||||
let mask = 0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
|
||||
let actual = Int8Type::mask_from_u64(mask);
|
||||
let expected = expected_mask!(i8, mask);
|
||||
let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice()));
|
||||
|
||||
@@ -107,10 +107,7 @@ impl RecordBatch {
|
||||
/// vec![Arc::new(id_array)]
|
||||
/// ).unwrap();
|
||||
/// ```
|
||||
pub fn try_new(
|
||||
schema: SchemaRef,
|
||||
columns: Vec<ArrayRef>,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn try_new(schema: SchemaRef, columns: Vec<ArrayRef>) -> Result<Self, ArrowError> {
|
||||
let options = RecordBatchOptions::new();
|
||||
Self::try_new_impl(schema, columns, &options)
|
||||
}
|
||||
@@ -179,9 +176,7 @@ impl RecordBatch {
|
||||
// check that all columns have the same row count
|
||||
if columns.iter().any(|c| c.len() != row_count) {
|
||||
let err = match options.row_count {
|
||||
Some(_) => {
|
||||
"all columns in a record batch must have the specified row count"
|
||||
}
|
||||
Some(_) => "all columns in a record batch must have the specified row count",
|
||||
None => "all columns in a record batch must have the same length",
|
||||
};
|
||||
return Err(ArrowError::InvalidArgumentError(err.to_string()));
|
||||
@@ -190,9 +185,7 @@ impl RecordBatch {
|
||||
// function for comparing column type and field type
|
||||
// return true if 2 types are not matched
|
||||
let type_not_match = if options.match_field_names {
|
||||
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| {
|
||||
col_type != field_type
|
||||
}
|
||||
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| col_type != field_type
|
||||
} else {
|
||||
|(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| {
|
||||
!col_type.equals_datatype(field_type)
|
||||
@@ -484,7 +477,11 @@ impl From<StructArray> for RecordBatch {
|
||||
fn from(value: StructArray) -> Self {
|
||||
let row_count = value.len();
|
||||
let (fields, columns, nulls) = value.into_parts();
|
||||
assert_eq!(nulls.map(|n| n.null_count()).unwrap_or_default(), 0, "Cannot convert nullable StructArray to RecordBatch, see StructArray documentation");
|
||||
assert_eq!(
|
||||
nulls.map(|n| n.null_count()).unwrap_or_default(),
|
||||
0,
|
||||
"Cannot convert nullable StructArray to RecordBatch, see StructArray documentation"
|
||||
);
|
||||
|
||||
RecordBatch {
|
||||
schema: Arc::new(Schema::new(fields)),
|
||||
@@ -588,9 +585,7 @@ where
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray,
|
||||
};
|
||||
use crate::{BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray};
|
||||
use arrow_buffer::{Buffer, ToByteSlice};
|
||||
use arrow_data::{ArrayData, ArrayDataBuilder};
|
||||
use arrow_schema::Fields;
|
||||
@@ -606,8 +601,7 @@ mod tests {
|
||||
let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
|
||||
|
||||
let record_batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
|
||||
check_batch(record_batch, 5)
|
||||
}
|
||||
|
||||
@@ -622,8 +616,7 @@ mod tests {
|
||||
let b = StringArray::from(vec!["a", "b", "c", "d", "e"]);
|
||||
|
||||
let record_batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
|
||||
assert_eq!(record_batch.get_array_memory_size(), 364);
|
||||
}
|
||||
|
||||
@@ -649,8 +642,7 @@ mod tests {
|
||||
let b = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "h", "i"]);
|
||||
|
||||
let record_batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
|
||||
|
||||
let offset = 2;
|
||||
let length = 5;
|
||||
@@ -699,8 +691,8 @@ mod tests {
|
||||
]));
|
||||
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"]));
|
||||
|
||||
let record_batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)])
|
||||
.expect("valid conversion");
|
||||
let record_batch =
|
||||
RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).expect("valid conversion");
|
||||
|
||||
let expected_schema = Schema::new(vec![
|
||||
Field::new("a", DataType::Int32, true),
|
||||
@@ -716,11 +708,9 @@ mod tests {
|
||||
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"]));
|
||||
|
||||
// Note there are no nulls in a or b, but we specify that b is nullable
|
||||
let record_batch = RecordBatch::try_from_iter_with_nullable(vec![
|
||||
("a", a, false),
|
||||
("b", b, true),
|
||||
])
|
||||
.expect("valid conversion");
|
||||
let record_batch =
|
||||
RecordBatch::try_from_iter_with_nullable(vec![("a", a, false), ("b", b, true)])
|
||||
.expect("valid conversion");
|
||||
|
||||
let expected_schema = Schema::new(vec![
|
||||
Field::new("a", DataType::Int32, false),
|
||||
@@ -792,8 +782,7 @@ mod tests {
|
||||
let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
|
||||
let b = Int32Array::from(vec![1, 2, 3, 4, 5]);
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]);
|
||||
assert!(batch.is_err());
|
||||
}
|
||||
|
||||
@@ -863,11 +852,8 @@ mod tests {
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("val", DataType::Int32, false),
|
||||
]);
|
||||
let record_batch = RecordBatch::try_new(
|
||||
Arc::new(schema1),
|
||||
vec![id_arr.clone(), val_arr.clone()],
|
||||
)
|
||||
.unwrap();
|
||||
let record_batch =
|
||||
RecordBatch::try_new(Arc::new(schema1), vec![id_arr.clone(), val_arr.clone()]).unwrap();
|
||||
|
||||
assert_eq!(record_batch["id"].as_ref(), id_arr.as_ref());
|
||||
assert_eq!(record_batch["val"].as_ref(), val_arr.as_ref());
|
||||
@@ -1005,15 +991,12 @@ mod tests {
|
||||
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
|
||||
let c: ArrayRef = Arc::new(StringArray::from(vec!["d", "e", "f"]));
|
||||
|
||||
let record_batch = RecordBatch::try_from_iter(vec![
|
||||
("a", a.clone()),
|
||||
("b", b.clone()),
|
||||
("c", c.clone()),
|
||||
])
|
||||
.expect("valid conversion");
|
||||
let record_batch =
|
||||
RecordBatch::try_from_iter(vec![("a", a.clone()), ("b", b.clone()), ("c", c.clone())])
|
||||
.expect("valid conversion");
|
||||
|
||||
let expected = RecordBatch::try_from_iter(vec![("a", a), ("c", c)])
|
||||
.expect("valid conversion");
|
||||
let expected =
|
||||
RecordBatch::try_from_iter(vec![("a", a), ("c", c)]).expect("valid conversion");
|
||||
|
||||
assert_eq!(expected, record_batch.project(&[0, 2]).unwrap());
|
||||
}
|
||||
@@ -1049,8 +1032,7 @@ mod tests {
|
||||
|
||||
let options = RecordBatchOptions::new().with_row_count(Some(10));
|
||||
|
||||
let ok =
|
||||
RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
|
||||
let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
|
||||
assert_eq!(ok.num_rows(), 10);
|
||||
|
||||
let a = ok.slice(2, 5);
|
||||
|
||||
@@ -86,8 +86,7 @@ where
|
||||
// If current logical index is greater than current run end index then increment
|
||||
// the physical index.
|
||||
let run_ends = self.array.run_ends().values();
|
||||
if self.current_front_logical >= run_ends[self.current_front_physical].as_usize()
|
||||
{
|
||||
if self.current_front_logical >= run_ends[self.current_front_physical].as_usize() {
|
||||
// As the run_ends is expected to be strictly increasing, there
|
||||
// should be at least one logical entry in one physical entry. Because of this
|
||||
// reason the next value can be accessed by incrementing physical index once.
|
||||
@@ -136,8 +135,7 @@ where
|
||||
|
||||
let run_ends = self.array.run_ends().values();
|
||||
if self.current_back_physical > 0
|
||||
&& self.current_back_logical
|
||||
< run_ends[self.current_back_physical - 1].as_usize()
|
||||
&& self.current_back_logical < run_ends[self.current_back_physical - 1].as_usize()
|
||||
{
|
||||
// As the run_ends is expected to be strictly increasing, there
|
||||
// should be at least one logical entry in one physical entry. Because of this
|
||||
@@ -211,8 +209,7 @@ mod tests {
|
||||
seed.shuffle(&mut rng);
|
||||
}
|
||||
// repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
|
||||
let num =
|
||||
max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
|
||||
let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length));
|
||||
for _ in 0..num {
|
||||
result.push(seed[ix]);
|
||||
}
|
||||
@@ -285,8 +282,7 @@ mod tests {
|
||||
for logical_len in logical_lengths {
|
||||
let input_array = build_input_array(logical_len);
|
||||
|
||||
let mut run_array_builder =
|
||||
PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
|
||||
let mut run_array_builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
|
||||
run_array_builder.extend(input_array.iter().copied());
|
||||
let run_array = run_array_builder.finish();
|
||||
let typed_array = run_array.downcast::<Int32Array>().unwrap();
|
||||
@@ -327,8 +323,7 @@ mod tests {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let result_asref: Vec<Option<&str>> =
|
||||
result.iter().map(|f| f.as_deref()).collect();
|
||||
let result_asref: Vec<Option<&str>> = result.iter().map(|f| f.as_deref()).collect();
|
||||
|
||||
let expected_vec = vec![
|
||||
Some("abb"),
|
||||
@@ -364,8 +359,7 @@ mod tests {
|
||||
|
||||
// Iterate on sliced typed run array
|
||||
let actual: Vec<Option<i32>> = sliced_typed_run_array.into_iter().collect();
|
||||
let expected: Vec<Option<i32>> =
|
||||
input_array.iter().take(slice_len).copied().collect();
|
||||
let expected: Vec<Option<i32>> = input_array.iter().take(slice_len).copied().collect();
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
// test for offset = total_len - slice_len, length = slice_len
|
||||
|
||||
@@ -20,9 +20,7 @@
|
||||
use crate::timezone::Tz;
|
||||
use crate::ArrowPrimitiveType;
|
||||
use arrow_schema::{DataType, TimeUnit};
|
||||
use chrono::{
|
||||
DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc,
|
||||
};
|
||||
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc};
|
||||
|
||||
/// Number of seconds in a day
|
||||
pub const SECONDS_IN_DAY: i64 = 86_400;
|
||||
@@ -221,10 +219,7 @@ pub fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
|
||||
}
|
||||
|
||||
/// Converts an [`ArrowPrimitiveType`] to [`DateTime<Tz>`]
|
||||
pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(
|
||||
v: i64,
|
||||
tz: Tz,
|
||||
) -> Option<DateTime<Tz>> {
|
||||
pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(v: i64, tz: Tz) -> Option<DateTime<Tz>> {
|
||||
let naive = as_datetime::<T>(v)?;
|
||||
Some(Utc.from_utc_datetime(&naive).with_timezone(&tz))
|
||||
}
|
||||
@@ -274,8 +269,8 @@ pub fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::temporal_conversions::{
|
||||
date64_to_datetime, split_second, timestamp_ms_to_datetime,
|
||||
timestamp_ns_to_datetime, timestamp_us_to_datetime, NANOSECONDS,
|
||||
date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
|
||||
timestamp_us_to_datetime, NANOSECONDS,
|
||||
};
|
||||
use chrono::NaiveDateTime;
|
||||
|
||||
|
||||
@@ -38,8 +38,8 @@ fn parse_fixed_offset(tz: &str) -> Option<FixedOffset> {
|
||||
if values.iter().any(|x| *x > 9) {
|
||||
return None;
|
||||
}
|
||||
let secs = (values[0] * 10 + values[1]) as i32 * 60 * 60
|
||||
+ (values[2] * 10 + values[3]) as i32 * 60;
|
||||
let secs =
|
||||
(values[0] * 10 + values[1]) as i32 * 60 * 60 + (values[2] * 10 + values[3]) as i32 * 60;
|
||||
|
||||
match bytes[0] {
|
||||
b'+' => FixedOffset::east_opt(secs),
|
||||
@@ -122,10 +122,7 @@ mod private {
|
||||
})
|
||||
}
|
||||
|
||||
fn offset_from_local_datetime(
|
||||
&self,
|
||||
local: &NaiveDateTime,
|
||||
) -> LocalResult<Self::Offset> {
|
||||
fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> {
|
||||
tz!(self, tz, {
|
||||
tz.offset_from_local_datetime(local).map(|x| TzOffset {
|
||||
tz: *self,
|
||||
@@ -285,10 +282,7 @@ mod private {
|
||||
self.0.offset_from_local_date(local).map(TzOffset)
|
||||
}
|
||||
|
||||
fn offset_from_local_datetime(
|
||||
&self,
|
||||
local: &NaiveDateTime,
|
||||
) -> LocalResult<Self::Offset> {
|
||||
fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<Self::Offset> {
|
||||
self.0.offset_from_local_datetime(local).map(TzOffset)
|
||||
}
|
||||
|
||||
|
||||
@@ -18,8 +18,7 @@
|
||||
//! Zero-sized types used to parameterize generic array implementations
|
||||
|
||||
use crate::delta::{
|
||||
add_days_datetime, add_months_datetime, shift_months, sub_days_datetime,
|
||||
sub_months_datetime,
|
||||
add_days_datetime, add_months_datetime, shift_months, sub_days_datetime, sub_months_datetime,
|
||||
};
|
||||
use crate::temporal_conversions::as_datetime_with_timezone;
|
||||
use crate::timezone::Tz;
|
||||
@@ -27,9 +26,8 @@ use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
|
||||
use arrow_buffer::{i256, Buffer, OffsetBuffer};
|
||||
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
|
||||
use arrow_schema::{
|
||||
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION,
|
||||
DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
|
||||
DECIMAL_DEFAULT_SCALE,
|
||||
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
|
||||
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
|
||||
};
|
||||
use chrono::{Duration, NaiveDate, NaiveDateTime};
|
||||
use half::f16;
|
||||
@@ -875,9 +873,7 @@ impl IntervalDayTimeType {
|
||||
///
|
||||
/// * `i` - The IntervalDayTimeType to convert
|
||||
#[inline]
|
||||
pub fn to_parts(
|
||||
i: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
|
||||
) -> (i32, i32) {
|
||||
pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) -> (i32, i32) {
|
||||
let days = (i >> 32) as i32;
|
||||
let ms = i as i32;
|
||||
(days, ms)
|
||||
@@ -1221,10 +1217,7 @@ pub trait DecimalType:
|
||||
fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String;
|
||||
|
||||
/// Validates that `value` contains no more than `precision` decimal digits
|
||||
fn validate_decimal_precision(
|
||||
value: Self::Native,
|
||||
precision: u8,
|
||||
) -> Result<(), ArrowError>;
|
||||
fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>;
|
||||
}
|
||||
|
||||
/// Validate that `precision` and `scale` are valid for `T`
|
||||
@@ -1400,10 +1393,7 @@ pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed {
|
||||
const DATA_TYPE: DataType;
|
||||
|
||||
/// Verifies that every consecutive pair of `offsets` denotes a valid slice of `values`
|
||||
fn validate(
|
||||
offsets: &OffsetBuffer<Self::Offset>,
|
||||
values: &Buffer,
|
||||
) -> Result<(), ArrowError>;
|
||||
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError>;
|
||||
}
|
||||
|
||||
/// [`ByteArrayType`] for string arrays
|
||||
@@ -1422,10 +1412,7 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericStringType<O> {
|
||||
DataType::Utf8
|
||||
};
|
||||
|
||||
fn validate(
|
||||
offsets: &OffsetBuffer<Self::Offset>,
|
||||
values: &Buffer,
|
||||
) -> Result<(), ArrowError> {
|
||||
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
|
||||
// Verify that the slice as a whole is valid UTF-8
|
||||
let validated = std::str::from_utf8(values).map_err(|e| {
|
||||
ArrowError::InvalidArgumentError(format!("Encountered non UTF-8 data: {e}"))
|
||||
@@ -1471,10 +1458,7 @@ impl<O: OffsetSizeTrait> ByteArrayType for GenericBinaryType<O> {
|
||||
DataType::Binary
|
||||
};
|
||||
|
||||
fn validate(
|
||||
offsets: &OffsetBuffer<Self::Offset>,
|
||||
values: &Buffer,
|
||||
) -> Result<(), ArrowError> {
|
||||
fn validate(offsets: &OffsetBuffer<Self::Offset>, values: &Buffer) -> Result<(), ArrowError> {
|
||||
// offsets are guaranteed to be monotonically increasing and non-empty
|
||||
let max_offset = offsets.last().unwrap().as_usize();
|
||||
if values.len() < max_offset {
|
||||
|
||||
@@ -133,9 +133,7 @@ impl HeaderDecoder {
|
||||
let remaining = &MAGIC[MAGIC.len() - self.bytes_remaining..];
|
||||
let to_decode = buf.len().min(remaining.len());
|
||||
if !buf.starts_with(&remaining[..to_decode]) {
|
||||
return Err(ArrowError::ParseError(
|
||||
"Incorrect avro magic".to_string(),
|
||||
));
|
||||
return Err(ArrowError::ParseError("Incorrect avro magic".to_string()));
|
||||
}
|
||||
self.bytes_remaining -= to_decode;
|
||||
buf = &buf[to_decode..];
|
||||
|
||||
@@ -50,9 +50,7 @@ fn read_header<R: BufRead>(mut reader: R) -> Result<Header, ArrowError> {
|
||||
}
|
||||
|
||||
/// Return an iterator of [`Block`] from the provided [`BufRead`]
|
||||
fn read_blocks<R: BufRead>(
|
||||
mut reader: R,
|
||||
) -> impl Iterator<Item = Result<Block, ArrowError>> {
|
||||
fn read_blocks<R: BufRead>(mut reader: R) -> impl Iterator<Item = Result<Block, ArrowError>> {
|
||||
let mut decoder = BlockDecoder::default();
|
||||
|
||||
let mut try_next = move || {
|
||||
|
||||
@@ -335,9 +335,7 @@ mod tests {
|
||||
Field {
|
||||
name: "value",
|
||||
doc: None,
|
||||
r#type: Schema::TypeName(TypeName::Primitive(
|
||||
PrimitiveType::Long
|
||||
)),
|
||||
r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
|
||||
default: None,
|
||||
},
|
||||
Field {
|
||||
|
||||
@@ -26,10 +26,7 @@
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if divisor is zero
|
||||
pub fn div_rem<const N: usize>(
|
||||
numerator: &[u64; N],
|
||||
divisor: &[u64; N],
|
||||
) -> ([u64; N], [u64; N]) {
|
||||
pub fn div_rem<const N: usize>(numerator: &[u64; N], divisor: &[u64; N]) -> ([u64; N], [u64; N]) {
|
||||
let numerator_bits = bits(numerator);
|
||||
let divisor_bits = bits(divisor);
|
||||
assert_ne!(divisor_bits, 0, "division by zero");
|
||||
@@ -61,10 +58,7 @@ fn bits(arr: &[u64]) -> usize {
|
||||
}
|
||||
|
||||
/// Division of numerator by a u64 divisor
|
||||
fn div_rem_small<const N: usize>(
|
||||
numerator: &[u64; N],
|
||||
divisor: u64,
|
||||
) -> ([u64; N], [u64; N]) {
|
||||
fn div_rem_small<const N: usize>(numerator: &[u64; N], divisor: u64) -> ([u64; N], [u64; N]) {
|
||||
let mut rem = 0u64;
|
||||
let mut numerator = *numerator;
|
||||
numerator.iter_mut().rev().for_each(|d| {
|
||||
@@ -227,11 +221,7 @@ fn sub_assign(a: &mut [u64], b: &[u64]) -> bool {
|
||||
}
|
||||
|
||||
/// Converts an overflowing binary operation on scalars to one on slices
|
||||
fn binop_slice(
|
||||
a: &mut [u64],
|
||||
b: &[u64],
|
||||
binop: impl Fn(u64, u64) -> (u64, bool) + Copy,
|
||||
) -> bool {
|
||||
fn binop_slice(a: &mut [u64], b: &[u64], binop: impl Fn(u64, u64) -> (u64, bool) + Copy) -> bool {
|
||||
let mut c = false;
|
||||
a.iter_mut().zip(b.iter()).for_each(|(x, y)| {
|
||||
let (res1, overflow1) = y.overflowing_add(u64::from(c));
|
||||
|
||||
@@ -310,9 +310,7 @@ impl i256 {
|
||||
(Self::from_le_bytes(bytes), false)
|
||||
}
|
||||
Ordering::Equal => (Self::from_le_bytes(v_bytes.try_into().unwrap()), false),
|
||||
Ordering::Greater => {
|
||||
(Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true)
|
||||
}
|
||||
Ordering::Greater => (Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,8 +355,7 @@ impl i256 {
|
||||
#[inline]
|
||||
pub fn checked_add(self, other: Self) -> Option<Self> {
|
||||
let r = self.wrapping_add(other);
|
||||
((other.is_negative() && r < self) || (!other.is_negative() && r >= self))
|
||||
.then_some(r)
|
||||
((other.is_negative() && r < self) || (!other.is_negative() && r >= self)).then_some(r)
|
||||
}
|
||||
|
||||
/// Performs wrapping subtraction
|
||||
@@ -373,8 +370,7 @@ impl i256 {
|
||||
#[inline]
|
||||
pub fn checked_sub(self, other: Self) -> Option<Self> {
|
||||
let r = self.wrapping_sub(other);
|
||||
((other.is_negative() && r > self) || (!other.is_negative() && r <= self))
|
||||
.then_some(r)
|
||||
((other.is_negative() && r > self) || (!other.is_negative() && r <= self)).then_some(r)
|
||||
}
|
||||
|
||||
/// Performs wrapping multiplication
|
||||
@@ -591,9 +587,7 @@ impl i256 {
|
||||
|
||||
/// Temporary workaround due to lack of stable const array slicing
|
||||
/// See <https://github.com/rust-lang/rust/issues/90091>
|
||||
const fn split_array<const N: usize, const M: usize>(
|
||||
vals: [u8; N],
|
||||
) -> ([u8; M], [u8; M]) {
|
||||
const fn split_array<const N: usize, const M: usize>(vals: [u8; N]) -> ([u8; M], [u8; M]) {
|
||||
let mut a = [0; M];
|
||||
let mut b = [0; M];
|
||||
let mut i = 0;
|
||||
@@ -915,8 +909,7 @@ mod tests {
|
||||
|
||||
// Addition
|
||||
let actual = il.wrapping_add(ir);
|
||||
let (expected, overflow) =
|
||||
i256::from_bigint_with_overflow(bl.clone() + br.clone());
|
||||
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() + br.clone());
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
let checked = il.checked_add(ir);
|
||||
@@ -927,8 +920,7 @@ mod tests {
|
||||
|
||||
// Subtraction
|
||||
let actual = il.wrapping_sub(ir);
|
||||
let (expected, overflow) =
|
||||
i256::from_bigint_with_overflow(bl.clone() - br.clone());
|
||||
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() - br.clone());
|
||||
assert_eq!(actual.to_string(), expected.to_string());
|
||||
|
||||
let checked = il.checked_sub(ir);
|
||||
@@ -939,8 +931,7 @@ mod tests {
|
||||
|
||||
// Multiplication
|
||||
let actual = il.wrapping_mul(ir);
|
||||
let (expected, overflow) =
|
||||
i256::from_bigint_with_overflow(bl.clone() * br.clone());
|
||||
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() * br.clone());
|
||||
assert_eq!(actual.to_string(), expected.to_string());
|
||||
|
||||
let checked = il.checked_mul(ir);
|
||||
@@ -996,8 +987,7 @@ mod tests {
|
||||
// Exponentiation
|
||||
for exp in vec![0, 1, 2, 3, 8, 100].into_iter() {
|
||||
let actual = il.wrapping_pow(exp);
|
||||
let (expected, overflow) =
|
||||
i256::from_bigint_with_overflow(bl.clone().pow(exp));
|
||||
let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone().pow(exp));
|
||||
assert_eq!(actual.to_string(), expected.to_string());
|
||||
|
||||
let checked = il.checked_pow(exp);
|
||||
@@ -1212,7 +1202,10 @@ mod tests {
|
||||
("000000000000000000000000000000000000000", Some(i256::ZERO)),
|
||||
("0000000000000000000000000000000000000000-11", None),
|
||||
("11-1111111111111111111111111111111111111", None),
|
||||
("115792089237316195423570985008687907853269984665640564039457584007913129639936", None)
|
||||
(
|
||||
"115792089237316195423570985008687907853269984665640564039457584007913129639936",
|
||||
None,
|
||||
),
|
||||
];
|
||||
for (case, expected) in cases {
|
||||
assert_eq!(i256::from_string(case), expected)
|
||||
|
||||
@@ -223,13 +223,7 @@ impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
|
||||
fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
|
||||
assert_eq!(self.len, rhs.len);
|
||||
BooleanBuffer {
|
||||
buffer: buffer_bin_and(
|
||||
&self.buffer,
|
||||
self.offset,
|
||||
&rhs.buffer,
|
||||
rhs.offset,
|
||||
self.len,
|
||||
),
|
||||
buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
|
||||
offset: 0,
|
||||
len: self.len,
|
||||
}
|
||||
@@ -242,13 +236,7 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
|
||||
fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
|
||||
assert_eq!(self.len, rhs.len);
|
||||
BooleanBuffer {
|
||||
buffer: buffer_bin_or(
|
||||
&self.buffer,
|
||||
self.offset,
|
||||
&rhs.buffer,
|
||||
rhs.offset,
|
||||
self.len,
|
||||
),
|
||||
buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
|
||||
offset: 0,
|
||||
len: self.len,
|
||||
}
|
||||
@@ -261,13 +249,7 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer {
|
||||
fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
|
||||
assert_eq!(self.len, rhs.len);
|
||||
BooleanBuffer {
|
||||
buffer: buffer_bin_xor(
|
||||
&self.buffer,
|
||||
self.offset,
|
||||
&rhs.buffer,
|
||||
rhs.offset,
|
||||
self.len,
|
||||
),
|
||||
buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
|
||||
offset: 0,
|
||||
len: self.len,
|
||||
}
|
||||
@@ -428,8 +410,7 @@ mod tests {
|
||||
let buf = Buffer::from(&[0, 1, 1, 0, 0]);
|
||||
let boolean_buf = &BooleanBuffer::new(buf, offset, len);
|
||||
|
||||
let expected =
|
||||
BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
|
||||
let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len);
|
||||
assert_eq!(!boolean_buf, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -523,9 +523,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "the offset of the new Buffer cannot exceed the existing length"
|
||||
)]
|
||||
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
|
||||
fn test_slice_offset_out_of_bound() {
|
||||
let buf = Buffer::from(&[2, 4, 6, 8, 10]);
|
||||
buf.slice(6);
|
||||
@@ -688,9 +686,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "the offset of the new Buffer cannot exceed the existing length"
|
||||
)]
|
||||
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
|
||||
fn slice_overflow() {
|
||||
let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
|
||||
buffer.slice_with_length(2, usize::MAX);
|
||||
|
||||
@@ -334,9 +334,7 @@ impl MutableBuffer {
|
||||
|
||||
#[inline]
|
||||
pub(super) fn into_buffer(self) -> Buffer {
|
||||
let bytes = unsafe {
|
||||
Bytes::new(self.data, self.len, Deallocation::Standard(self.layout))
|
||||
};
|
||||
let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
|
||||
std::mem::forget(self);
|
||||
Buffer::from_bytes(bytes)
|
||||
}
|
||||
@@ -351,8 +349,7 @@ impl MutableBuffer {
|
||||
// SAFETY
|
||||
// ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
|
||||
// implementation outside this crate, and this method checks alignment
|
||||
let (prefix, offsets, suffix) =
|
||||
unsafe { self.as_slice_mut().align_to_mut::<T>() };
|
||||
let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::<T>() };
|
||||
assert!(prefix.is_empty() && suffix.is_empty());
|
||||
offsets
|
||||
}
|
||||
@@ -604,9 +601,7 @@ impl MutableBuffer {
|
||||
// we can't specialize `extend` for `TrustedLen` like `Vec` does.
|
||||
// 2. `from_trusted_len_iter_bool` is faster.
|
||||
#[inline]
|
||||
pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(
|
||||
mut iterator: I,
|
||||
) -> Self {
|
||||
pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(mut iterator: I) -> Self {
|
||||
let (_, upper) = iterator.size_hint();
|
||||
let len = upper.expect("from_trusted_len_iter requires an upper limit");
|
||||
|
||||
|
||||
@@ -71,10 +71,7 @@ impl NullBuffer {
|
||||
/// This is commonly used by binary operations where the result is NULL if either
|
||||
/// of the input values is NULL. Handling the null mask separately in this way
|
||||
/// can yield significant performance improvements over an iterator approach
|
||||
pub fn union(
|
||||
lhs: Option<&NullBuffer>,
|
||||
rhs: Option<&NullBuffer>,
|
||||
) -> Option<NullBuffer> {
|
||||
pub fn union(lhs: Option<&NullBuffer>, rhs: Option<&NullBuffer>) -> Option<NullBuffer> {
|
||||
match (lhs, rhs) {
|
||||
(Some(lhs), Some(rhs)) => Some(Self::new(lhs.inner() & rhs.inner())),
|
||||
(Some(n), None) | (None, Some(n)) => Some(n.clone()),
|
||||
|
||||
@@ -219,8 +219,7 @@ mod tests {
|
||||
assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
|
||||
|
||||
let half_max = i32::MAX / 2;
|
||||
let buffer =
|
||||
OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
|
||||
let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
|
||||
assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
|
||||
}
|
||||
|
||||
|
||||
@@ -184,10 +184,6 @@ pub fn buffer_bin_xor(
|
||||
|
||||
/// Apply a bitwise not to one input and return the result as a Buffer.
|
||||
/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
|
||||
pub fn buffer_unary_not(
|
||||
left: &Buffer,
|
||||
offset_in_bits: usize,
|
||||
len_in_bits: usize,
|
||||
) -> Buffer {
|
||||
pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
|
||||
bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
|
||||
}
|
||||
|
||||
@@ -110,11 +110,7 @@ where
|
||||
///
|
||||
/// - `buffer` must contain strictly increasing values greater than zero
|
||||
/// - The last value of `buffer` must be greater than or equal to `offset + len`
|
||||
pub unsafe fn new_unchecked(
|
||||
run_ends: ScalarBuffer<E>,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
) -> Self {
|
||||
pub unsafe fn new_unchecked(run_ends: ScalarBuffer<E>, offset: usize, len: usize) -> Self {
|
||||
Self {
|
||||
run_ends,
|
||||
offset,
|
||||
|
||||
@@ -221,9 +221,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "Memory pointer is not aligned with the specified scalar type"
|
||||
)]
|
||||
#[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
|
||||
fn test_unaligned() {
|
||||
let expected = [0_i32, 1, 2];
|
||||
let buffer = Buffer::from_iter(expected.iter().cloned());
|
||||
@@ -232,18 +230,14 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "the offset of the new Buffer cannot exceed the existing length"
|
||||
)]
|
||||
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
|
||||
fn test_length_out_of_bounds() {
|
||||
let buffer = Buffer::from_iter([0_i32, 1, 2]);
|
||||
ScalarBuffer::<i32>::new(buffer, 1, 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(
|
||||
expected = "the offset of the new Buffer cannot exceed the existing length"
|
||||
)]
|
||||
#[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
|
||||
fn test_offset_out_of_bounds() {
|
||||
let buffer = Buffer::from_iter([0_i32, 1, 2]);
|
||||
ScalarBuffer::<i32>::new(buffer, 4, 0);
|
||||
|
||||
@@ -154,14 +154,12 @@ impl BooleanBufferBuilder {
|
||||
|
||||
if cur_remainder != 0 {
|
||||
// Pad last byte with 1s
|
||||
*self.buffer.as_slice_mut().last_mut().unwrap() |=
|
||||
!((1 << cur_remainder) - 1)
|
||||
*self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1)
|
||||
}
|
||||
self.buffer.resize(new_len_bytes, 0xFF);
|
||||
if new_remainder != 0 {
|
||||
// Clear remaining bits
|
||||
*self.buffer.as_slice_mut().last_mut().unwrap() &=
|
||||
(1 << new_remainder) - 1
|
||||
*self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1
|
||||
}
|
||||
self.len = new_len;
|
||||
}
|
||||
|
||||
@@ -60,11 +60,7 @@ impl Bytes {
|
||||
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
|
||||
/// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
|
||||
#[inline]
|
||||
pub(crate) unsafe fn new(
|
||||
ptr: NonNull<u8>,
|
||||
len: usize,
|
||||
deallocation: Deallocation,
|
||||
) -> Bytes {
|
||||
pub(crate) unsafe fn new(ptr: NonNull<u8>, len: usize, deallocation: Deallocation) -> Bytes {
|
||||
Bytes {
|
||||
ptr,
|
||||
len,
|
||||
|
||||
@@ -60,8 +60,7 @@ impl<'a> UnalignedBitChunk<'a> {
|
||||
|
||||
// If less than 8 bytes, read into prefix
|
||||
if buffer.len() <= 8 {
|
||||
let (suffix_mask, trailing_padding) =
|
||||
compute_suffix_mask(len, offset_padding);
|
||||
let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
|
||||
let prefix = read_u64(buffer) & suffix_mask & prefix_mask;
|
||||
|
||||
return Self {
|
||||
@@ -75,8 +74,7 @@ impl<'a> UnalignedBitChunk<'a> {
|
||||
|
||||
// If less than 16 bytes, read into prefix and suffix
|
||||
if buffer.len() <= 16 {
|
||||
let (suffix_mask, trailing_padding) =
|
||||
compute_suffix_mask(len, offset_padding);
|
||||
let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding);
|
||||
let prefix = read_u64(&buffer[..8]) & prefix_mask;
|
||||
let suffix = read_u64(&buffer[8..]) & suffix_mask;
|
||||
|
||||
@@ -167,10 +165,7 @@ impl<'a> UnalignedBitChunk<'a> {
|
||||
}
|
||||
|
||||
pub type UnalignedBitChunkIterator<'a> = std::iter::Chain<
|
||||
std::iter::Chain<
|
||||
std::option::IntoIter<u64>,
|
||||
std::iter::Cloned<std::slice::Iter<'a, u64>>,
|
||||
>,
|
||||
std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>,
|
||||
std::option::IntoIter<u64>,
|
||||
>;
|
||||
|
||||
@@ -338,9 +333,8 @@ impl Iterator for BitChunkIterator<'_> {
|
||||
} else {
|
||||
// the constructor ensures that bit_offset is in 0..8
|
||||
// that means we need to read at most one additional byte to fill in the high bits
|
||||
let next = unsafe {
|
||||
std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64
|
||||
};
|
||||
let next =
|
||||
unsafe { std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 };
|
||||
|
||||
(current >> bit_offset) | (next << (64 - bit_offset))
|
||||
};
|
||||
@@ -387,8 +381,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_iter_unaligned() {
|
||||
let input: &[u8] = &[
|
||||
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
|
||||
0b00100000, 0b01000000, 0b11111111,
|
||||
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
|
||||
0b01000000, 0b11111111,
|
||||
];
|
||||
let buffer: Buffer = Buffer::from(input);
|
||||
|
||||
@@ -408,8 +402,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_iter_unaligned_remainder_1_byte() {
|
||||
let input: &[u8] = &[
|
||||
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
|
||||
0b00100000, 0b01000000, 0b11111111,
|
||||
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000,
|
||||
0b01000000, 0b11111111,
|
||||
];
|
||||
let buffer: Buffer = Buffer::from(input);
|
||||
|
||||
@@ -442,8 +436,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_iter_unaligned_remainder_bits_large() {
|
||||
let input: &[u8] = &[
|
||||
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
|
||||
0b11111111, 0b00000000, 0b11111111,
|
||||
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111,
|
||||
0b00000000, 0b11111111,
|
||||
];
|
||||
let buffer: Buffer = Buffer::from(input);
|
||||
|
||||
@@ -637,11 +631,8 @@ mod tests {
|
||||
let max_truncate = 128.min(mask_len - offset);
|
||||
let truncate = rng.gen::<usize>().checked_rem(max_truncate).unwrap_or(0);
|
||||
|
||||
let unaligned = UnalignedBitChunk::new(
|
||||
buffer.as_slice(),
|
||||
offset,
|
||||
mask_len - offset - truncate,
|
||||
);
|
||||
let unaligned =
|
||||
UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate);
|
||||
|
||||
let bool_slice = &bools[offset..mask_len - truncate];
|
||||
|
||||
|
||||
@@ -276,8 +276,8 @@ mod tests {
|
||||
assert_eq!(
|
||||
actual,
|
||||
&[
|
||||
false, true, false, false, true, false, true, false, false, false, false,
|
||||
false, true, false
|
||||
false, true, false, false, true, false, true, false, false, false, false, false,
|
||||
true, false
|
||||
]
|
||||
);
|
||||
|
||||
|
||||
@@ -42,8 +42,7 @@ pub fn set_bits(
|
||||
let chunks = BitChunks::new(data, offset_read + bits_to_align, len - bits_to_align);
|
||||
chunks.iter().for_each(|chunk| {
|
||||
null_count += chunk.count_zeros();
|
||||
write_data[write_byte_index..write_byte_index + 8]
|
||||
.copy_from_slice(&chunk.to_le_bytes());
|
||||
write_data[write_byte_index..write_byte_index + 8].copy_from_slice(&chunk.to_le_bytes());
|
||||
write_byte_index += 8;
|
||||
});
|
||||
|
||||
@@ -70,8 +69,8 @@ mod tests {
|
||||
fn test_set_bits_aligned() {
|
||||
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let source: &[u8] = &[
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b10100101,
|
||||
];
|
||||
|
||||
let destination_offset = 8;
|
||||
@@ -80,8 +79,8 @@ mod tests {
|
||||
let len = 64;
|
||||
|
||||
let expected_data: &[u8] = &[
|
||||
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101, 0,
|
||||
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b10100101, 0,
|
||||
];
|
||||
let expected_null_count = 24;
|
||||
let result = set_bits(
|
||||
@@ -100,8 +99,8 @@ mod tests {
|
||||
fn test_set_bits_unaligned_destination_start() {
|
||||
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let source: &[u8] = &[
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b10100101,
|
||||
];
|
||||
|
||||
let destination_offset = 3;
|
||||
@@ -110,8 +109,8 @@ mod tests {
|
||||
let len = 64;
|
||||
|
||||
let expected_data: &[u8] = &[
|
||||
0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111,
|
||||
0b00111110, 0b00101111, 0b00000101, 0b00000000,
|
||||
0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110,
|
||||
0b00101111, 0b00000101, 0b00000000,
|
||||
];
|
||||
let expected_null_count = 24;
|
||||
let result = set_bits(
|
||||
@@ -130,8 +129,8 @@ mod tests {
|
||||
fn test_set_bits_unaligned_destination_end() {
|
||||
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let source: &[u8] = &[
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b10100101,
|
||||
];
|
||||
|
||||
let destination_offset = 8;
|
||||
@@ -140,8 +139,8 @@ mod tests {
|
||||
let len = 62;
|
||||
|
||||
let expected_data: &[u8] = &[
|
||||
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b00100101, 0,
|
||||
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b00100101, 0,
|
||||
];
|
||||
let expected_null_count = 23;
|
||||
let result = set_bits(
|
||||
@@ -160,9 +159,9 @@ mod tests {
|
||||
fn test_set_bits_unaligned() {
|
||||
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let source: &[u8] = &[
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
|
||||
0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101,
|
||||
0b10011001, 0b11011011, 0b11101011, 0b11000011,
|
||||
];
|
||||
|
||||
let destination_offset = 3;
|
||||
@@ -171,9 +170,8 @@ mod tests {
|
||||
let len = 95;
|
||||
|
||||
let expected_data: &[u8] = &[
|
||||
0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
|
||||
0b01111001, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000,
|
||||
0b00000001,
|
||||
0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001,
|
||||
0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001,
|
||||
];
|
||||
let expected_null_count = 35;
|
||||
let result = set_bits(
|
||||
|
||||
+423
-735
File diff suppressed because it is too large
Load Diff
+14
-48
@@ -129,10 +129,7 @@ impl<'a> FormatOptions<'a> {
|
||||
}
|
||||
|
||||
/// Overrides the format used for [`DataType::Timestamp`] columns with a timezone
|
||||
pub const fn with_timestamp_tz_format(
|
||||
self,
|
||||
timestamp_tz_format: Option<&'a str>,
|
||||
) -> Self {
|
||||
pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
|
||||
Self {
|
||||
timestamp_tz_format,
|
||||
..self
|
||||
@@ -173,9 +170,7 @@ impl<'a> ValueFormatter<'a> {
|
||||
match self.formatter.format.write(self.idx, s) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(FormatError::Arrow(e)) => Err(e),
|
||||
Err(FormatError::Format(_)) => {
|
||||
Err(ArrowError::CastError("Format error".to_string()))
|
||||
}
|
||||
Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -260,10 +255,7 @@ impl<'a> ArrayFormatter<'a> {
|
||||
/// Returns an [`ArrayFormatter`] that can be used to format `array`
|
||||
///
|
||||
/// This returns an error if an array of the given data type cannot be formatted
|
||||
pub fn try_new(
|
||||
array: &'a dyn Array,
|
||||
options: &FormatOptions<'a>,
|
||||
) -> Result<Self, ArrowError> {
|
||||
pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
|
||||
Ok(Self {
|
||||
format: make_formatter(array, options)?,
|
||||
safe: options.safe,
|
||||
@@ -472,9 +464,7 @@ fn write_timestamp(
|
||||
let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
|
||||
match format {
|
||||
Some(s) => write!(f, "{}", date.format(s))?,
|
||||
None => {
|
||||
write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?
|
||||
}
|
||||
None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
|
||||
}
|
||||
}
|
||||
None => match format {
|
||||
@@ -526,19 +516,11 @@ macro_rules! temporal_display {
|
||||
impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
|
||||
type State = TimeFormat<'a>;
|
||||
|
||||
fn prepare(
|
||||
&self,
|
||||
options: &FormatOptions<'a>,
|
||||
) -> Result<Self::State, ArrowError> {
|
||||
fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
|
||||
Ok(options.$format)
|
||||
}
|
||||
|
||||
fn write(
|
||||
&self,
|
||||
fmt: &Self::State,
|
||||
idx: usize,
|
||||
f: &mut dyn Write,
|
||||
) -> FormatResult {
|
||||
fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
|
||||
let value = self.value(idx);
|
||||
let naive = $convert(value as _).ok_or_else(|| {
|
||||
ArrowError::CastError(format!(
|
||||
@@ -575,19 +557,11 @@ macro_rules! duration_display {
|
||||
impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
|
||||
type State = DurationFormat;
|
||||
|
||||
fn prepare(
|
||||
&self,
|
||||
options: &FormatOptions<'a>,
|
||||
) -> Result<Self::State, ArrowError> {
|
||||
fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
|
||||
Ok(options.duration_format)
|
||||
}
|
||||
|
||||
fn write(
|
||||
&self,
|
||||
fmt: &Self::State,
|
||||
idx: usize,
|
||||
f: &mut dyn Write,
|
||||
) -> FormatResult {
|
||||
fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
|
||||
let v = self.value(idx);
|
||||
match fmt {
|
||||
DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
|
||||
@@ -704,8 +678,7 @@ impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
|
||||
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
|
||||
let value: u128 = self.value(idx) as u128;
|
||||
|
||||
let months_part: i32 =
|
||||
((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
|
||||
let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32;
|
||||
let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32;
|
||||
let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
|
||||
|
||||
@@ -937,10 +910,7 @@ impl<'a> DisplayIndexState<'a> for &'a UnionArray {
|
||||
/// suitable for converting large arrays or record batches.
|
||||
///
|
||||
/// Please see [`ArrayFormatter`] for a more performant interface
|
||||
pub fn array_value_to_string(
|
||||
column: &dyn Array,
|
||||
row: usize,
|
||||
) -> Result<String, ArrowError> {
|
||||
pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
|
||||
let options = FormatOptions::default().with_display_error(true);
|
||||
let formatter = ArrayFormatter::try_new(column, &options)?;
|
||||
Ok(formatter.value(row).to_string())
|
||||
@@ -986,12 +956,9 @@ mod tests {
|
||||
// [[a, b, c], [d, e, f], [g, h]]
|
||||
let entry_offsets = [0, 3, 6, 8];
|
||||
|
||||
let map_array = MapArray::new_from_strings(
|
||||
keys.clone().into_iter(),
|
||||
&values_data,
|
||||
&entry_offsets,
|
||||
)
|
||||
.unwrap();
|
||||
let map_array =
|
||||
MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
"{d: 30, e: 40, f: 50}",
|
||||
array_value_to_string(&map_array, 1).unwrap()
|
||||
@@ -1006,8 +973,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_array_value_to_string_duration() {
|
||||
let iso_fmt = FormatOptions::new();
|
||||
let pretty_fmt =
|
||||
FormatOptions::new().with_duration_format(DurationFormat::Pretty);
|
||||
let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);
|
||||
|
||||
let array = DurationNanosecondArray::from(vec![
|
||||
1,
|
||||
|
||||
+92
-123
@@ -64,10 +64,7 @@ impl TimestampParser {
|
||||
|
||||
/// Parses a date of the form `1997-01-31`
|
||||
fn date(&self) -> Option<NaiveDate> {
|
||||
if self.mask & 0b1111111111 != 0b1101101111
|
||||
|| !self.test(4, b'-')
|
||||
|| !self.test(7, b'-')
|
||||
{
|
||||
if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -173,13 +170,9 @@ impl TimestampParser {
|
||||
/// * "2023-01-01 04:05:06.789 PST",
|
||||
///
|
||||
/// [IANA timezones]: https://www.iana.org/time-zones
|
||||
pub fn string_to_datetime<T: TimeZone>(
|
||||
timezone: &T,
|
||||
s: &str,
|
||||
) -> Result<DateTime<T>, ArrowError> {
|
||||
let err = |ctx: &str| {
|
||||
ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"))
|
||||
};
|
||||
pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
|
||||
let err =
|
||||
|ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
|
||||
|
||||
let bytes = s.as_bytes();
|
||||
if bytes.len() < 10 {
|
||||
@@ -300,9 +293,8 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
|
||||
/// This function does not support parsing strings with a timezone
|
||||
/// or offset specified, as it considers only time since midnight.
|
||||
pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
|
||||
let nt = string_to_time(s).ok_or_else(|| {
|
||||
ArrowError::ParseError(format!("Failed to parse \'{s}\' as time"))
|
||||
})?;
|
||||
let nt = string_to_time(s)
|
||||
.ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
|
||||
Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
|
||||
}
|
||||
|
||||
@@ -313,12 +305,8 @@ fn string_to_time(s: &str) -> Option<NaiveTime> {
|
||||
}
|
||||
|
||||
let (am, bytes) = match bytes.get(bytes.len() - 3..) {
|
||||
Some(b" AM" | b" am" | b" Am" | b" aM") => {
|
||||
(Some(true), &bytes[..bytes.len() - 3])
|
||||
}
|
||||
Some(b" PM" | b" pm" | b" pM" | b" Pm") => {
|
||||
(Some(false), &bytes[..bytes.len() - 3])
|
||||
}
|
||||
Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
|
||||
Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
|
||||
_ => (None, bytes),
|
||||
};
|
||||
|
||||
@@ -501,10 +489,7 @@ impl Parser for Time64NanosecondType {
|
||||
|
||||
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
|
||||
let nt = NaiveTime::parse_from_str(string, format).ok()?;
|
||||
Some(
|
||||
nt.num_seconds_from_midnight() as i64 * 1_000_000_000
|
||||
+ nt.nanosecond() as i64,
|
||||
)
|
||||
Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -519,10 +504,7 @@ impl Parser for Time64MicrosecondType {
|
||||
|
||||
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
|
||||
let nt = NaiveTime::parse_from_str(string, format).ok()?;
|
||||
Some(
|
||||
nt.num_seconds_from_midnight() as i64 * 1_000_000
|
||||
+ nt.nanosecond() as i64 / 1_000,
|
||||
)
|
||||
Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -537,10 +519,7 @@ impl Parser for Time32MillisecondType {
|
||||
|
||||
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
|
||||
let nt = NaiveTime::parse_from_str(string, format).ok()?;
|
||||
Some(
|
||||
nt.num_seconds_from_midnight() as i32 * 1_000
|
||||
+ nt.nanosecond() as i32 / 1_000_000,
|
||||
)
|
||||
Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -555,10 +534,7 @@ impl Parser for Time32SecondType {
|
||||
|
||||
fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
|
||||
let nt = NaiveTime::parse_from_str(string, format).ok()?;
|
||||
Some(
|
||||
nt.num_seconds_from_midnight() as i32
|
||||
+ nt.nanosecond() as i32 / 1_000_000_000,
|
||||
)
|
||||
Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -615,10 +591,8 @@ fn parse_date(string: &str) -> Option<NaiveDate> {
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let year = digits[0] as u16 * 1000
|
||||
+ digits[1] as u16 * 100
|
||||
+ digits[2] as u16 * 10
|
||||
+ digits[3] as u16;
|
||||
let year =
|
||||
digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
|
||||
|
||||
NaiveDate::from_ymd_opt(year as _, month as _, day as _)
|
||||
}
|
||||
@@ -728,8 +702,7 @@ pub fn parse_decimal<T: DecimalType>(
|
||||
fractionals += 1;
|
||||
digits += 1;
|
||||
result = result.mul_wrapping(base);
|
||||
result =
|
||||
result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
|
||||
result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
|
||||
}
|
||||
|
||||
// Fail on "."
|
||||
@@ -771,9 +744,11 @@ pub fn parse_interval_year_month(
|
||||
let config = IntervalParseConfig::new(IntervalUnit::Year);
|
||||
let interval = Interval::parse(value, &config)?;
|
||||
|
||||
let months = interval.to_year_months().map_err(|_| ArrowError::CastError(format!(
|
||||
let months = interval.to_year_months().map_err(|_| {
|
||||
ArrowError::CastError(format!(
|
||||
"Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
|
||||
)))?;
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(IntervalYearMonthType::make_value(0, months))
|
||||
}
|
||||
@@ -888,21 +863,16 @@ impl FromStr for IntervalAmount {
|
||||
Ok(0)
|
||||
} else {
|
||||
integer.parse::<i64>().map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Failed to parse {s} as interval amount"
|
||||
))
|
||||
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
|
||||
})
|
||||
}?;
|
||||
|
||||
let frac_unscaled = frac.parse::<i64>().map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Failed to parse {s} as interval amount"
|
||||
))
|
||||
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
|
||||
})?;
|
||||
|
||||
// scale fractional part by interval precision
|
||||
let frac =
|
||||
frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
|
||||
let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
|
||||
|
||||
// propagate the sign of the integer part to the fractional part
|
||||
let frac = if integer < 0 || explicit_neg {
|
||||
@@ -915,9 +885,9 @@ impl FromStr for IntervalAmount {
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(
|
||||
format!("Failed to parse {s} as interval amount"),
|
||||
)),
|
||||
Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
|
||||
"Failed to parse {s} as interval amount"
|
||||
))),
|
||||
Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
|
||||
Err(ArrowError::ParseError(format!(
|
||||
"{s} exceeds the precision available for interval amount"
|
||||
@@ -925,9 +895,7 @@ impl FromStr for IntervalAmount {
|
||||
}
|
||||
Some(_) | None => {
|
||||
let integer = s.parse::<i64>().map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Failed to parse {s} as interval amount"
|
||||
))
|
||||
ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
|
||||
})?;
|
||||
|
||||
let result = Self { integer, frac: 0 };
|
||||
@@ -1005,25 +973,20 @@ impl Interval {
|
||||
/// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
|
||||
/// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
|
||||
/// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
|
||||
fn add(
|
||||
&self,
|
||||
amount: IntervalAmount,
|
||||
unit: IntervalUnit,
|
||||
) -> Result<Self, ArrowError> {
|
||||
fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
|
||||
let result = match unit {
|
||||
IntervalUnit::Century => {
|
||||
let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
|
||||
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
|
||||
let months =
|
||||
months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} centuries as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
let months = months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} centuries as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
|
||||
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
|
||||
}
|
||||
@@ -1031,32 +994,30 @@ impl Interval {
|
||||
let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
|
||||
|
||||
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
|
||||
let months =
|
||||
months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} decades as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
let months = months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} decades as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
|
||||
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
|
||||
}
|
||||
IntervalUnit::Year => {
|
||||
let months_int = amount.integer.mul_checked(12)?;
|
||||
let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
|
||||
let months =
|
||||
months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} years as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
let months = months_int
|
||||
.add_checked(month_frac)?
|
||||
.try_into()
|
||||
.map_err(|_| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Unable to represent {} years as months in a signed 32-bit integer",
|
||||
&amount.integer
|
||||
))
|
||||
})?;
|
||||
|
||||
Self::new(self.months.add_checked(months)?, self.days, self.nanos)
|
||||
}
|
||||
@@ -1090,8 +1051,7 @@ impl Interval {
|
||||
))
|
||||
})?;
|
||||
|
||||
let nanos =
|
||||
amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
|
||||
Self::new(
|
||||
self.months,
|
||||
@@ -1107,8 +1067,7 @@ impl Interval {
|
||||
))
|
||||
})?;
|
||||
|
||||
let nanos =
|
||||
amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
|
||||
Self::new(
|
||||
self.months,
|
||||
@@ -1118,8 +1077,7 @@ impl Interval {
|
||||
}
|
||||
IntervalUnit::Hour => {
|
||||
let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
|
||||
let nanos_frac =
|
||||
amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
|
||||
let nanos = nanos_int.add_checked(nanos_frac)?;
|
||||
|
||||
Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
|
||||
@@ -1398,8 +1356,7 @@ mod tests {
|
||||
"2030-12-04T17:11:10.123456",
|
||||
];
|
||||
for case in cases {
|
||||
let chrono =
|
||||
NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
|
||||
let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
|
||||
let custom = string_to_datetime(&Utc, case).unwrap();
|
||||
assert_eq!(chrono, custom.naive_utc())
|
||||
}
|
||||
@@ -1431,8 +1388,7 @@ mod tests {
|
||||
];
|
||||
|
||||
for (s, ctx) in cases {
|
||||
let expected =
|
||||
format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
|
||||
let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
|
||||
let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
|
||||
assert_eq!(actual, expected)
|
||||
}
|
||||
@@ -1497,8 +1453,7 @@ mod tests {
|
||||
assert_eq!(local, "2020-09-08 15:42:29");
|
||||
|
||||
let dt =
|
||||
NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ")
|
||||
.unwrap();
|
||||
NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
|
||||
let local: Tz = "+08:00".parse().unwrap();
|
||||
|
||||
// Parsed as offset from UTC
|
||||
@@ -1629,10 +1584,7 @@ mod tests {
|
||||
|
||||
// custom format
|
||||
assert_eq!(
|
||||
Time64NanosecondType::parse_formatted(
|
||||
"02 - 10 - 01 - .1234567",
|
||||
"%H - %M - %S - %.f"
|
||||
),
|
||||
Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
|
||||
Some(7_801_123_456_700)
|
||||
);
|
||||
}
|
||||
@@ -1709,10 +1661,7 @@ mod tests {
|
||||
|
||||
// custom format
|
||||
assert_eq!(
|
||||
Time64MicrosecondType::parse_formatted(
|
||||
"02 - 10 - 01 - .1234",
|
||||
"%H - %M - %S - %.f"
|
||||
),
|
||||
Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
|
||||
Some(7_801_123_400)
|
||||
);
|
||||
}
|
||||
@@ -1759,10 +1708,7 @@ mod tests {
|
||||
|
||||
// custom format
|
||||
assert_eq!(
|
||||
Time32MillisecondType::parse_formatted(
|
||||
"02 - 10 - 01 - .1",
|
||||
"%H - %M - %S - %.f"
|
||||
),
|
||||
Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
|
||||
Some(7_801_100)
|
||||
);
|
||||
}
|
||||
@@ -2005,8 +1951,19 @@ mod tests {
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Interval::new(-13i32, -8i32, -NANOS_PER_HOUR - NANOS_PER_MINUTE - NANOS_PER_SECOND - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64),
|
||||
Interval::parse("-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond", &config).unwrap(),
|
||||
Interval::new(
|
||||
-13i32,
|
||||
-8i32,
|
||||
-NANOS_PER_HOUR
|
||||
- NANOS_PER_MINUTE
|
||||
- NANOS_PER_SECOND
|
||||
- (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
|
||||
),
|
||||
Interval::parse(
|
||||
"-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
|
||||
&config
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -2280,22 +2237,34 @@ mod tests {
|
||||
let edge_tests_256 = [
|
||||
(
|
||||
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
|
||||
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
|
||||
i256::from_string(
|
||||
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
|
||||
)
|
||||
.unwrap(),
|
||||
0,
|
||||
),
|
||||
(
|
||||
"999999999999999999999999999999999999999999999999999999999999999999999999.9999",
|
||||
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
|
||||
i256::from_string(
|
||||
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
|
||||
)
|
||||
.unwrap(),
|
||||
4,
|
||||
),
|
||||
(
|
||||
"99999999999999999999999999999999999999999999999999.99999999999999999999999999",
|
||||
i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(),
|
||||
i256::from_string(
|
||||
"9999999999999999999999999999999999999999999999999999999999999999999999999999",
|
||||
)
|
||||
.unwrap(),
|
||||
26,
|
||||
),
|
||||
(
|
||||
"99999999999999999999999999999999999999999999999999",
|
||||
i256::from_string("9999999999999999999999999999999999999999999999999900000000000000000000000000").unwrap(),
|
||||
i256::from_string(
|
||||
"9999999999999999999999999999999999999999999999999900000000000000000000000000",
|
||||
)
|
||||
.unwrap(),
|
||||
26,
|
||||
),
|
||||
];
|
||||
|
||||
+16
-34
@@ -25,9 +25,7 @@ use comfy_table::{Cell, Table};
|
||||
use std::fmt::Display;
|
||||
|
||||
/// Create a visual representation of record batches
|
||||
pub fn pretty_format_batches(
|
||||
results: &[RecordBatch],
|
||||
) -> Result<impl Display, ArrowError> {
|
||||
pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
|
||||
let options = FormatOptions::default().with_display_error(true);
|
||||
pretty_format_batches_with_options(results, &options)
|
||||
}
|
||||
@@ -70,10 +68,7 @@ pub fn print_columns(col_name: &str, results: &[ArrayRef]) -> Result<(), ArrowEr
|
||||
}
|
||||
|
||||
/// Convert a series of record batches into a table
|
||||
fn create_table(
|
||||
results: &[RecordBatch],
|
||||
options: &FormatOptions,
|
||||
) -> Result<Table, ArrowError> {
|
||||
fn create_table(results: &[RecordBatch], options: &FormatOptions) -> Result<Table, ArrowError> {
|
||||
let mut table = Table::new();
|
||||
table.load_preset("||--+-++| ++++++");
|
||||
|
||||
@@ -209,8 +204,8 @@ mod tests {
|
||||
let table = pretty_format_columns("a", &columns).unwrap().to_string();
|
||||
|
||||
let expected = vec![
|
||||
"+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |",
|
||||
"| |", "| g |", "+---+",
|
||||
"+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", "| |",
|
||||
"| g |", "+---+",
|
||||
];
|
||||
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
@@ -289,10 +284,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_pretty_format_fixed_size_list() {
|
||||
// define a schema.
|
||||
let field_type = DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Int32, true)),
|
||||
3,
|
||||
);
|
||||
let field_type =
|
||||
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3);
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)]));
|
||||
|
||||
let keys_builder = Int32Array::builder(3);
|
||||
@@ -383,10 +376,7 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
fn timestamp_batch<T: ArrowTimestampType>(
|
||||
timezone: &str,
|
||||
value: T::Native,
|
||||
) -> RecordBatch {
|
||||
fn timestamp_batch<T: ArrowTimestampType>(timezone: &str, value: T::Native) -> RecordBatch {
|
||||
let mut builder = PrimitiveBuilder::<T>::with_capacity(10);
|
||||
builder.append_value(value);
|
||||
builder.append_null();
|
||||
@@ -621,8 +611,8 @@ mod tests {
|
||||
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
let expected = vec![
|
||||
"+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |",
|
||||
"| 3040 |", "+------+",
|
||||
"+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |",
|
||||
"+------+",
|
||||
];
|
||||
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
@@ -660,16 +650,14 @@ mod tests {
|
||||
)),
|
||||
Arc::new(StructArray::from(vec![(
|
||||
Arc::new(Field::new("c121", DataType::Utf8, false)),
|
||||
Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")]))
|
||||
as ArrayRef,
|
||||
Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")])) as ArrayRef,
|
||||
)])) as ArrayRef,
|
||||
),
|
||||
]);
|
||||
let c2 = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();
|
||||
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
let expected = vec![
|
||||
@@ -705,8 +693,7 @@ mod tests {
|
||||
UnionMode::Dense,
|
||||
)]);
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
let expected = vec![
|
||||
@@ -742,8 +729,7 @@ mod tests {
|
||||
UnionMode::Sparse,
|
||||
)]);
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
let expected = vec![
|
||||
@@ -799,8 +785,7 @@ mod tests {
|
||||
UnionMode::Sparse,
|
||||
)]);
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
|
||||
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
let expected = vec![
|
||||
@@ -882,8 +867,7 @@ mod tests {
|
||||
let table = pretty_format_batches(&[batch]).unwrap().to_string();
|
||||
|
||||
let expected = vec![
|
||||
"+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |",
|
||||
"+------+",
|
||||
"+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", "+------+",
|
||||
];
|
||||
|
||||
let actual: Vec<&str> = table.lines().collect();
|
||||
@@ -986,9 +970,7 @@ mod tests {
|
||||
fn test_format_options() {
|
||||
let options = FormatOptions::default().with_null("null");
|
||||
let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), Some(4)]);
|
||||
let batch =
|
||||
RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)])
|
||||
.unwrap();
|
||||
let batch = RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)]).unwrap();
|
||||
|
||||
let column = pretty_format_columns_with_options(
|
||||
"my_column_name",
|
||||
|
||||
+41
-102
@@ -292,8 +292,7 @@ impl Format {
|
||||
|
||||
let header_length = headers.len();
|
||||
// keep track of inferred field types
|
||||
let mut column_types: Vec<InferredDataType> =
|
||||
vec![Default::default(); header_length];
|
||||
let mut column_types: Vec<InferredDataType> = vec![Default::default(); header_length];
|
||||
|
||||
let mut records_count = 0;
|
||||
|
||||
@@ -307,9 +306,7 @@ impl Format {
|
||||
|
||||
// Note since we may be looking at a sample of the data, we make the safe assumption that
|
||||
// they could be nullable
|
||||
for (i, column_type) in
|
||||
column_types.iter_mut().enumerate().take(header_length)
|
||||
{
|
||||
for (i, column_type) in column_types.iter_mut().enumerate().take(header_length) {
|
||||
if let Some(string) = record.get(i) {
|
||||
if !self.null_regex.is_null(string) {
|
||||
column_type.update(string)
|
||||
@@ -606,8 +603,7 @@ impl Decoder {
|
||||
return Ok(bytes);
|
||||
}
|
||||
|
||||
let to_read =
|
||||
self.batch_size.min(self.end - self.line_number) - self.record_decoder.len();
|
||||
let to_read = self.batch_size.min(self.end - self.line_number) - self.record_decoder.len();
|
||||
let (_, bytes) = self.record_decoder.decode(buf, to_read)?;
|
||||
Ok(bytes)
|
||||
}
|
||||
@@ -662,29 +658,23 @@ fn parse(
|
||||
let i = *i;
|
||||
let field = &fields[i];
|
||||
match field.data_type() {
|
||||
DataType::Boolean => {
|
||||
build_boolean_array(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Decimal128(precision, scale) => {
|
||||
build_decimal_array::<Decimal128Type>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
*precision,
|
||||
*scale,
|
||||
null_regex,
|
||||
)
|
||||
}
|
||||
DataType::Decimal256(precision, scale) => {
|
||||
build_decimal_array::<Decimal256Type>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
*precision,
|
||||
*scale,
|
||||
null_regex,
|
||||
)
|
||||
}
|
||||
DataType::Boolean => build_boolean_array(line_number, rows, i, null_regex),
|
||||
DataType::Decimal128(precision, scale) => build_decimal_array::<Decimal128Type>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
*precision,
|
||||
*scale,
|
||||
null_regex,
|
||||
),
|
||||
DataType::Decimal256(precision, scale) => build_decimal_array::<Decimal256Type>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
*precision,
|
||||
*scale,
|
||||
null_regex,
|
||||
),
|
||||
DataType::Int8 => {
|
||||
build_primitive_array::<Int8Type>(line_number, rows, i, null_regex)
|
||||
}
|
||||
@@ -721,34 +711,17 @@ fn parse(
|
||||
DataType::Date64 => {
|
||||
build_primitive_array::<Date64Type>(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Time32(TimeUnit::Second) => build_primitive_array::<
|
||||
Time32SecondType,
|
||||
>(
|
||||
line_number, rows, i, null_regex
|
||||
),
|
||||
DataType::Time32(TimeUnit::Second) => {
|
||||
build_primitive_array::<Time32SecondType>(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Time32(TimeUnit::Millisecond) => {
|
||||
build_primitive_array::<Time32MillisecondType>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
null_regex,
|
||||
)
|
||||
build_primitive_array::<Time32MillisecondType>(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Time64(TimeUnit::Microsecond) => {
|
||||
build_primitive_array::<Time64MicrosecondType>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
null_regex,
|
||||
)
|
||||
build_primitive_array::<Time64MicrosecondType>(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Time64(TimeUnit::Nanosecond) => {
|
||||
build_primitive_array::<Time64NanosecondType>(
|
||||
line_number,
|
||||
rows,
|
||||
i,
|
||||
null_regex,
|
||||
)
|
||||
build_primitive_array::<Time64NanosecondType>(line_number, rows, i, null_regex)
|
||||
}
|
||||
DataType::Timestamp(TimeUnit::Second, tz) => {
|
||||
build_timestamp_array::<TimestampSecondType>(
|
||||
@@ -786,9 +759,7 @@ fn parse(
|
||||
null_regex,
|
||||
)
|
||||
}
|
||||
DataType::Null => {
|
||||
Ok(Arc::new(NullArray::builder(rows.len()).finish()) as ArrayRef)
|
||||
}
|
||||
DataType::Null => Ok(Arc::new(NullArray::builder(rows.len()).finish()) as ArrayRef),
|
||||
DataType::Utf8 => Ok(Arc::new(
|
||||
rows.iter()
|
||||
.map(|row| {
|
||||
@@ -853,8 +824,7 @@ fn parse(
|
||||
})
|
||||
.collect();
|
||||
|
||||
let projected_fields: Fields =
|
||||
projection.iter().map(|i| fields[*i].clone()).collect();
|
||||
let projected_fields: Fields = projection.iter().map(|i| fields[*i].clone()).collect();
|
||||
|
||||
let projected_schema = Arc::new(match metadata {
|
||||
None => Schema::new(projected_fields),
|
||||
@@ -898,8 +868,7 @@ fn build_decimal_array<T: DecimalType>(
|
||||
// append null
|
||||
decimal_builder.append_null();
|
||||
} else {
|
||||
let decimal_value: Result<T::Native, _> =
|
||||
parse_decimal::<T>(s, precision, scale);
|
||||
let decimal_value: Result<T::Native, _> = parse_decimal::<T>(s, precision, scale);
|
||||
match decimal_value {
|
||||
Ok(v) => {
|
||||
decimal_builder.append_value(v);
|
||||
@@ -957,22 +926,10 @@ fn build_timestamp_array<T: ArrowTimestampType>(
|
||||
Ok(Arc::new(match timezone {
|
||||
Some(timezone) => {
|
||||
let tz: Tz = timezone.parse()?;
|
||||
build_timestamp_array_impl::<T, _>(
|
||||
line_number,
|
||||
rows,
|
||||
col_idx,
|
||||
&tz,
|
||||
null_regex,
|
||||
)?
|
||||
.with_timezone(timezone)
|
||||
build_timestamp_array_impl::<T, _>(line_number, rows, col_idx, &tz, null_regex)?
|
||||
.with_timezone(timezone)
|
||||
}
|
||||
None => build_timestamp_array_impl::<T, _>(
|
||||
line_number,
|
||||
rows,
|
||||
col_idx,
|
||||
&Utc,
|
||||
null_regex,
|
||||
)?,
|
||||
None => build_timestamp_array_impl::<T, _>(line_number, rows, col_idx, &Utc, null_regex)?,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1169,10 +1126,7 @@ impl ReaderBuilder {
|
||||
}
|
||||
|
||||
/// Create a new `BufReader` from a buffered reader
|
||||
pub fn build_buffered<R: BufRead>(
|
||||
self,
|
||||
reader: R,
|
||||
) -> Result<BufReader<R>, ArrowError> {
|
||||
pub fn build_buffered<R: BufRead>(self, reader: R) -> Result<BufReader<R>, ArrowError> {
|
||||
Ok(BufReader {
|
||||
reader,
|
||||
decoder: self.build_decoder(),
|
||||
@@ -1318,8 +1272,7 @@ mod tests {
|
||||
Field::new("lng", DataType::Float64, false),
|
||||
]);
|
||||
|
||||
let file_with_headers =
|
||||
File::open("test/data/uk_cities_with_headers.csv").unwrap();
|
||||
let file_with_headers = File::open("test/data/uk_cities_with_headers.csv").unwrap();
|
||||
let file_without_headers = File::open("test/data/uk_cities.csv").unwrap();
|
||||
let both_files = file_with_headers
|
||||
.chain(Cursor::new("\n".to_string()))
|
||||
@@ -1642,8 +1595,7 @@ mod tests {
|
||||
schema.field(5).data_type()
|
||||
);
|
||||
|
||||
let names: Vec<&str> =
|
||||
schema.fields().iter().map(|x| x.name().as_str()).collect();
|
||||
let names: Vec<&str> = schema.fields().iter().map(|x| x.name().as_str()).collect();
|
||||
assert_eq!(
|
||||
names,
|
||||
vec![
|
||||
@@ -1819,16 +1771,11 @@ mod tests {
|
||||
-2203932304000
|
||||
);
|
||||
assert_eq!(
|
||||
Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S")
|
||||
.unwrap(),
|
||||
Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S").unwrap(),
|
||||
-2203932304000
|
||||
);
|
||||
assert_eq!(
|
||||
Date64Type::parse_formatted(
|
||||
"1900-02-28 12:34:56+0030",
|
||||
"%Y-%m-%d %H:%M:%S%z"
|
||||
)
|
||||
.unwrap(),
|
||||
Date64Type::parse_formatted("1900-02-28 12:34:56+0030", "%Y-%m-%d %H:%M:%S%z").unwrap(),
|
||||
-2203932304000 - (30 * 60 * 1000)
|
||||
);
|
||||
}
|
||||
@@ -1865,10 +1812,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_parse_timestamp() {
|
||||
test_parse_timestamp_impl::<TimestampNanosecondType>(
|
||||
None,
|
||||
&[0, 0, -7_200_000_000_000],
|
||||
);
|
||||
test_parse_timestamp_impl::<TimestampNanosecondType>(None, &[0, 0, -7_200_000_000_000]);
|
||||
test_parse_timestamp_impl::<TimestampNanosecondType>(
|
||||
Some("+00:00".into()),
|
||||
&[0, 0, -7_200_000_000_000],
|
||||
@@ -1885,10 +1829,7 @@ mod tests {
|
||||
Some("-03".into()),
|
||||
&[10_800_000, 0, -7_200_000],
|
||||
);
|
||||
test_parse_timestamp_impl::<TimestampSecondType>(
|
||||
Some("-03".into()),
|
||||
&[10_800, 0, -7_200],
|
||||
);
|
||||
test_parse_timestamp_impl::<TimestampSecondType>(Some("-03".into()), &[10_800, 0, -7_200]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2227,10 +2168,8 @@ mod tests {
|
||||
expected_rows
|
||||
);
|
||||
|
||||
let buffered = std::io::BufReader::with_capacity(
|
||||
capacity,
|
||||
File::open(path).unwrap(),
|
||||
);
|
||||
let buffered =
|
||||
std::io::BufReader::with_capacity(capacity, File::open(path).unwrap());
|
||||
|
||||
let reader = ReaderBuilder::new(schema.clone())
|
||||
.with_batch_size(batch_size)
|
||||
|
||||
@@ -76,11 +76,7 @@ impl RecordDecoder {
|
||||
/// Decodes records from `input` returning the number of records and bytes read
|
||||
///
|
||||
/// Note: this expects to be called with an empty `input` to signal EOF
|
||||
pub fn decode(
|
||||
&mut self,
|
||||
input: &[u8],
|
||||
to_read: usize,
|
||||
) -> Result<(usize, usize), ArrowError> {
|
||||
pub fn decode(&mut self, input: &[u8], to_read: usize) -> Result<(usize, usize), ArrowError> {
|
||||
if to_read == 0 {
|
||||
return Ok((0, 0));
|
||||
}
|
||||
@@ -124,11 +120,17 @@ impl RecordDecoder {
|
||||
// Need to allocate more capacity
|
||||
ReadRecordResult::OutputFull => break,
|
||||
ReadRecordResult::OutputEndsFull => {
|
||||
return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got more than {}", self.line_number, self.num_columns, self.current_field)));
|
||||
return Err(ArrowError::CsvError(format!(
|
||||
"incorrect number of fields for line {}, expected {} got more than {}",
|
||||
self.line_number, self.num_columns, self.current_field
|
||||
)));
|
||||
}
|
||||
ReadRecordResult::Record => {
|
||||
if self.current_field != self.num_columns {
|
||||
return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got {}", self.line_number, self.num_columns, self.current_field)));
|
||||
return Err(ArrowError::CsvError(format!(
|
||||
"incorrect number of fields for line {}, expected {} got {}",
|
||||
self.line_number, self.num_columns, self.current_field
|
||||
)));
|
||||
}
|
||||
read += 1;
|
||||
self.current_field = 0;
|
||||
@@ -334,8 +336,7 @@ mod tests {
|
||||
let mut decoder = RecordDecoder::new(Reader::new(), 2);
|
||||
let err = decoder.decode(csv.as_bytes(), 4).unwrap_err().to_string();
|
||||
|
||||
let expected =
|
||||
"Csv error: incorrect number of fields for line 3, expected 2 got 1";
|
||||
let expected = "Csv error: incorrect number of fields for line 3, expected 2 got 1";
|
||||
|
||||
assert_eq!(err, expected);
|
||||
|
||||
|
||||
+13
-32
@@ -389,18 +389,12 @@ mod tests {
|
||||
"consectetur adipiscing elit",
|
||||
"sed do eiusmod tempor",
|
||||
]);
|
||||
let c2 = PrimitiveArray::<Float64Type>::from(vec![
|
||||
Some(123.564532),
|
||||
None,
|
||||
Some(-556132.25),
|
||||
]);
|
||||
let c2 =
|
||||
PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
|
||||
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
|
||||
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
|
||||
let c5 = TimestampMillisecondArray::from(vec![
|
||||
None,
|
||||
Some(1555584887378),
|
||||
Some(1555555555555),
|
||||
]);
|
||||
let c5 =
|
||||
TimestampMillisecondArray::from(vec![None, Some(1555584887378), Some(1555555555555)]);
|
||||
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
|
||||
let c7: DictionaryArray<Int32Type> =
|
||||
vec!["cupcakes", "cupcakes", "foo"].into_iter().collect();
|
||||
@@ -451,13 +445,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
|
||||
Field::new("c2", DataType::Decimal256(76, 6), true),
|
||||
]);
|
||||
|
||||
let mut c1_builder =
|
||||
Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
|
||||
let mut c1_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
|
||||
c1_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
|
||||
let c1 = c1_builder.finish();
|
||||
|
||||
let mut c2_builder =
|
||||
Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
|
||||
let mut c2_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
|
||||
c2_builder.extend(vec![
|
||||
Some(i256::from_i128(-3335724)),
|
||||
Some(i256::from_i128(2179404)),
|
||||
@@ -467,8 +459,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
|
||||
let c2 = c2_builder.finish();
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();
|
||||
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
|
||||
@@ -512,11 +503,8 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
|
||||
"consectetur adipiscing elit",
|
||||
"sed do eiusmod tempor",
|
||||
]);
|
||||
let c2 = PrimitiveArray::<Float64Type>::from(vec![
|
||||
Some(123.564532),
|
||||
None,
|
||||
Some(-556132.25),
|
||||
]);
|
||||
let c2 =
|
||||
PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
|
||||
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
|
||||
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
|
||||
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
|
||||
@@ -629,8 +617,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
|
||||
let c0 = UInt32Array::from(vec![Some(123), Some(234)]);
|
||||
let c1 = Date64Array::from(vec![Some(1926632005177), Some(1926632005177685347)]);
|
||||
let batch =
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)])
|
||||
.unwrap();
|
||||
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)]).unwrap();
|
||||
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
let mut writer = Writer::new(&mut file);
|
||||
@@ -656,15 +643,9 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
|
||||
Field::new("c4", DataType::Time32(TimeUnit::Second), false),
|
||||
]);
|
||||
|
||||
let c1 = TimestampMillisecondArray::from(vec![
|
||||
Some(1555584887378),
|
||||
Some(1635577147000),
|
||||
])
|
||||
.with_timezone("+00:00".to_string());
|
||||
let c2 = TimestampMillisecondArray::from(vec![
|
||||
Some(1555584887378),
|
||||
Some(1635577147000),
|
||||
]);
|
||||
let c1 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)])
|
||||
.with_timezone("+00:00".to_string());
|
||||
let c2 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)]);
|
||||
let c3 = Date32Array::from(vec![3, 2]);
|
||||
let c4 = Time32SecondArray::from(vec![1234, 24680]);
|
||||
|
||||
|
||||
+61
-86
@@ -42,9 +42,7 @@ pub(crate) fn contains_nulls(
|
||||
) -> bool {
|
||||
match null_bit_buffer {
|
||||
Some(buffer) => {
|
||||
match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len)
|
||||
.next()
|
||||
{
|
||||
match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
|
||||
Some((start, end)) => start != 0 || end != len,
|
||||
None => len != 0, // No non-null values
|
||||
}
|
||||
@@ -130,9 +128,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
|
||||
MutableBuffer::new(capacity * k.primitive_width().unwrap()),
|
||||
empty_buffer,
|
||||
],
|
||||
DataType::FixedSizeList(_, _)
|
||||
| DataType::Struct(_)
|
||||
| DataType::RunEndEncoded(_, _) => [empty_buffer, MutableBuffer::new(0)],
|
||||
DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
|
||||
[empty_buffer, MutableBuffer::new(0)]
|
||||
}
|
||||
DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
|
||||
MutableBuffer::new(capacity * mem::size_of::<u8>()),
|
||||
empty_buffer,
|
||||
@@ -159,10 +157,9 @@ pub(crate) fn into_buffers(
|
||||
) -> Vec<Buffer> {
|
||||
match data_type {
|
||||
DataType::Null | DataType::Struct(_) | DataType::FixedSizeList(_, _) => vec![],
|
||||
DataType::Utf8
|
||||
| DataType::Binary
|
||||
| DataType::LargeUtf8
|
||||
| DataType::LargeBinary => vec![buffer1.into(), buffer2.into()],
|
||||
DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => {
|
||||
vec![buffer1.into(), buffer2.into()]
|
||||
}
|
||||
DataType::Union(_, mode) => {
|
||||
match mode {
|
||||
// Based on Union's DataTypeLayout
|
||||
@@ -452,12 +449,11 @@ impl ArrayData {
|
||||
for spec in layout.buffers.iter() {
|
||||
match spec {
|
||||
BufferSpec::FixedWidth { byte_width, .. } => {
|
||||
let buffer_size =
|
||||
self.len.checked_mul(*byte_width).ok_or_else(|| {
|
||||
ArrowError::ComputeError(
|
||||
"Integer overflow computing buffer size".to_string(),
|
||||
)
|
||||
})?;
|
||||
let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
|
||||
ArrowError::ComputeError(
|
||||
"Integer overflow computing buffer size".to_string(),
|
||||
)
|
||||
})?;
|
||||
result += buffer_size;
|
||||
}
|
||||
BufferSpec::VariableWidth => {
|
||||
@@ -590,9 +586,7 @@ impl ArrayData {
|
||||
DataType::LargeBinary | DataType::LargeUtf8 => {
|
||||
(vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
|
||||
}
|
||||
DataType::FixedSizeBinary(i) => {
|
||||
(vec![zeroed(*i as usize * len)], vec![], true)
|
||||
}
|
||||
DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
|
||||
DataType::List(f) | DataType::Map(f, _) => (
|
||||
vec![zeroed((len + 1) * 4)],
|
||||
vec![ArrayData::new_empty(f.data_type())],
|
||||
@@ -749,9 +743,7 @@ impl ArrayData {
|
||||
)));
|
||||
}
|
||||
|
||||
for (i, (buffer, spec)) in
|
||||
self.buffers.iter().zip(layout.buffers.iter()).enumerate()
|
||||
{
|
||||
for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
|
||||
match spec {
|
||||
BufferSpec::FixedWidth {
|
||||
byte_width,
|
||||
@@ -999,10 +991,8 @@ impl ArrayData {
|
||||
}
|
||||
DataType::RunEndEncoded(run_ends_field, values_field) => {
|
||||
self.validate_num_child_data(2)?;
|
||||
let run_ends_data =
|
||||
self.get_valid_child_data(0, run_ends_field.data_type())?;
|
||||
let values_data =
|
||||
self.get_valid_child_data(1, values_field.data_type())?;
|
||||
let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
|
||||
let values_data = self.get_valid_child_data(1, values_field.data_type())?;
|
||||
if run_ends_data.len != values_data.len {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
|
||||
@@ -1022,9 +1012,7 @@ impl ArrayData {
|
||||
for (i, (_, field)) in fields.iter().enumerate() {
|
||||
let field_data = self.get_valid_child_data(i, field.data_type())?;
|
||||
|
||||
if mode == &UnionMode::Sparse
|
||||
&& field_data.len < (self.len + self.offset)
|
||||
{
|
||||
if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"Sparse union child array #{} has length smaller than expected for union array ({} < {})",
|
||||
i, field_data.len, self.len + self.offset
|
||||
@@ -1083,14 +1071,14 @@ impl ArrayData {
|
||||
i: usize,
|
||||
expected_type: &DataType,
|
||||
) -> Result<&ArrayData, ArrowError> {
|
||||
let values_data = self.child_data
|
||||
.get(i)
|
||||
.ok_or_else(|| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"{} did not have enough child arrays. Expected at least {} but had only {}",
|
||||
self.data_type, i+1, self.child_data.len()
|
||||
))
|
||||
})?;
|
||||
let values_data = self.child_data.get(i).ok_or_else(|| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"{} did not have enough child arrays. Expected at least {} but had only {}",
|
||||
self.data_type,
|
||||
i + 1,
|
||||
self.child_data.len()
|
||||
))
|
||||
})?;
|
||||
|
||||
if expected_type != &values_data.data_type {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
@@ -1160,7 +1148,8 @@ impl ArrayData {
|
||||
if actual != nulls.null_count() {
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"null_count value ({}) doesn't match actual number of nulls in array ({})",
|
||||
nulls.null_count(), actual
|
||||
nulls.null_count(),
|
||||
actual
|
||||
)));
|
||||
}
|
||||
}
|
||||
@@ -1209,23 +1198,22 @@ impl ArrayData {
|
||||
) -> Result<(), ArrowError> {
|
||||
let mask = match mask {
|
||||
Some(mask) => mask,
|
||||
None => return match child.null_count() {
|
||||
0 => Ok(()),
|
||||
_ => Err(ArrowError::InvalidArgumentError(format!(
|
||||
"non-nullable child of type {} contains nulls not present in parent {}",
|
||||
child.data_type,
|
||||
self.data_type
|
||||
))),
|
||||
},
|
||||
None => {
|
||||
return match child.null_count() {
|
||||
0 => Ok(()),
|
||||
_ => Err(ArrowError::InvalidArgumentError(format!(
|
||||
"non-nullable child of type {} contains nulls not present in parent {}",
|
||||
child.data_type, self.data_type
|
||||
))),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match child.nulls() {
|
||||
Some(nulls) if !mask.contains(nulls) => {
|
||||
Err(ArrowError::InvalidArgumentError(format!(
|
||||
"non-nullable child of type {} contains nulls not present in parent",
|
||||
child.data_type
|
||||
)))
|
||||
}
|
||||
Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
|
||||
"non-nullable child of type {} contains nulls not present in parent",
|
||||
child.data_type
|
||||
))),
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
@@ -1240,9 +1228,7 @@ impl ArrayData {
|
||||
DataType::Utf8 => self.validate_utf8::<i32>(),
|
||||
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
|
||||
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
|
||||
DataType::LargeBinary => {
|
||||
self.validate_offsets_full::<i64>(self.buffers[1].len())
|
||||
}
|
||||
DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
|
||||
DataType::List(_) | DataType::Map(_, _) => {
|
||||
let child = &self.child_data[0];
|
||||
self.validate_offsets_full::<i32>(child.len)
|
||||
@@ -1300,11 +1286,7 @@ impl ArrayData {
|
||||
///
|
||||
/// For example, the offsets buffer contained `[1, 2, 4]`, this
|
||||
/// function would call `validate([1,2])`, and `validate([2,4])`
|
||||
fn validate_each_offset<T, V>(
|
||||
&self,
|
||||
offset_limit: usize,
|
||||
validate: V,
|
||||
) -> Result<(), ArrowError>
|
||||
fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
|
||||
where
|
||||
T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
|
||||
V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
|
||||
@@ -1358,32 +1340,26 @@ impl ArrayData {
|
||||
let values_buffer = &self.buffers[1].as_slice();
|
||||
if let Ok(values_str) = std::str::from_utf8(values_buffer) {
|
||||
// Validate Offsets are correct
|
||||
self.validate_each_offset::<T, _>(
|
||||
values_buffer.len(),
|
||||
|string_index, range| {
|
||||
if !values_str.is_char_boundary(range.start)
|
||||
|| !values_str.is_char_boundary(range.end)
|
||||
{
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"incomplete utf-8 byte sequence from index {string_index}"
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
|
||||
if !values_str.is_char_boundary(range.start)
|
||||
|| !values_str.is_char_boundary(range.end)
|
||||
{
|
||||
return Err(ArrowError::InvalidArgumentError(format!(
|
||||
"incomplete utf-8 byte sequence from index {string_index}"
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
} else {
|
||||
// find specific offset that failed utf8 validation
|
||||
self.validate_each_offset::<T, _>(
|
||||
values_buffer.len(),
|
||||
|string_index, range| {
|
||||
std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
|
||||
))
|
||||
})?;
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
|
||||
std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
|
||||
ArrowError::InvalidArgumentError(format!(
|
||||
"Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
|
||||
))
|
||||
})?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1414,8 +1390,7 @@ impl ArrayData {
|
||||
assert!(buffer.len() / mem::size_of::<T>() >= required_len);
|
||||
|
||||
// Justification: buffer size was validated above
|
||||
let indexes: &[T] =
|
||||
&buffer.typed_data::<T>()[self.offset..self.offset + self.len];
|
||||
let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..self.offset + self.len];
|
||||
|
||||
indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
|
||||
// Do not check the value is null (value can be arbitrary)
|
||||
|
||||
+372
-375
File diff suppressed because it is too large
Load Diff
@@ -78,11 +78,10 @@ pub(super) fn boolean_equal(
|
||||
// get a ref of the null buffer bytes, to use in testing for nullness
|
||||
let lhs_nulls = lhs.nulls().unwrap();
|
||||
|
||||
BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len)
|
||||
.all(|i| {
|
||||
let lhs_pos = lhs_start + lhs.offset() + i;
|
||||
let rhs_pos = rhs_start + rhs.offset() + i;
|
||||
get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos)
|
||||
})
|
||||
BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len).all(|i| {
|
||||
let lhs_pos = lhs_start + lhs.offset() + i;
|
||||
let rhs_pos = rhs_start + rhs.offset() + i;
|
||||
get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,20 +75,15 @@ pub(super) fn fixed_binary_equal(
|
||||
})
|
||||
} else {
|
||||
let lhs_nulls = lhs.nulls().unwrap();
|
||||
let lhs_slices_iter = BitSliceIterator::new(
|
||||
lhs_nulls.validity(),
|
||||
lhs_start + lhs_nulls.offset(),
|
||||
len,
|
||||
);
|
||||
let lhs_slices_iter =
|
||||
BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len);
|
||||
let rhs_nulls = rhs.nulls().unwrap();
|
||||
let rhs_slices_iter = BitSliceIterator::new(
|
||||
rhs_nulls.validity(),
|
||||
rhs_start + rhs_nulls.offset(),
|
||||
len,
|
||||
);
|
||||
let rhs_slices_iter =
|
||||
BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len);
|
||||
|
||||
lhs_slices_iter.zip(rhs_slices_iter).all(
|
||||
|((l_start, l_end), (r_start, r_end))| {
|
||||
lhs_slices_iter
|
||||
.zip(rhs_slices_iter)
|
||||
.all(|((l_start, l_end), (r_start, r_end))| {
|
||||
l_start == r_start
|
||||
&& l_end == r_end
|
||||
&& equal_len(
|
||||
@@ -98,8 +93,7 @@ pub(super) fn fixed_binary_equal(
|
||||
(rhs_start + r_start) * size,
|
||||
(l_end - l_start) * size,
|
||||
)
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+13
-39
@@ -76,24 +76,16 @@ fn equal_values(
|
||||
DataType::Int64 => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Float32 => primitive_equal::<f32>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Float64 => primitive_equal::<f64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Decimal128(_, _) => {
|
||||
primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Decimal256(_, _) => {
|
||||
primitive_equal::<i256>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Date32
|
||||
| DataType::Time32(_)
|
||||
| DataType::Interval(IntervalUnit::YearMonth) => {
|
||||
DataType::Decimal128(_, _) => primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Decimal256(_, _) => primitive_equal::<i256>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
|
||||
primitive_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Date64
|
||||
| DataType::Interval(IntervalUnit::DayTime)
|
||||
| DataType::Time64(_)
|
||||
| DataType::Timestamp(_, _)
|
||||
| DataType::Duration(_) => {
|
||||
primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
| DataType::Duration(_) => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Interval(IntervalUnit::MonthDayNano) => {
|
||||
primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
@@ -103,39 +95,21 @@ fn equal_values(
|
||||
DataType::LargeUtf8 | DataType::LargeBinary => {
|
||||
variable_sized_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::FixedSizeBinary(_) => {
|
||||
fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::FixedSizeBinary(_) => fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::FixedSizeList(_, _) => {
|
||||
fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Union(_, _) => union_equal(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Dictionary(data_type, _) => match data_type.as_ref() {
|
||||
DataType::Int8 => dictionary_equal::<i8>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Int16 => {
|
||||
dictionary_equal::<i16>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Int32 => {
|
||||
dictionary_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Int64 => {
|
||||
dictionary_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::UInt8 => {
|
||||
dictionary_equal::<u8>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::UInt16 => {
|
||||
dictionary_equal::<u16>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::UInt32 => {
|
||||
dictionary_equal::<u32>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::UInt64 => {
|
||||
dictionary_equal::<u64>(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
DataType::Int16 => dictionary_equal::<i16>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Int32 => dictionary_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::Int64 => dictionary_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::UInt8 => dictionary_equal::<u8>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::UInt16 => dictionary_equal::<u16>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::UInt32 => dictionary_equal::<u32>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
DataType::UInt64 => dictionary_equal::<u64>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
DataType::Float16 => primitive_equal::<f16>(lhs, rhs, lhs_start, rhs_start, len),
|
||||
|
||||
@@ -73,20 +73,15 @@ pub(super) fn primitive_equal<T>(
|
||||
})
|
||||
} else {
|
||||
let lhs_nulls = lhs.nulls().unwrap();
|
||||
let lhs_slices_iter = BitSliceIterator::new(
|
||||
lhs_nulls.validity(),
|
||||
lhs_start + lhs_nulls.offset(),
|
||||
len,
|
||||
);
|
||||
let lhs_slices_iter =
|
||||
BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len);
|
||||
let rhs_nulls = rhs.nulls().unwrap();
|
||||
let rhs_slices_iter = BitSliceIterator::new(
|
||||
rhs_nulls.validity(),
|
||||
rhs_start + rhs_nulls.offset(),
|
||||
len,
|
||||
);
|
||||
let rhs_slices_iter =
|
||||
BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len);
|
||||
|
||||
lhs_slices_iter.zip(rhs_slices_iter).all(
|
||||
|((l_start, l_end), (r_start, r_end))| {
|
||||
lhs_slices_iter
|
||||
.zip(rhs_slices_iter)
|
||||
.all(|((l_start, l_end), (r_start, r_end))| {
|
||||
l_start == r_start
|
||||
&& l_end == r_end
|
||||
&& equal_len(
|
||||
@@ -96,8 +91,7 @@ pub(super) fn primitive_equal<T>(
|
||||
(rhs_start + r_start) * byte_width,
|
||||
(l_end - l_start) * byte_width,
|
||||
)
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,10 +116,7 @@ pub(super) fn union_equal(
|
||||
rhs_fields,
|
||||
)
|
||||
}
|
||||
(
|
||||
DataType::Union(_, UnionMode::Sparse),
|
||||
DataType::Union(_, UnionMode::Sparse),
|
||||
) => {
|
||||
(DataType::Union(_, UnionMode::Sparse), DataType::Union(_, UnionMode::Sparse)) => {
|
||||
lhs_type_id_range == rhs_type_id_range
|
||||
&& equal_sparse(lhs, rhs, lhs_start, rhs_start, len)
|
||||
}
|
||||
|
||||
@@ -73,11 +73,9 @@ pub(super) fn base_equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
|
||||
let r_value_field = r_fields.get(1).unwrap();
|
||||
|
||||
// We don't enforce the equality of field names
|
||||
let data_type_equal = l_key_field.data_type()
|
||||
== r_key_field.data_type()
|
||||
let data_type_equal = l_key_field.data_type() == r_key_field.data_type()
|
||||
&& l_value_field.data_type() == r_value_field.data_type();
|
||||
let nullability_equal = l_key_field.is_nullable()
|
||||
== r_key_field.is_nullable()
|
||||
let nullability_equal = l_key_field.is_nullable() == r_key_field.is_nullable()
|
||||
&& l_value_field.is_nullable() == r_value_field.is_nullable();
|
||||
let metadata_equal = l_key_field.metadata() == r_key_field.metadata()
|
||||
&& l_value_field.metadata() == r_value_field.metadata();
|
||||
|
||||
@@ -23,9 +23,7 @@ use crate::ArrayData;
|
||||
use arrow_buffer::ArrowNativeType;
|
||||
use num::{CheckedAdd, Integer};
|
||||
|
||||
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
|
||||
array: &ArrayData,
|
||||
) -> Extend {
|
||||
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(array: &ArrayData) -> Extend {
|
||||
let offsets = array.buffer::<T>(0);
|
||||
Box::new(
|
||||
move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
|
||||
@@ -35,11 +33,7 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
|
||||
let last_offset: T = unsafe { get_last_offset(offset_buffer) };
|
||||
|
||||
// offsets
|
||||
extend_offsets::<T>(
|
||||
offset_buffer,
|
||||
last_offset,
|
||||
&offsets[start..start + len + 1],
|
||||
);
|
||||
extend_offsets::<T>(offset_buffer, last_offset, &offsets[start..start + len + 1]);
|
||||
|
||||
mutable.child_data[0].extend(
|
||||
index,
|
||||
@@ -50,10 +44,7 @@ pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(
|
||||
mutable: &mut _MutableArrayData,
|
||||
len: usize,
|
||||
) {
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
|
||||
let offset_buffer = &mut mutable.buffer1;
|
||||
|
||||
// this is safe due to how offset is built. See details on `get_last_offset`
|
||||
|
||||
@@ -173,11 +173,7 @@ impl<'a> std::fmt::Debug for MutableArrayData<'a> {
|
||||
/// Builds an extend that adds `offset` to the source primitive
|
||||
/// Additionally validates that `max` fits into the
|
||||
/// the underlying primitive returning None if not
|
||||
fn build_extend_dictionary(
|
||||
array: &ArrayData,
|
||||
offset: usize,
|
||||
max: usize,
|
||||
) -> Option<Extend> {
|
||||
fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Option<Extend> {
|
||||
macro_rules! validate_and_build {
|
||||
($dt: ty) => {{
|
||||
let _: $dt = max.try_into().ok()?;
|
||||
@@ -215,27 +211,19 @@ fn build_extend(array: &ArrayData) -> Extend {
|
||||
DataType::Int64 => primitive::build_extend::<i64>(array),
|
||||
DataType::Float32 => primitive::build_extend::<f32>(array),
|
||||
DataType::Float64 => primitive::build_extend::<f64>(array),
|
||||
DataType::Date32
|
||||
| DataType::Time32(_)
|
||||
| DataType::Interval(IntervalUnit::YearMonth) => {
|
||||
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
|
||||
primitive::build_extend::<i32>(array)
|
||||
}
|
||||
DataType::Date64
|
||||
| DataType::Time64(_)
|
||||
| DataType::Timestamp(_, _)
|
||||
| DataType::Duration(_)
|
||||
| DataType::Interval(IntervalUnit::DayTime) => {
|
||||
primitive::build_extend::<i64>(array)
|
||||
}
|
||||
DataType::Interval(IntervalUnit::MonthDayNano) => {
|
||||
primitive::build_extend::<i128>(array)
|
||||
}
|
||||
| DataType::Interval(IntervalUnit::DayTime) => primitive::build_extend::<i64>(array),
|
||||
DataType::Interval(IntervalUnit::MonthDayNano) => primitive::build_extend::<i128>(array),
|
||||
DataType::Decimal128(_, _) => primitive::build_extend::<i128>(array),
|
||||
DataType::Decimal256(_, _) => primitive::build_extend::<i256>(array),
|
||||
DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array),
|
||||
DataType::LargeUtf8 | DataType::LargeBinary => {
|
||||
variable_size::build_extend::<i64>(array)
|
||||
}
|
||||
DataType::LargeUtf8 | DataType::LargeBinary => variable_size::build_extend::<i64>(array),
|
||||
DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
|
||||
DataType::LargeList(_) => list::build_extend::<i64>(array),
|
||||
DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
|
||||
@@ -265,9 +253,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
|
||||
DataType::Int64 => primitive::extend_nulls::<i64>,
|
||||
DataType::Float32 => primitive::extend_nulls::<f32>,
|
||||
DataType::Float64 => primitive::extend_nulls::<f64>,
|
||||
DataType::Date32
|
||||
| DataType::Time32(_)
|
||||
| DataType::Interval(IntervalUnit::YearMonth) => primitive::extend_nulls::<i32>,
|
||||
DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
|
||||
primitive::extend_nulls::<i32>
|
||||
}
|
||||
DataType::Date64
|
||||
| DataType::Time64(_)
|
||||
| DataType::Timestamp(_, _)
|
||||
@@ -380,10 +368,7 @@ impl<'a> MutableArrayData<'a> {
|
||||
array_capacity = *capacity;
|
||||
preallocate_offset_and_binary_buffer::<i64>(*capacity, *value_cap)
|
||||
}
|
||||
(
|
||||
DataType::Utf8 | DataType::Binary,
|
||||
Capacities::Binary(capacity, Some(value_cap)),
|
||||
) => {
|
||||
(DataType::Utf8 | DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => {
|
||||
array_capacity = *capacity;
|
||||
preallocate_offset_and_binary_buffer::<i32>(*capacity, *value_cap)
|
||||
}
|
||||
@@ -391,10 +376,7 @@ impl<'a> MutableArrayData<'a> {
|
||||
array_capacity = *capacity;
|
||||
new_buffers(data_type, *capacity)
|
||||
}
|
||||
(
|
||||
DataType::List(_) | DataType::LargeList(_),
|
||||
Capacities::List(capacity, _),
|
||||
) => {
|
||||
(DataType::List(_) | DataType::LargeList(_), Capacities::List(capacity, _)) => {
|
||||
array_capacity = *capacity;
|
||||
new_buffers(data_type, *capacity)
|
||||
}
|
||||
@@ -435,16 +417,15 @@ impl<'a> MutableArrayData<'a> {
|
||||
.map(|array| &array.child_data()[0])
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let capacities = if let Capacities::List(capacity, ref child_capacities) =
|
||||
capacities
|
||||
{
|
||||
child_capacities
|
||||
.clone()
|
||||
.map(|c| *c)
|
||||
.unwrap_or(Capacities::Array(capacity))
|
||||
} else {
|
||||
Capacities::Array(array_capacity)
|
||||
};
|
||||
let capacities =
|
||||
if let Capacities::List(capacity, ref child_capacities) = capacities {
|
||||
child_capacities
|
||||
.clone()
|
||||
.map(|c| *c)
|
||||
.unwrap_or(Capacities::Array(capacity))
|
||||
} else {
|
||||
Capacities::Array(array_capacity)
|
||||
};
|
||||
|
||||
vec![MutableArrayData::with_capacities(
|
||||
children, use_nulls, capacities,
|
||||
@@ -546,8 +527,7 @@ impl<'a> MutableArrayData<'a> {
|
||||
.collect();
|
||||
let capacity = lengths.iter().sum();
|
||||
|
||||
let mut mutable =
|
||||
MutableArrayData::new(dictionaries, false, capacity);
|
||||
let mut mutable = MutableArrayData::new(dictionaries, false, capacity);
|
||||
|
||||
for (i, len) in lengths.iter().enumerate() {
|
||||
mutable.extend(i, 0, *len)
|
||||
|
||||
@@ -47,9 +47,6 @@ where
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(
|
||||
mutable: &mut _MutableArrayData,
|
||||
len: usize,
|
||||
) {
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
|
||||
mutable.buffer1.extend_zeros(len * size_of::<T>());
|
||||
}
|
||||
|
||||
@@ -45,9 +45,7 @@ pub(super) fn extend_offsets<T: ArrowNativeType + Integer + CheckedAdd>(
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(
|
||||
offset_buffer: &MutableBuffer,
|
||||
) -> T {
|
||||
pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(offset_buffer: &MutableBuffer) -> T {
|
||||
// JUSTIFICATION
|
||||
// Benefit
|
||||
// 20% performance improvement extend of variable sized arrays (see bench `mutable_array`)
|
||||
|
||||
@@ -39,9 +39,7 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
|
||||
buffer.extend_from_slice(new_values);
|
||||
}
|
||||
|
||||
pub(super) fn build_extend<
|
||||
T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>,
|
||||
>(
|
||||
pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>>(
|
||||
array: &ArrayData,
|
||||
) -> Extend {
|
||||
let offsets = array.buffer::<T>(0);
|
||||
@@ -54,21 +52,14 @@ pub(super) fn build_extend<
|
||||
// this is safe due to how offset is built. See details on `get_last_offset`
|
||||
let last_offset = unsafe { get_last_offset(offset_buffer) };
|
||||
|
||||
extend_offsets::<T>(
|
||||
offset_buffer,
|
||||
last_offset,
|
||||
&offsets[start..start + len + 1],
|
||||
);
|
||||
extend_offsets::<T>(offset_buffer, last_offset, &offsets[start..start + len + 1]);
|
||||
// values
|
||||
extend_offset_values::<T>(values_buffer, offsets, values, start, len);
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(
|
||||
mutable: &mut _MutableArrayData,
|
||||
len: usize,
|
||||
) {
|
||||
pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
|
||||
let offset_buffer = &mut mutable.buffer1;
|
||||
|
||||
// this is safe due to how offset is built. See details on `get_last_offset`
|
||||
|
||||
@@ -32,28 +32,26 @@ use arrow_array::builder::StringBuilder;
|
||||
use arrow_array::{ArrayRef, RecordBatch};
|
||||
use arrow_flight::encode::FlightDataEncoderBuilder;
|
||||
use arrow_flight::sql::metadata::{
|
||||
SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData,
|
||||
XdbcTypeInfoDataBuilder,
|
||||
SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData, XdbcTypeInfoDataBuilder,
|
||||
};
|
||||
use arrow_flight::sql::{
|
||||
server::FlightSqlService, ActionBeginSavepointRequest, ActionBeginSavepointResult,
|
||||
ActionBeginTransactionRequest, ActionBeginTransactionResult,
|
||||
ActionCancelQueryRequest, ActionCancelQueryResult,
|
||||
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
|
||||
ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest,
|
||||
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs,
|
||||
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
|
||||
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo,
|
||||
CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
|
||||
ActionBeginTransactionRequest, ActionBeginTransactionResult, ActionCancelQueryRequest,
|
||||
ActionCancelQueryResult, ActionClosePreparedStatementRequest,
|
||||
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult,
|
||||
ActionCreatePreparedSubstraitPlanRequest, ActionEndSavepointRequest,
|
||||
ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference,
|
||||
CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
|
||||
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
|
||||
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
|
||||
CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt,
|
||||
Searchable, SqlInfo, TicketStatementQuery, XdbcDataType,
|
||||
CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt, Searchable,
|
||||
SqlInfo, TicketStatementQuery, XdbcDataType,
|
||||
};
|
||||
use arrow_flight::utils::batches_to_flight_data;
|
||||
use arrow_flight::{
|
||||
flight_service_server::FlightService, flight_service_server::FlightServiceServer,
|
||||
Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
|
||||
HandshakeResponse, IpcMessage, Location, SchemaAsIpc, Ticket,
|
||||
flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
|
||||
FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse,
|
||||
IpcMessage, Location, SchemaAsIpc, Ticket,
|
||||
};
|
||||
use arrow_ipc::writer::IpcWriteOptions;
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
@@ -167,8 +165,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
|
||||
let bytes = BASE64_STANDARD
|
||||
.decode(base64)
|
||||
.map_err(|e| status!("authorization not decodable", e))?;
|
||||
let str = String::from_utf8(bytes)
|
||||
.map_err(|e| status!("authorization not parsable", e))?;
|
||||
let str = String::from_utf8(bytes).map_err(|e| status!("authorization not parsable", e))?;
|
||||
let parts: Vec<_> = str.split(':').collect();
|
||||
let (user, pass) = match parts.as_slice() {
|
||||
[user, pass] => (user, pass),
|
||||
@@ -195,8 +192,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
|
||||
_message: Any,
|
||||
) -> Result<Response<<Self as FlightService>::DoGetStream>, Status> {
|
||||
self.check_token(&request)?;
|
||||
let batch =
|
||||
Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
|
||||
let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
|
||||
let schema = batch.schema();
|
||||
let batches = vec![batch];
|
||||
let flight_data = batches_to_flight_data(schema.as_ref(), batches)
|
||||
@@ -238,8 +234,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
|
||||
self.check_token(&request)?;
|
||||
let handle = std::str::from_utf8(&cmd.prepared_statement_handle)
|
||||
.map_err(|e| status!("Unable to parse handle", e))?;
|
||||
let batch =
|
||||
Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
|
||||
let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?;
|
||||
let schema = (*batch.schema()).clone();
|
||||
let num_rows = batch.num_rows();
|
||||
let num_bytes = batch.get_array_memory_size();
|
||||
@@ -736,8 +731,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
if std::env::var("USE_TLS").ok().is_some() {
|
||||
let cert = std::fs::read_to_string("arrow-flight/examples/data/server.pem")?;
|
||||
let key = std::fs::read_to_string("arrow-flight/examples/data/server.key")?;
|
||||
let client_ca =
|
||||
std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?;
|
||||
let client_ca = std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?;
|
||||
|
||||
let tls_config = ServerTlsConfig::new()
|
||||
.identity(Identity::from_pem(&cert, &key))
|
||||
|
||||
@@ -20,9 +20,9 @@ use tonic::transport::Server;
|
||||
use tonic::{Request, Response, Status, Streaming};
|
||||
|
||||
use arrow_flight::{
|
||||
flight_service_server::FlightService, flight_service_server::FlightServiceServer,
|
||||
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
|
||||
HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
|
||||
flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
|
||||
ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest,
|
||||
HandshakeResponse, PutResult, SchemaResult, Ticket,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -249,10 +249,7 @@ impl FlightClient {
|
||||
/// .expect("error fetching data");
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn get_flight_info(
|
||||
&mut self,
|
||||
descriptor: FlightDescriptor,
|
||||
) -> Result<FlightInfo> {
|
||||
pub async fn get_flight_info(&mut self, descriptor: FlightDescriptor) -> Result<FlightInfo> {
|
||||
let request = self.make_request(descriptor);
|
||||
|
||||
let response = self.inner.get_flight_info(request).await?.into_inner();
|
||||
@@ -452,10 +449,7 @@ impl FlightClient {
|
||||
/// .expect("error making request");
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn get_schema(
|
||||
&mut self,
|
||||
flight_descriptor: FlightDescriptor,
|
||||
) -> Result<Schema> {
|
||||
pub async fn get_schema(&mut self, flight_descriptor: FlightDescriptor) -> Result<Schema> {
|
||||
let request = self.make_request(flight_descriptor);
|
||||
|
||||
let schema_result = self.inner.get_schema(request).await?.into_inner();
|
||||
@@ -488,9 +482,7 @@ impl FlightClient {
|
||||
/// .expect("error gathering actions");
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn list_actions(
|
||||
&mut self,
|
||||
) -> Result<BoxStream<'static, Result<ActionType>>> {
|
||||
pub async fn list_actions(&mut self) -> Result<BoxStream<'static, Result<ActionType>>> {
|
||||
let request = self.make_request(Empty {});
|
||||
|
||||
let action_stream = self
|
||||
@@ -528,10 +520,7 @@ impl FlightClient {
|
||||
/// .expect("error gathering action results");
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn do_action(
|
||||
&mut self,
|
||||
action: Action,
|
||||
) -> Result<BoxStream<'static, Result<Bytes>>> {
|
||||
pub async fn do_action(&mut self, action: Action) -> Result<BoxStream<'static, Result<Bytes>>> {
|
||||
let request = self.make_request(action);
|
||||
|
||||
let result_stream = self
|
||||
|
||||
+12
-21
@@ -21,9 +21,7 @@ use arrow_buffer::Buffer;
|
||||
use arrow_schema::{Schema, SchemaRef};
|
||||
use bytes::Bytes;
|
||||
use futures::{ready, stream::BoxStream, Stream, StreamExt};
|
||||
use std::{
|
||||
collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll,
|
||||
};
|
||||
use std::{collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll};
|
||||
use tonic::metadata::MetadataMap;
|
||||
|
||||
use crate::error::{FlightError, Result};
|
||||
@@ -270,16 +268,14 @@ impl FlightDataDecoder {
|
||||
/// state as necessary.
|
||||
fn extract_message(&mut self, data: FlightData) -> Result<Option<DecodedFlightData>> {
|
||||
use arrow_ipc::MessageHeader;
|
||||
let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|e| {
|
||||
FlightError::DecodeError(format!("Error decoding root message: {e}"))
|
||||
})?;
|
||||
let message = arrow_ipc::root_as_message(&data.data_header[..])
|
||||
.map_err(|e| FlightError::DecodeError(format!("Error decoding root message: {e}")))?;
|
||||
|
||||
match message.header_type() {
|
||||
MessageHeader::NONE => Ok(Some(DecodedFlightData::new_none(data))),
|
||||
MessageHeader::Schema => {
|
||||
let schema = Schema::try_from(&data).map_err(|e| {
|
||||
FlightError::DecodeError(format!("Error decoding schema: {e}"))
|
||||
})?;
|
||||
let schema = Schema::try_from(&data)
|
||||
.map_err(|e| FlightError::DecodeError(format!("Error decoding schema: {e}")))?;
|
||||
|
||||
let schema = Arc::new(schema);
|
||||
let dictionaries_by_field = HashMap::new();
|
||||
@@ -300,12 +296,11 @@ impl FlightDataDecoder {
|
||||
};
|
||||
|
||||
let buffer = Buffer::from_bytes(data.data_body.into());
|
||||
let dictionary_batch =
|
||||
message.header_as_dictionary_batch().ok_or_else(|| {
|
||||
FlightError::protocol(
|
||||
"Could not get dictionary batch from DictionaryBatch message",
|
||||
)
|
||||
})?;
|
||||
let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| {
|
||||
FlightError::protocol(
|
||||
"Could not get dictionary batch from DictionaryBatch message",
|
||||
)
|
||||
})?;
|
||||
|
||||
arrow_ipc::reader::read_dictionary(
|
||||
&buffer,
|
||||
@@ -315,9 +310,7 @@ impl FlightDataDecoder {
|
||||
&message.version(),
|
||||
)
|
||||
.map_err(|e| {
|
||||
FlightError::DecodeError(format!(
|
||||
"Error decoding ipc dictionary: {e}"
|
||||
))
|
||||
FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}"))
|
||||
})?;
|
||||
|
||||
// Updated internal state, but no decoded message
|
||||
@@ -338,9 +331,7 @@ impl FlightDataDecoder {
|
||||
&state.dictionaries_by_field,
|
||||
)
|
||||
.map_err(|e| {
|
||||
FlightError::DecodeError(format!(
|
||||
"Error decoding ipc RecordBatch: {e}"
|
||||
))
|
||||
FlightError::DecodeError(format!("Error decoding ipc RecordBatch: {e}"))
|
||||
})?;
|
||||
|
||||
Ok(Some(DecodedFlightData::new_record_batch(data, batch)))
|
||||
|
||||
+42
-84
@@ -159,10 +159,7 @@ impl FlightDataEncoderBuilder {
|
||||
}
|
||||
|
||||
/// Set [`DictionaryHandling`] for encoder
|
||||
pub fn with_dictionary_handling(
|
||||
mut self,
|
||||
dictionary_handling: DictionaryHandling,
|
||||
) -> Self {
|
||||
pub fn with_dictionary_handling(mut self, dictionary_handling: DictionaryHandling) -> Self {
|
||||
self.dictionary_handling = dictionary_handling;
|
||||
self
|
||||
}
|
||||
@@ -191,10 +188,7 @@ impl FlightDataEncoderBuilder {
|
||||
}
|
||||
|
||||
/// Specify a flight descriptor in the first FlightData message.
|
||||
pub fn with_flight_descriptor(
|
||||
mut self,
|
||||
descriptor: Option<FlightDescriptor>,
|
||||
) -> Self {
|
||||
pub fn with_flight_descriptor(mut self, descriptor: Option<FlightDescriptor>) -> Self {
|
||||
self.descriptor = descriptor;
|
||||
self
|
||||
}
|
||||
@@ -334,8 +328,7 @@ impl FlightDataEncoder {
|
||||
let batch = prepare_batch_for_flight(&batch, schema, send_dictionaries)?;
|
||||
|
||||
for batch in split_batch_for_grpc_response(batch, self.max_flight_data_size) {
|
||||
let (flight_dictionaries, flight_batch) =
|
||||
self.encoder.encode_batch(&batch)?;
|
||||
let (flight_dictionaries, flight_batch) = self.encoder.encode_batch(&batch)?;
|
||||
|
||||
self.queue_messages(flight_dictionaries);
|
||||
self.queue_message(flight_batch);
|
||||
@@ -460,9 +453,8 @@ fn split_batch_for_grpc_response(
|
||||
.map(|col| col.get_buffer_memory_size())
|
||||
.sum::<usize>();
|
||||
|
||||
let n_batches = (size / max_flight_data_size
|
||||
+ usize::from(size % max_flight_data_size != 0))
|
||||
.max(1);
|
||||
let n_batches =
|
||||
(size / max_flight_data_size + usize::from(size % max_flight_data_size != 0)).max(1);
|
||||
let rows_per_batch = (batch.num_rows() / n_batches).max(1);
|
||||
let mut out = Vec::with_capacity(n_batches + 1);
|
||||
|
||||
@@ -505,18 +497,12 @@ impl FlightIpcEncoder {
|
||||
|
||||
/// Convert a `RecordBatch` to a Vec of `FlightData` representing
|
||||
/// dictionaries and a `FlightData` representing the batch
|
||||
fn encode_batch(
|
||||
&mut self,
|
||||
batch: &RecordBatch,
|
||||
) -> Result<(Vec<FlightData>, FlightData)> {
|
||||
let (encoded_dictionaries, encoded_batch) = self.data_gen.encoded_batch(
|
||||
batch,
|
||||
&mut self.dictionary_tracker,
|
||||
&self.options,
|
||||
)?;
|
||||
fn encode_batch(&mut self, batch: &RecordBatch) -> Result<(Vec<FlightData>, FlightData)> {
|
||||
let (encoded_dictionaries, encoded_batch) =
|
||||
self.data_gen
|
||||
.encoded_batch(batch, &mut self.dictionary_tracker, &self.options)?;
|
||||
|
||||
let flight_dictionaries =
|
||||
encoded_dictionaries.into_iter().map(Into::into).collect();
|
||||
let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
|
||||
let flight_batch = encoded_batch.into();
|
||||
|
||||
Ok((flight_dictionaries, flight_batch))
|
||||
@@ -553,9 +539,7 @@ fn prepare_batch_for_flight(
|
||||
/// but does enable sending DictionaryArray's via Flight.
|
||||
fn hydrate_dictionary(array: &ArrayRef, send_dictionaries: bool) -> Result<ArrayRef> {
|
||||
let arr = match array.data_type() {
|
||||
DataType::Dictionary(_, value) if !send_dictionaries => {
|
||||
arrow_cast::cast(array, value)?
|
||||
}
|
||||
DataType::Dictionary(_, value) if !send_dictionaries => arrow_cast::cast(array, value)?,
|
||||
_ => Arc::clone(array),
|
||||
};
|
||||
Ok(arr)
|
||||
@@ -586,11 +570,9 @@ mod tests {
|
||||
let (_, baseline_flight_batch) = make_flight_data(&batch, &options);
|
||||
|
||||
let big_batch = batch.slice(0, batch.num_rows() - 1);
|
||||
let optimized_big_batch =
|
||||
prepare_batch_for_flight(&big_batch, Arc::clone(&schema), false)
|
||||
.expect("failed to optimize");
|
||||
let (_, optimized_big_flight_batch) =
|
||||
make_flight_data(&optimized_big_batch, &options);
|
||||
let optimized_big_batch = prepare_batch_for_flight(&big_batch, Arc::clone(&schema), false)
|
||||
.expect("failed to optimize");
|
||||
let (_, optimized_big_flight_batch) = make_flight_data(&optimized_big_batch, &options);
|
||||
|
||||
assert_eq!(
|
||||
baseline_flight_batch.data_body.len(),
|
||||
@@ -601,12 +583,10 @@ mod tests {
|
||||
let optimized_small_batch =
|
||||
prepare_batch_for_flight(&small_batch, Arc::clone(&schema), false)
|
||||
.expect("failed to optimize");
|
||||
let (_, optimized_small_flight_batch) =
|
||||
make_flight_data(&optimized_small_batch, &options);
|
||||
let (_, optimized_small_flight_batch) = make_flight_data(&optimized_small_batch, &options);
|
||||
|
||||
assert!(
|
||||
baseline_flight_batch.data_body.len()
|
||||
> optimized_small_flight_batch.data_body.len()
|
||||
baseline_flight_batch.data_body.len() > optimized_small_flight_batch.data_body.len()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -620,11 +600,10 @@ mod tests {
|
||||
false,
|
||||
)]));
|
||||
let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap();
|
||||
let encoder = FlightDataEncoderBuilder::default()
|
||||
.build(futures::stream::once(async { Ok(batch) }));
|
||||
let encoder =
|
||||
FlightDataEncoderBuilder::default().build(futures::stream::once(async { Ok(batch) }));
|
||||
let mut decoder = FlightDataDecoder::new(encoder);
|
||||
let expected_schema =
|
||||
Schema::new(vec![Field::new("dict", DataType::Utf8, false)]);
|
||||
let expected_schema = Schema::new(vec![Field::new("dict", DataType::Utf8, false)]);
|
||||
let expected_schema = Arc::new(expected_schema);
|
||||
while let Some(decoded) = decoder.next().await {
|
||||
let decoded = decoded.unwrap();
|
||||
@@ -656,10 +635,8 @@ mod tests {
|
||||
Arc::new(vec!["a", "a", "b"].into_iter().collect());
|
||||
let arr_two: Arc<DictionaryArray<UInt16Type>> =
|
||||
Arc::new(vec!["b", "a", "c"].into_iter().collect());
|
||||
let batch_one =
|
||||
RecordBatch::try_new(schema.clone(), vec![arr_one.clone()]).unwrap();
|
||||
let batch_two =
|
||||
RecordBatch::try_new(schema.clone(), vec![arr_two.clone()]).unwrap();
|
||||
let batch_one = RecordBatch::try_new(schema.clone(), vec![arr_one.clone()]).unwrap();
|
||||
let batch_two = RecordBatch::try_new(schema.clone(), vec![arr_two.clone()]).unwrap();
|
||||
|
||||
let encoder = FlightDataEncoderBuilder::default()
|
||||
.with_dictionary_handling(DictionaryHandling::Resend)
|
||||
@@ -675,10 +652,9 @@ mod tests {
|
||||
DecodedPayload::RecordBatch(b) => {
|
||||
assert_eq!(b.schema(), schema);
|
||||
|
||||
let actual_array =
|
||||
Arc::new(downcast_array::<DictionaryArray<UInt16Type>>(
|
||||
b.column_by_name("dict").unwrap(),
|
||||
));
|
||||
let actual_array = Arc::new(downcast_array::<DictionaryArray<UInt16Type>>(
|
||||
b.column_by_name("dict").unwrap(),
|
||||
));
|
||||
|
||||
assert_eq!(actual_array, expected_array);
|
||||
|
||||
@@ -690,10 +666,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_schema_metadata_encoded() {
|
||||
let schema =
|
||||
Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata(
|
||||
HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
|
||||
);
|
||||
let schema = Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata(
|
||||
HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
|
||||
);
|
||||
|
||||
let got = prepare_schema_for_flight(&schema, false);
|
||||
assert!(got.metadata().contains_key("some_key"));
|
||||
@@ -708,8 +683,7 @@ mod tests {
|
||||
)
|
||||
.expect("cannot create record batch");
|
||||
|
||||
prepare_batch_for_flight(&batch, batch.schema(), false)
|
||||
.expect("failed to optimize");
|
||||
prepare_batch_for_flight(&batch, batch.schema(), false).expect("failed to optimize");
|
||||
}
|
||||
|
||||
pub fn make_flight_data(
|
||||
@@ -723,8 +697,7 @@ mod tests {
|
||||
.encoded_batch(batch, &mut dictionary_tracker, options)
|
||||
.expect("DictionaryTracker configured above to not error on replacement");
|
||||
|
||||
let flight_dictionaries =
|
||||
encoded_dictionaries.into_iter().map(Into::into).collect();
|
||||
let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
|
||||
let flight_batch = encoded_batch.into();
|
||||
|
||||
(flight_dictionaries, flight_batch)
|
||||
@@ -745,8 +718,7 @@ mod tests {
|
||||
// split once
|
||||
let n_rows = max_flight_data_size + 1;
|
||||
assert!(n_rows % 2 == 1, "should be an odd number");
|
||||
let c =
|
||||
UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>());
|
||||
let c = UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>());
|
||||
let batch = RecordBatch::try_from_iter(vec![("a", Arc::new(c) as ArrayRef)])
|
||||
.expect("cannot create record batch");
|
||||
let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size);
|
||||
@@ -793,8 +765,7 @@ mod tests {
|
||||
|
||||
let input_rows = batch.num_rows();
|
||||
|
||||
let split =
|
||||
split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes);
|
||||
let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes);
|
||||
let sizes: Vec<_> = split.iter().map(|batch| batch.num_rows()).collect();
|
||||
let output_rows: usize = sizes.iter().sum();
|
||||
|
||||
@@ -807,8 +778,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn flight_data_size_even() {
|
||||
let s1 =
|
||||
StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
|
||||
let s1 = StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
|
||||
let i1 = Int16Array::from_iter_values(0..1024);
|
||||
let s2 = StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
|
||||
let i2 = Int64Array::from_iter_values(0..1024);
|
||||
@@ -828,8 +798,7 @@ mod tests {
|
||||
async fn flight_data_size_uneven_variable_lengths() {
|
||||
// each row has a longer string than the last with increasing lengths 0 --> 1024
|
||||
let array = StringArray::from_iter_values((0..1024).map(|i| "*".repeat(i)));
|
||||
let batch =
|
||||
RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap();
|
||||
let batch = RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap();
|
||||
|
||||
// overage is much higher than ideal
|
||||
// https://github.com/apache/arrow-rs/issues/3478
|
||||
@@ -883,8 +852,7 @@ mod tests {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
|
||||
verify_encoded_split(batch, 160).await;
|
||||
}
|
||||
@@ -894,11 +862,9 @@ mod tests {
|
||||
// large dictionary (all distinct values ==> 1024 entries in dictionary)
|
||||
let values: Vec<_> = (1..1024).map(|i| "**".repeat(i)).collect();
|
||||
|
||||
let array: DictionaryArray<Int32Type> =
|
||||
values.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array: DictionaryArray<Int32Type> = values.iter().map(|s| Some(s.as_str())).collect();
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
|
||||
// overage is much higher than ideal
|
||||
// https://github.com/apache/arrow-rs/issues/3478
|
||||
@@ -912,8 +878,7 @@ mod tests {
|
||||
let keys = Int32Array::from_iter_values((0..3000).map(|i| (3000 - i) % 1024));
|
||||
let array = DictionaryArray::new(keys, Arc::new(values));
|
||||
|
||||
let batch =
|
||||
RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap();
|
||||
|
||||
// overage is much higher than ideal
|
||||
// https://github.com/apache/arrow-rs/issues/3478
|
||||
@@ -929,12 +894,9 @@ mod tests {
|
||||
// medium cardinality
|
||||
let values3: Vec<_> = (1..1024).map(|i| "**".repeat(i % 100)).collect();
|
||||
|
||||
let array1: DictionaryArray<Int32Type> =
|
||||
values1.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array2: DictionaryArray<Int32Type> =
|
||||
values2.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array3: DictionaryArray<Int32Type> =
|
||||
values3.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array1: DictionaryArray<Int32Type> = values1.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array2: DictionaryArray<Int32Type> = values2.iter().map(|s| Some(s.as_str())).collect();
|
||||
let array3: DictionaryArray<Int32Type> = values3.iter().map(|s| Some(s.as_str())).collect();
|
||||
|
||||
let batch = RecordBatch::try_from_iter(vec![
|
||||
("a1", Arc::new(array1) as _),
|
||||
@@ -954,17 +916,13 @@ mod tests {
|
||||
.flight_descriptor
|
||||
.as_ref()
|
||||
.map(|descriptor| {
|
||||
let path_len: usize =
|
||||
descriptor.path.iter().map(|p| p.as_bytes().len()).sum();
|
||||
let path_len: usize = descriptor.path.iter().map(|p| p.as_bytes().len()).sum();
|
||||
|
||||
std::mem::size_of_val(descriptor) + descriptor.cmd.len() + path_len
|
||||
})
|
||||
.unwrap_or(0);
|
||||
|
||||
flight_descriptor_size
|
||||
+ d.app_metadata.len()
|
||||
+ d.data_body.len()
|
||||
+ d.data_header.len()
|
||||
flight_descriptor_size + d.app_metadata.len() + d.data_body.len() + d.data_header.len()
|
||||
}
|
||||
|
||||
/// Coverage for <https://github.com/apache/arrow-rs/issues/3478>
|
||||
|
||||
@@ -133,10 +133,7 @@ pub struct IpcMessage(pub Bytes);
|
||||
|
||||
// Useful conversion functions
|
||||
|
||||
fn flight_schema_as_encoded_data(
|
||||
arrow_schema: &Schema,
|
||||
options: &IpcWriteOptions,
|
||||
) -> EncodedData {
|
||||
fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData {
|
||||
let data_gen = writer::IpcDataGenerator::default();
|
||||
data_gen.schema_to_bytes(arrow_schema, options)
|
||||
}
|
||||
|
||||
@@ -31,17 +31,16 @@ use crate::flight_service_client::FlightServiceClient;
|
||||
use crate::sql::server::{CLOSE_PREPARED_STATEMENT, CREATE_PREPARED_STATEMENT};
|
||||
use crate::sql::{
|
||||
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
|
||||
ActionCreatePreparedStatementResult, Any, CommandGetCatalogs,
|
||||
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
|
||||
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo,
|
||||
CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
|
||||
ActionCreatePreparedStatementResult, Any, CommandGetCatalogs, CommandGetCrossReference,
|
||||
CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
|
||||
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
|
||||
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
|
||||
CommandStatementUpdate, DoPutUpdateResult, ProstMessageExt, SqlInfo,
|
||||
};
|
||||
use crate::trailers::extract_lazy_trailers;
|
||||
use crate::{
|
||||
Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest,
|
||||
HandshakeResponse, IpcMessage, PutResult, Ticket,
|
||||
Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
|
||||
IpcMessage, PutResult, Ticket,
|
||||
};
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_buffer::Buffer;
|
||||
@@ -134,11 +133,7 @@ impl FlightSqlServiceClient<Channel> {
|
||||
|
||||
/// Perform a `handshake` with the server, passing credentials and establishing a session
|
||||
/// Returns arbitrary auth/handshake info binary blob
|
||||
pub async fn handshake(
|
||||
&mut self,
|
||||
username: &str,
|
||||
password: &str,
|
||||
) -> Result<Bytes, ArrowError> {
|
||||
pub async fn handshake(&mut self, username: &str, password: &str) -> Result<Bytes, ArrowError> {
|
||||
let cmd = HandshakeRequest {
|
||||
protocol_version: 0,
|
||||
payload: Default::default(),
|
||||
@@ -156,9 +151,9 @@ impl FlightSqlServiceClient<Channel> {
|
||||
.await
|
||||
.map_err(|e| ArrowError::IpcError(format!("Can't handshake {e}")))?;
|
||||
if let Some(auth) = resp.metadata().get("authorization") {
|
||||
let auth = auth.to_str().map_err(|_| {
|
||||
ArrowError::ParseError("Can't read auth header".to_string())
|
||||
})?;
|
||||
let auth = auth
|
||||
.to_str()
|
||||
.map_err(|_| ArrowError::ParseError("Can't read auth header".to_string()))?;
|
||||
let bearer = "Bearer ";
|
||||
if !auth.starts_with(bearer) {
|
||||
Err(ArrowError::ParseError("Invalid auth header!".to_string()))?;
|
||||
@@ -166,10 +161,11 @@ impl FlightSqlServiceClient<Channel> {
|
||||
let auth = auth[bearer.len()..].to_string();
|
||||
self.token = Some(auth);
|
||||
}
|
||||
let responses: Vec<HandshakeResponse> =
|
||||
resp.into_inner().try_collect().await.map_err(|_| {
|
||||
ArrowError::ParseError("Can't collect responses".to_string())
|
||||
})?;
|
||||
let responses: Vec<HandshakeResponse> = resp
|
||||
.into_inner()
|
||||
.try_collect()
|
||||
.await
|
||||
.map_err(|_| ArrowError::ParseError("Can't collect responses".to_string()))?;
|
||||
let resp = match responses.as_slice() {
|
||||
[resp] => resp.payload.clone(),
|
||||
[] => Bytes::new(),
|
||||
@@ -209,8 +205,7 @@ impl FlightSqlServiceClient<Channel> {
|
||||
.await
|
||||
.map_err(status_to_arrow_error)?
|
||||
.unwrap();
|
||||
let any =
|
||||
Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
|
||||
let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
|
||||
let result: DoPutUpdateResult = any.unpack()?.unwrap();
|
||||
Ok(result.record_count)
|
||||
}
|
||||
@@ -405,17 +400,13 @@ impl FlightSqlServiceClient<Channel> {
|
||||
ArrowError::ParseError(format!("Cannot convert header key \"{k}\": {e}"))
|
||||
})?;
|
||||
let v = v.parse().map_err(|e| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Cannot convert header value \"{v}\": {e}"
|
||||
))
|
||||
ArrowError::ParseError(format!("Cannot convert header value \"{v}\": {e}"))
|
||||
})?;
|
||||
req.metadata_mut().insert(k, v);
|
||||
}
|
||||
if let Some(token) = &self.token {
|
||||
let val = format!("Bearer {token}").parse().map_err(|e| {
|
||||
ArrowError::ParseError(format!(
|
||||
"Cannot convert token to header value: {e}"
|
||||
))
|
||||
ArrowError::ParseError(format!("Cannot convert token to header value: {e}"))
|
||||
})?;
|
||||
req.metadata_mut().insert("authorization", val);
|
||||
}
|
||||
@@ -484,8 +475,7 @@ impl PreparedStatement<Channel> {
|
||||
.await
|
||||
.map_err(status_to_arrow_error)?
|
||||
.unwrap();
|
||||
let any =
|
||||
Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
|
||||
let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
|
||||
let result: DoPutUpdateResult = any.unpack()?.unwrap();
|
||||
Ok(result.record_count)
|
||||
}
|
||||
@@ -501,10 +491,7 @@ impl PreparedStatement<Channel> {
|
||||
}
|
||||
|
||||
/// Set a RecordBatch that contains the parameters that will be bind.
|
||||
pub fn set_parameters(
|
||||
&mut self,
|
||||
parameter_binding: RecordBatch,
|
||||
) -> Result<(), ArrowError> {
|
||||
pub fn set_parameters(&mut self, parameter_binding: RecordBatch) -> Result<(), ArrowError> {
|
||||
self.parameter_binding = Some(parameter_binding);
|
||||
Ok(())
|
||||
}
|
||||
@@ -580,19 +567,16 @@ pub fn arrow_data_from_flight_data(
|
||||
flight_data: FlightData,
|
||||
arrow_schema_ref: &SchemaRef,
|
||||
) -> Result<ArrowFlightData, ArrowError> {
|
||||
let ipc_message = root_as_message(&flight_data.data_header[..]).map_err(|err| {
|
||||
ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
|
||||
})?;
|
||||
let ipc_message = root_as_message(&flight_data.data_header[..])
|
||||
.map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
|
||||
|
||||
match ipc_message.header_type() {
|
||||
MessageHeader::RecordBatch => {
|
||||
let ipc_record_batch =
|
||||
ipc_message.header_as_record_batch().ok_or_else(|| {
|
||||
ArrowError::ComputeError(
|
||||
"Unable to convert flight data header to a record batch"
|
||||
.to_string(),
|
||||
)
|
||||
})?;
|
||||
let ipc_record_batch = ipc_message.header_as_record_batch().ok_or_else(|| {
|
||||
ArrowError::ComputeError(
|
||||
"Unable to convert flight data header to a record batch".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let dictionaries_by_field = HashMap::new();
|
||||
let record_batch = read_record_batch(
|
||||
@@ -618,13 +602,11 @@ pub fn arrow_data_from_flight_data(
|
||||
MessageHeader::DictionaryBatch => {
|
||||
let _ = ipc_message.header_as_dictionary_batch().ok_or_else(|| {
|
||||
ArrowError::ComputeError(
|
||||
"Unable to convert flight data header to a dictionary batch"
|
||||
.to_string(),
|
||||
"Unable to convert flight data header to a dictionary batch".to_string(),
|
||||
)
|
||||
})?;
|
||||
Err(ArrowError::NotYetImplemented(
|
||||
"no idea on how to convert an ipc dictionary batch to an arrow type"
|
||||
.to_string(),
|
||||
"no idea on how to convert an ipc dictionary batch to an arrow type".to_string(),
|
||||
))
|
||||
}
|
||||
MessageHeader::Tensor => {
|
||||
@@ -644,8 +626,7 @@ pub fn arrow_data_from_flight_data(
|
||||
)
|
||||
})?;
|
||||
Err(ArrowError::NotYetImplemented(
|
||||
"no idea on how to convert an ipc sparse tensor to an arrow type"
|
||||
.to_string(),
|
||||
"no idea on how to convert an ipc sparse tensor to an arrow type".to_string(),
|
||||
))
|
||||
}
|
||||
_ => Err(ArrowError::ComputeError(format!(
|
||||
|
||||
@@ -95,11 +95,7 @@ impl GetDbSchemasBuilder {
|
||||
/// Append a row
|
||||
///
|
||||
/// In case the catalog should be considered as empty, pass in an empty string '""'.
|
||||
pub fn append(
|
||||
&mut self,
|
||||
catalog_name: impl AsRef<str>,
|
||||
schema_name: impl AsRef<str>,
|
||||
) {
|
||||
pub fn append(&mut self, catalog_name: impl AsRef<str>, schema_name: impl AsRef<str>) {
|
||||
self.catalog_name.append_value(catalog_name);
|
||||
self.db_schema_name.append_value(schema_name);
|
||||
}
|
||||
|
||||
@@ -30,8 +30,8 @@ use std::sync::Arc;
|
||||
use arrow_arith::boolean::or;
|
||||
use arrow_array::array::{Array, UInt32Array, UnionArray};
|
||||
use arrow_array::builder::{
|
||||
ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder,
|
||||
MapBuilder, StringBuilder, UInt32Builder,
|
||||
ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, MapBuilder,
|
||||
StringBuilder, UInt32Builder,
|
||||
};
|
||||
use arrow_array::{RecordBatch, Scalar};
|
||||
use arrow_data::ArrayData;
|
||||
@@ -184,11 +184,7 @@ static UNION_TYPE: Lazy<DataType> = Lazy::new(|| {
|
||||
Field::new("keys", DataType::Int32, false),
|
||||
Field::new(
|
||||
"values",
|
||||
DataType::List(Arc::new(Field::new(
|
||||
"item",
|
||||
DataType::Int32,
|
||||
true,
|
||||
))),
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
|
||||
true,
|
||||
),
|
||||
])),
|
||||
@@ -420,10 +416,7 @@ pub struct SqlInfoData {
|
||||
impl SqlInfoData {
|
||||
/// Return a [`RecordBatch`] containing only the requested `u32`, if any
|
||||
/// from [`CommandGetSqlInfo`]
|
||||
pub fn record_batch(
|
||||
&self,
|
||||
info: impl IntoIterator<Item = u32>,
|
||||
) -> Result<RecordBatch> {
|
||||
pub fn record_batch(&self, info: impl IntoIterator<Item = u32>) -> Result<RecordBatch> {
|
||||
let arr = self.batch.column(0);
|
||||
let type_filter = info
|
||||
.into_iter()
|
||||
@@ -493,9 +486,7 @@ mod tests {
|
||||
|
||||
use super::SqlInfoDataBuilder;
|
||||
use crate::sql::metadata::tests::assert_batches_eq;
|
||||
use crate::sql::{
|
||||
SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert,
|
||||
};
|
||||
use crate::sql::{SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert};
|
||||
|
||||
#[test]
|
||||
fn test_sql_infos() {
|
||||
|
||||
@@ -329,12 +329,12 @@ mod tests {
|
||||
"b_catalog",
|
||||
])) as ArrayRef,
|
||||
Arc::new(StringArray::from(vec![
|
||||
"a_schema", "a_schema", "b_schema", "b_schema", "a_schema",
|
||||
"a_schema", "b_schema", "b_schema",
|
||||
"a_schema", "a_schema", "b_schema", "b_schema", "a_schema", "a_schema",
|
||||
"b_schema", "b_schema",
|
||||
])) as ArrayRef,
|
||||
Arc::new(StringArray::from(vec![
|
||||
"a_table", "b_table", "a_table", "b_table", "a_table", "a_table",
|
||||
"b_table", "b_table",
|
||||
"a_table", "b_table", "a_table", "b_table", "a_table", "a_table", "b_table",
|
||||
"b_table",
|
||||
])) as ArrayRef,
|
||||
Arc::new(StringArray::from(vec![
|
||||
"TABLE", "TABLE", "TABLE", "TABLE", "TABLE", "VIEW", "TABLE", "VIEW",
|
||||
|
||||
@@ -36,9 +36,7 @@ use once_cell::sync::Lazy;
|
||||
|
||||
use super::lexsort_to_indices;
|
||||
use crate::error::*;
|
||||
use crate::sql::{
|
||||
CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode,
|
||||
};
|
||||
use crate::sql::{CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode};
|
||||
|
||||
/// Data structure representing type information for xdbc types.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@@ -201,8 +199,7 @@ impl XdbcTypeInfoDataBuilder {
|
||||
minimum_scale_builder.append_option(info.minimum_scale);
|
||||
maximum_scale_builder.append_option(info.maximum_scale);
|
||||
sql_data_type_builder.append_value(info.sql_data_type as i32);
|
||||
datetime_subcode_builder
|
||||
.append_option(info.datetime_subcode.map(|code| code as i32));
|
||||
datetime_subcode_builder.append_option(info.datetime_subcode.map(|code| code as i32));
|
||||
num_prec_radix_builder.append_option(info.num_prec_radix);
|
||||
interval_precision_builder.append_option(info.interval_precision);
|
||||
});
|
||||
@@ -215,8 +212,7 @@ impl XdbcTypeInfoDataBuilder {
|
||||
let (field, offsets, values, nulls) = create_params_builder.finish().into_parts();
|
||||
// Re-defined the field to be non-nullable
|
||||
let new_field = Arc::new(field.as_ref().clone().with_nullable(false));
|
||||
let create_params =
|
||||
Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef;
|
||||
let create_params = Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef;
|
||||
let nullable = Arc::new(nullable_builder.finish());
|
||||
let case_sensitive = Arc::new(case_sensitive_builder.finish());
|
||||
let searchable = Arc::new(searchable_builder.finish());
|
||||
|
||||
@@ -295,9 +295,8 @@ impl Any {
|
||||
if !self.is::<M>() {
|
||||
return Ok(None);
|
||||
}
|
||||
let m = Message::decode(&*self.value).map_err(|err| {
|
||||
ArrowError::ParseError(format!("Unable to decode Any value: {err}"))
|
||||
})?;
|
||||
let m = Message::decode(&*self.value)
|
||||
.map_err(|err| ArrowError::ParseError(format!("Unable to decode Any value: {err}")))?;
|
||||
Ok(Some(m))
|
||||
}
|
||||
|
||||
|
||||
@@ -24,23 +24,21 @@ use prost::Message;
|
||||
use tonic::{Request, Response, Status, Streaming};
|
||||
|
||||
use super::{
|
||||
ActionBeginSavepointRequest, ActionBeginSavepointResult,
|
||||
ActionBeginTransactionRequest, ActionBeginTransactionResult,
|
||||
ActionCancelQueryRequest, ActionCancelQueryResult,
|
||||
ActionBeginSavepointRequest, ActionBeginSavepointResult, ActionBeginTransactionRequest,
|
||||
ActionBeginTransactionResult, ActionCancelQueryRequest, ActionCancelQueryResult,
|
||||
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
|
||||
ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest,
|
||||
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, Command,
|
||||
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas,
|
||||
CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
|
||||
CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
|
||||
CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery,
|
||||
CommandStatementSubstraitPlan, CommandStatementUpdate, DoPutUpdateResult,
|
||||
ProstMessageExt, SqlInfo, TicketStatementQuery,
|
||||
ActionEndSavepointRequest, ActionEndTransactionRequest, Any, Command, CommandGetCatalogs,
|
||||
CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys,
|
||||
CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables,
|
||||
CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate,
|
||||
CommandStatementQuery, CommandStatementSubstraitPlan, CommandStatementUpdate,
|
||||
DoPutUpdateResult, ProstMessageExt, SqlInfo, TicketStatementQuery,
|
||||
};
|
||||
use crate::{
|
||||
flight_service_server::FlightService, Action, ActionType, Criteria, Empty,
|
||||
FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse,
|
||||
PutResult, SchemaResult, Ticket,
|
||||
flight_service_server::FlightService, Action, ActionType, Criteria, Empty, FlightData,
|
||||
FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult,
|
||||
Ticket,
|
||||
};
|
||||
|
||||
pub(crate) static CREATE_PREPARED_STATEMENT: &str = "CreatePreparedStatement";
|
||||
@@ -549,13 +547,10 @@ where
|
||||
Pin<Box<dyn Stream<Item = Result<HandshakeResponse, Status>> + Send + 'static>>;
|
||||
type ListFlightsStream =
|
||||
Pin<Box<dyn Stream<Item = Result<FlightInfo, Status>> + Send + 'static>>;
|
||||
type DoGetStream =
|
||||
Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + 'static>>;
|
||||
type DoPutStream =
|
||||
Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + 'static>>;
|
||||
type DoActionStream = Pin<
|
||||
Box<dyn Stream<Item = Result<super::super::Result, Status>> + Send + 'static>,
|
||||
>;
|
||||
type DoGetStream = Pin<Box<dyn Stream<Item = Result<FlightData, Status>> + Send + 'static>>;
|
||||
type DoPutStream = Pin<Box<dyn Stream<Item = Result<PutResult, Status>> + Send + 'static>>;
|
||||
type DoActionStream =
|
||||
Pin<Box<dyn Stream<Item = Result<super::super::Result, Status>> + Send + 'static>>;
|
||||
type ListActionsStream =
|
||||
Pin<Box<dyn Stream<Item = Result<ActionType, Status>> + Send + 'static>>;
|
||||
type DoExchangeStream =
|
||||
@@ -580,8 +575,7 @@ where
|
||||
&self,
|
||||
request: Request<FlightDescriptor>,
|
||||
) -> Result<Response<FlightInfo>, Status> {
|
||||
let message =
|
||||
Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?;
|
||||
let message = Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?;
|
||||
|
||||
match Command::try_from(message).map_err(arrow_error_to_status)? {
|
||||
Command::CommandStatementQuery(token) => {
|
||||
@@ -600,9 +594,7 @@ where
|
||||
Command::CommandGetDbSchemas(token) => {
|
||||
return self.get_flight_info_schemas(token, request).await
|
||||
}
|
||||
Command::CommandGetTables(token) => {
|
||||
self.get_flight_info_tables(token, request).await
|
||||
}
|
||||
Command::CommandGetTables(token) => self.get_flight_info_tables(token, request).await,
|
||||
Command::CommandGetTableTypes(token) => {
|
||||
self.get_flight_info_table_types(token, request).await
|
||||
}
|
||||
@@ -642,31 +634,21 @@ where
|
||||
&self,
|
||||
request: Request<Ticket>,
|
||||
) -> Result<Response<Self::DoGetStream>, Status> {
|
||||
let msg: Any = Message::decode(&*request.get_ref().ticket)
|
||||
.map_err(decode_error_to_status)?;
|
||||
let msg: Any =
|
||||
Message::decode(&*request.get_ref().ticket).map_err(decode_error_to_status)?;
|
||||
|
||||
match Command::try_from(msg).map_err(arrow_error_to_status)? {
|
||||
Command::TicketStatementQuery(command) => {
|
||||
self.do_get_statement(command, request).await
|
||||
}
|
||||
Command::TicketStatementQuery(command) => self.do_get_statement(command, request).await,
|
||||
Command::CommandPreparedStatementQuery(command) => {
|
||||
self.do_get_prepared_statement(command, request).await
|
||||
}
|
||||
Command::CommandGetCatalogs(command) => {
|
||||
self.do_get_catalogs(command, request).await
|
||||
}
|
||||
Command::CommandGetDbSchemas(command) => {
|
||||
self.do_get_schemas(command, request).await
|
||||
}
|
||||
Command::CommandGetTables(command) => {
|
||||
self.do_get_tables(command, request).await
|
||||
}
|
||||
Command::CommandGetCatalogs(command) => self.do_get_catalogs(command, request).await,
|
||||
Command::CommandGetDbSchemas(command) => self.do_get_schemas(command, request).await,
|
||||
Command::CommandGetTables(command) => self.do_get_tables(command, request).await,
|
||||
Command::CommandGetTableTypes(command) => {
|
||||
self.do_get_table_types(command, request).await
|
||||
}
|
||||
Command::CommandGetSqlInfo(command) => {
|
||||
self.do_get_sql_info(command, request).await
|
||||
}
|
||||
Command::CommandGetSqlInfo(command) => self.do_get_sql_info(command, request).await,
|
||||
Command::CommandGetPrimaryKeys(command) => {
|
||||
self.do_get_primary_keys(command, request).await
|
||||
}
|
||||
@@ -699,8 +681,8 @@ where
|
||||
let mut request = request.map(PeekableFlightDataStream::new);
|
||||
let cmd = Pin::new(request.get_mut()).peek().await.unwrap().clone()?;
|
||||
|
||||
let message = Any::decode(&*cmd.flight_descriptor.unwrap().cmd)
|
||||
.map_err(decode_error_to_status)?;
|
||||
let message =
|
||||
Any::decode(&*cmd.flight_descriptor.unwrap().cmd).map_err(decode_error_to_status)?;
|
||||
match Command::try_from(message).map_err(arrow_error_to_status)? {
|
||||
Command::CommandStatementUpdate(command) => {
|
||||
let record_count = self.do_put_statement_update(command, request).await?;
|
||||
@@ -755,11 +737,10 @@ where
|
||||
};
|
||||
let create_prepared_substrait_plan_action_type = ActionType {
|
||||
r#type: CREATE_PREPARED_SUBSTRAIT_PLAN.to_string(),
|
||||
description:
|
||||
"Creates a reusable prepared substrait plan resource on the server.\n
|
||||
description: "Creates a reusable prepared substrait plan resource on the server.\n
|
||||
Request Message: ActionCreatePreparedSubstraitPlanRequest\n
|
||||
Response Message: ActionCreatePreparedStatementResult"
|
||||
.into(),
|
||||
.into(),
|
||||
};
|
||||
let begin_transaction_action_type = ActionType {
|
||||
r#type: BEGIN_TRANSACTION.to_string(),
|
||||
@@ -820,8 +801,7 @@ where
|
||||
request: Request<Action>,
|
||||
) -> Result<Response<Self::DoActionStream>, Status> {
|
||||
if request.get_ref().r#type == CREATE_PREPARED_STATEMENT {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionCreatePreparedStatementRequest = any
|
||||
.unpack()
|
||||
@@ -839,8 +819,7 @@ where
|
||||
})]);
|
||||
return Ok(Response::new(Box::pin(output)));
|
||||
} else if request.get_ref().r#type == CLOSE_PREPARED_STATEMENT {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionClosePreparedStatementRequest = any
|
||||
.unpack()
|
||||
@@ -854,8 +833,7 @@ where
|
||||
.await?;
|
||||
return Ok(Response::new(Box::pin(futures::stream::empty())));
|
||||
} else if request.get_ref().r#type == CREATE_PREPARED_SUBSTRAIT_PLAN {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionCreatePreparedSubstraitPlanRequest = any
|
||||
.unpack()
|
||||
@@ -869,47 +847,38 @@ where
|
||||
.await?;
|
||||
return Ok(Response::new(Box::pin(futures::stream::empty())));
|
||||
} else if request.get_ref().r#type == BEGIN_TRANSACTION {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionBeginTransactionRequest = any
|
||||
.unpack()
|
||||
.map_err(arrow_error_to_status)?
|
||||
.ok_or_else(|| {
|
||||
Status::invalid_argument(
|
||||
"Unable to unpack ActionBeginTransactionRequest.",
|
||||
)
|
||||
})?;
|
||||
Status::invalid_argument("Unable to unpack ActionBeginTransactionRequest.")
|
||||
})?;
|
||||
let stmt = self.do_action_begin_transaction(cmd, request).await?;
|
||||
let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
|
||||
body: stmt.as_any().encode_to_vec().into(),
|
||||
})]);
|
||||
return Ok(Response::new(Box::pin(output)));
|
||||
} else if request.get_ref().r#type == END_TRANSACTION {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionEndTransactionRequest = any
|
||||
.unpack()
|
||||
.map_err(arrow_error_to_status)?
|
||||
.ok_or_else(|| {
|
||||
Status::invalid_argument(
|
||||
"Unable to unpack ActionEndTransactionRequest.",
|
||||
)
|
||||
Status::invalid_argument("Unable to unpack ActionEndTransactionRequest.")
|
||||
})?;
|
||||
self.do_action_end_transaction(cmd, request).await?;
|
||||
return Ok(Response::new(Box::pin(futures::stream::empty())));
|
||||
} else if request.get_ref().r#type == BEGIN_SAVEPOINT {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionBeginSavepointRequest = any
|
||||
.unpack()
|
||||
.map_err(arrow_error_to_status)?
|
||||
.ok_or_else(|| {
|
||||
Status::invalid_argument(
|
||||
"Unable to unpack ActionBeginSavepointRequest.",
|
||||
)
|
||||
Status::invalid_argument("Unable to unpack ActionBeginSavepointRequest.")
|
||||
})?;
|
||||
let stmt = self.do_action_begin_savepoint(cmd, request).await?;
|
||||
let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
|
||||
@@ -917,22 +886,18 @@ where
|
||||
})]);
|
||||
return Ok(Response::new(Box::pin(output)));
|
||||
} else if request.get_ref().r#type == END_SAVEPOINT {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionEndSavepointRequest = any
|
||||
.unpack()
|
||||
.map_err(arrow_error_to_status)?
|
||||
.ok_or_else(|| {
|
||||
Status::invalid_argument(
|
||||
"Unable to unpack ActionEndSavepointRequest.",
|
||||
)
|
||||
Status::invalid_argument("Unable to unpack ActionEndSavepointRequest.")
|
||||
})?;
|
||||
self.do_action_end_savepoint(cmd, request).await?;
|
||||
return Ok(Response::new(Box::pin(futures::stream::empty())));
|
||||
} else if request.get_ref().r#type == CANCEL_QUERY {
|
||||
let any =
|
||||
Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?;
|
||||
|
||||
let cmd: ActionCancelQueryRequest = any
|
||||
.unpack()
|
||||
|
||||
@@ -28,9 +28,7 @@ use tonic::{metadata::MetadataMap, Status, Streaming};
|
||||
///
|
||||
/// Note that [`LazyTrailers`] has inner mutability and will only hold actual data after [`ExtractTrailersStream`] is
|
||||
/// fully consumed (dropping it is not required though).
|
||||
pub fn extract_lazy_trailers<T>(
|
||||
s: Streaming<T>,
|
||||
) -> (ExtractTrailersStream<T>, LazyTrailers) {
|
||||
pub fn extract_lazy_trailers<T>(s: Streaming<T>) -> (ExtractTrailersStream<T>, LazyTrailers) {
|
||||
let trailers: SharedTrailers = Default::default();
|
||||
let stream = ExtractTrailersStream {
|
||||
inner: s,
|
||||
@@ -54,10 +52,7 @@ pub struct ExtractTrailersStream<T> {
|
||||
impl<T> Stream for ExtractTrailersStream<T> {
|
||||
type Item = Result<T, Status>;
|
||||
|
||||
fn poll_next(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Option<Self::Item>> {
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let res = ready!(self.inner.poll_next_unpin(cx));
|
||||
|
||||
if res.is_none() {
|
||||
|
||||
@@ -52,26 +52,23 @@ pub fn flight_data_from_arrow_batch(
|
||||
}
|
||||
|
||||
/// Convert a slice of wire protocol `FlightData`s into a vector of `RecordBatch`es
|
||||
pub fn flight_data_to_batches(
|
||||
flight_data: &[FlightData],
|
||||
) -> Result<Vec<RecordBatch>, ArrowError> {
|
||||
pub fn flight_data_to_batches(flight_data: &[FlightData]) -> Result<Vec<RecordBatch>, ArrowError> {
|
||||
let schema = flight_data.get(0).ok_or_else(|| {
|
||||
ArrowError::CastError("Need at least one FlightData for schema".to_string())
|
||||
})?;
|
||||
let message = root_as_message(&schema.data_header[..])
|
||||
.map_err(|_| ArrowError::CastError("Cannot get root as message".to_string()))?;
|
||||
|
||||
let ipc_schema: arrow_ipc::Schema = message.header_as_schema().ok_or_else(|| {
|
||||
ArrowError::CastError("Cannot get header as Schema".to_string())
|
||||
})?;
|
||||
let ipc_schema: arrow_ipc::Schema = message
|
||||
.header_as_schema()
|
||||
.ok_or_else(|| ArrowError::CastError("Cannot get header as Schema".to_string()))?;
|
||||
let schema = fb_to_schema(ipc_schema);
|
||||
let schema = Arc::new(schema);
|
||||
|
||||
let mut batches = vec![];
|
||||
let dictionaries_by_id = HashMap::new();
|
||||
for datum in flight_data[1..].iter() {
|
||||
let batch =
|
||||
flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
|
||||
let batch = flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?;
|
||||
batches.push(batch);
|
||||
}
|
||||
Ok(batches)
|
||||
@@ -84,9 +81,8 @@ pub fn flight_data_to_arrow_batch(
|
||||
dictionaries_by_id: &HashMap<i64, ArrayRef>,
|
||||
) -> Result<RecordBatch, ArrowError> {
|
||||
// check that the data_header is a record batch message
|
||||
let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|err| {
|
||||
ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
|
||||
})?;
|
||||
let message = arrow_ipc::root_as_message(&data.data_header[..])
|
||||
.map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
|
||||
|
||||
message
|
||||
.header_as_record_batch()
|
||||
@@ -124,10 +120,7 @@ pub fn flight_schema_from_arrow_schema(
|
||||
since = "4.4.0",
|
||||
note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).into()"
|
||||
)]
|
||||
pub fn flight_data_from_arrow_schema(
|
||||
schema: &Schema,
|
||||
options: &IpcWriteOptions,
|
||||
) -> FlightData {
|
||||
pub fn flight_data_from_arrow_schema(schema: &Schema, options: &IpcWriteOptions) -> FlightData {
|
||||
SchemaAsIpc::new(schema, options).into()
|
||||
}
|
||||
|
||||
|
||||
@@ -23,9 +23,9 @@ mod common {
|
||||
}
|
||||
use arrow_array::{RecordBatch, UInt64Array};
|
||||
use arrow_flight::{
|
||||
decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder,
|
||||
error::FlightError, Action, ActionType, Criteria, Empty, FlightClient, FlightData,
|
||||
FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, Ticket,
|
||||
decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, error::FlightError, Action,
|
||||
ActionType, Criteria, Empty, FlightClient, FlightData, FlightDescriptor, FlightInfo,
|
||||
HandshakeRequest, HandshakeResponse, PutResult, Ticket,
|
||||
};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use bytes::Bytes;
|
||||
@@ -271,8 +271,7 @@ async fn test_do_put() {
|
||||
},
|
||||
];
|
||||
|
||||
test_server
|
||||
.set_do_put_response(expected_response.clone().into_iter().map(Ok).collect());
|
||||
test_server.set_do_put_response(expected_response.clone().into_iter().map(Ok).collect());
|
||||
|
||||
let input_stream = futures::stream::iter(input_flight_data.clone()).map(Ok);
|
||||
|
||||
@@ -446,9 +445,8 @@ async fn test_do_exchange() {
|
||||
let input_flight_data = test_flight_data().await;
|
||||
let output_flight_data = test_flight_data2().await;
|
||||
|
||||
test_server.set_do_exchange_response(
|
||||
output_flight_data.clone().into_iter().map(Ok).collect(),
|
||||
);
|
||||
test_server
|
||||
.set_do_exchange_response(output_flight_data.clone().into_iter().map(Ok).collect());
|
||||
|
||||
let response_stream = client
|
||||
.do_exchange(futures::stream::iter(input_flight_data.clone()))
|
||||
|
||||
@@ -174,10 +174,7 @@ impl TestFlightServer {
|
||||
}
|
||||
|
||||
/// Specify the response returned from the next call to `do_action`
|
||||
pub fn set_do_action_response(
|
||||
&self,
|
||||
response: Vec<Result<arrow_flight::Result, Status>>,
|
||||
) {
|
||||
pub fn set_do_action_response(&self, response: Vec<Result<arrow_flight::Result, Status>>) {
|
||||
let mut state = self.state.lock().expect("mutex not poisoned");
|
||||
state.do_action_response.replace(response);
|
||||
}
|
||||
@@ -278,9 +275,10 @@ impl FlightService for TestFlightServer {
|
||||
let mut state = self.state.lock().expect("mutex not poisoned");
|
||||
state.handshake_request = Some(handshake_request);
|
||||
|
||||
let response = state.handshake_response.take().unwrap_or_else(|| {
|
||||
Err(Status::internal("No handshake response configured"))
|
||||
})?;
|
||||
let response = state
|
||||
.handshake_response
|
||||
.take()
|
||||
.unwrap_or_else(|| Err(Status::internal("No handshake response configured")))?;
|
||||
|
||||
// turn into a streaming response
|
||||
let output = futures::stream::iter(std::iter::once(Ok(response)));
|
||||
@@ -313,9 +311,10 @@ impl FlightService for TestFlightServer {
|
||||
self.save_metadata(&request);
|
||||
let mut state = self.state.lock().expect("mutex not poisoned");
|
||||
state.get_flight_info_request = Some(request.into_inner());
|
||||
let response = state.get_flight_info_response.take().unwrap_or_else(|| {
|
||||
Err(Status::internal("No get_flight_info response configured"))
|
||||
})?;
|
||||
let response = state
|
||||
.get_flight_info_response
|
||||
.take()
|
||||
.unwrap_or_else(|| Err(Status::internal("No get_flight_info response configured")))?;
|
||||
Ok(Response::new(response))
|
||||
}
|
||||
|
||||
@@ -326,9 +325,10 @@ impl FlightService for TestFlightServer {
|
||||
self.save_metadata(&request);
|
||||
let mut state = self.state.lock().expect("mutex not poisoned");
|
||||
state.get_schema_request = Some(request.into_inner());
|
||||
let schema = state.get_schema_response.take().unwrap_or_else(|| {
|
||||
Err(Status::internal("No get_schema response configured"))
|
||||
})?;
|
||||
let schema = state
|
||||
.get_schema_response
|
||||
.take()
|
||||
.unwrap_or_else(|| Err(Status::internal("No get_schema response configured")))?;
|
||||
|
||||
// encode the schema
|
||||
let options = arrow_ipc::writer::IpcWriteOptions::default();
|
||||
|
||||
@@ -81,9 +81,7 @@ where
|
||||
ready!(self.as_mut().project().inner.poll(cx));
|
||||
|
||||
match result {
|
||||
Ok(response) => {
|
||||
Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body })))
|
||||
}
|
||||
Ok(response) => Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body }))),
|
||||
Err(e) => Poll::Ready(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,8 +195,7 @@ async fn test_app_metadata() {
|
||||
let encode_stream = encoder.build(input_batch_stream);
|
||||
|
||||
// use lower level stream to get access to app metadata
|
||||
let decode_stream =
|
||||
FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
|
||||
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
|
||||
|
||||
let mut messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails");
|
||||
|
||||
@@ -225,8 +224,7 @@ async fn test_max_message_size() {
|
||||
let encode_stream = encoder.build(input_batch_stream);
|
||||
|
||||
// use lower level stream to get access to app metadata
|
||||
let decode_stream =
|
||||
FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
|
||||
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner();
|
||||
|
||||
let messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails");
|
||||
|
||||
@@ -254,8 +252,8 @@ async fn test_max_message_size_fuzz() {
|
||||
];
|
||||
|
||||
for max_message_size_bytes in [10, 1024, 2048, 6400, 3211212] {
|
||||
let encoder = FlightDataEncoderBuilder::default()
|
||||
.with_max_flight_data_size(max_message_size_bytes);
|
||||
let encoder =
|
||||
FlightDataEncoderBuilder::default().with_max_flight_data_size(max_message_size_bytes);
|
||||
|
||||
let input_batch_stream = futures::stream::iter(input.clone()).map(Ok);
|
||||
|
||||
@@ -299,10 +297,10 @@ async fn test_chained_streams_batch_decoder() {
|
||||
let batch2 = make_dictionary_batch(3);
|
||||
|
||||
// Model sending two flight streams back to back, with different schemas
|
||||
let encode_stream1 = FlightDataEncoderBuilder::default()
|
||||
.build(futures::stream::iter(vec![Ok(batch1.clone())]));
|
||||
let encode_stream2 = FlightDataEncoderBuilder::default()
|
||||
.build(futures::stream::iter(vec![Ok(batch2.clone())]));
|
||||
let encode_stream1 =
|
||||
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())]));
|
||||
let encode_stream2 =
|
||||
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())]));
|
||||
|
||||
// append the two streams (so they will have two different schema messages)
|
||||
let encode_stream = encode_stream1.chain(encode_stream2);
|
||||
@@ -324,10 +322,10 @@ async fn test_chained_streams_data_decoder() {
|
||||
let batch2 = make_dictionary_batch(3);
|
||||
|
||||
// Model sending two flight streams back to back, with different schemas
|
||||
let encode_stream1 = FlightDataEncoderBuilder::default()
|
||||
.build(futures::stream::iter(vec![Ok(batch1.clone())]));
|
||||
let encode_stream2 = FlightDataEncoderBuilder::default()
|
||||
.build(futures::stream::iter(vec![Ok(batch2.clone())]));
|
||||
let encode_stream1 =
|
||||
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())]));
|
||||
let encode_stream2 =
|
||||
FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())]));
|
||||
|
||||
// append the two streams (so they will have two different schema messages)
|
||||
let encode_stream = encode_stream1.chain(encode_stream2);
|
||||
@@ -335,8 +333,7 @@ async fn test_chained_streams_data_decoder() {
|
||||
// lower level decode stream can handle multiple schema messages
|
||||
let decode_stream = FlightDataDecoder::new(encode_stream);
|
||||
|
||||
let decoded_data: Vec<_> =
|
||||
decode_stream.try_collect().await.expect("encode / decode");
|
||||
let decoded_data: Vec<_> = decode_stream.try_collect().await.expect("encode / decode");
|
||||
|
||||
println!("decoded data: {decoded_data:#?}");
|
||||
|
||||
@@ -425,8 +422,7 @@ fn make_primitive_batch(num_rows: usize) -> RecordBatch {
|
||||
})
|
||||
.collect();
|
||||
|
||||
RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))])
|
||||
.unwrap()
|
||||
RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))]).unwrap()
|
||||
}
|
||||
|
||||
/// Make a dictionary batch for testing
|
||||
@@ -459,8 +455,7 @@ fn make_dictionary_batch(num_rows: usize) -> RecordBatch {
|
||||
/// match the input.
|
||||
async fn roundtrip(input: Vec<RecordBatch>) {
|
||||
let expected_output = input.clone();
|
||||
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output)
|
||||
.await
|
||||
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await
|
||||
}
|
||||
|
||||
/// Encodes input as a FlightData stream, and then decodes it using
|
||||
@@ -475,8 +470,7 @@ async fn roundtrip_dictionary(input: Vec<RecordBatch>) {
|
||||
.iter()
|
||||
.map(|batch| prepare_batch_for_flight(batch, schema.clone()).unwrap())
|
||||
.collect();
|
||||
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output)
|
||||
.await
|
||||
roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await
|
||||
}
|
||||
|
||||
async fn roundtrip_with_encoder(
|
||||
@@ -491,8 +485,7 @@ async fn roundtrip_with_encoder(
|
||||
let encode_stream = encoder.build(input_batch_stream);
|
||||
|
||||
let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream);
|
||||
let output_batches: Vec<_> =
|
||||
decode_stream.try_collect().await.expect("encode / decode");
|
||||
let output_batches: Vec<_> = decode_stream.try_collect().await.expect("encode / decode");
|
||||
|
||||
// remove any empty batches from input as they are not transmitted
|
||||
let expected_batches: Vec<_> = expected_batches
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user