mirror of
https://github.com/langchain-ai/datafusion.git
synced 2026-07-01 21:24:06 -04:00
fix: Fix scalar broadcast for to_timestamp() (#20224)
When to_timestamp() was invoked with a scalar Float64 and an array of
strings, the previous coding neglected to broadcast the scalar to the
array properly when producing the return value. That is, a query like
`SELECT to_timestamp(123.5, t.x) FROM t` would result in:
Internal error: UDF to_timestamp returned a different number of rows
than expected
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #20223
## Rationale for this change
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
## What changes are included in this PR?
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
## Are these changes tested?
Yes, added SLT.
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
## Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
This commit is contained in:
@@ -430,27 +430,56 @@ impl ScalarUDFImpl for ToTimestampFunc {
|
||||
.cast_to(&Timestamp(Second, None), None)?
|
||||
.cast_to(&Timestamp(Nanosecond, tz), None),
|
||||
Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
|
||||
Float16 => {
|
||||
let arr = args[0].to_array(1)?;
|
||||
let f16_arr = downcast_arg!(&arr, Float16Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
Float32 => {
|
||||
let arr = args[0].to_array(1)?;
|
||||
let f32_arr = downcast_arg!(&arr, Float32Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
Float64 => {
|
||||
let arr = args[0].to_array(1)?;
|
||||
let f64_arr = downcast_arg!(&arr, Float64Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
Float16 => match &args[0] {
|
||||
ColumnarValue::Scalar(ScalarValue::Float16(value)) => {
|
||||
let timestamp_nanos =
|
||||
value.map(|v| (v.to_f64() * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
|
||||
timestamp_nanos,
|
||||
tz,
|
||||
)))
|
||||
}
|
||||
ColumnarValue::Array(arr) => {
|
||||
let f16_arr = downcast_arg!(arr, Float16Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
_ => exec_err!("Invalid Float16 value for to_timestamp"),
|
||||
},
|
||||
Float32 => match &args[0] {
|
||||
ColumnarValue::Scalar(ScalarValue::Float32(value)) => {
|
||||
let timestamp_nanos =
|
||||
value.map(|v| (v as f64 * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
|
||||
timestamp_nanos,
|
||||
tz,
|
||||
)))
|
||||
}
|
||||
ColumnarValue::Array(arr) => {
|
||||
let f32_arr = downcast_arg!(arr, Float32Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
_ => exec_err!("Invalid Float32 value for to_timestamp"),
|
||||
},
|
||||
Float64 => match &args[0] {
|
||||
ColumnarValue::Scalar(ScalarValue::Float64(value)) => {
|
||||
let timestamp_nanos = value.map(|v| (v * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
|
||||
timestamp_nanos,
|
||||
tz,
|
||||
)))
|
||||
}
|
||||
ColumnarValue::Array(arr) => {
|
||||
let f64_arr = downcast_arg!(arr, Float64Array);
|
||||
let result: TimestampNanosecondArray =
|
||||
f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
|
||||
Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
|
||||
}
|
||||
_ => exec_err!("Invalid Float64 value for to_timestamp"),
|
||||
},
|
||||
Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, _) => {
|
||||
let arg = args[0].cast_to(&Decimal128(38, 9), None)?;
|
||||
decimal128_to_timestamp_nanos(&arg, tz)
|
||||
|
||||
@@ -113,4 +113,3 @@ SELECT '2001-09-28'::date / '03:00'::time
|
||||
|
||||
query error Invalid timestamp arithmetic operation
|
||||
SELECT '2001-09-28'::date % '03:00'::time
|
||||
|
||||
|
||||
@@ -144,4 +144,4 @@ query error Invalid timestamp arithmetic operation
|
||||
SELECT '2001-09-28T01:00:00'::timestamp % arrow_cast(12345, 'Duration(Second)');
|
||||
|
||||
query error Invalid timestamp arithmetic operation
|
||||
SELECT '2001-09-28T01:00:00'::timestamp / arrow_cast(12345, 'Duration(Second)');
|
||||
SELECT '2001-09-28T01:00:00'::timestamp / arrow_cast(12345, 'Duration(Second)');
|
||||
|
||||
@@ -5328,3 +5328,33 @@ drop table ts_data_secs
|
||||
|
||||
statement ok
|
||||
drop table ts_data_micros_kolkata
|
||||
|
||||
##########
|
||||
## Test to_timestamp with scalar float inputs
|
||||
##########
|
||||
|
||||
statement ok
|
||||
create table test_to_timestamp_scalar(id int, name varchar) as values
|
||||
(1, 'foo'),
|
||||
(2, 'bar');
|
||||
|
||||
query P
|
||||
SELECT to_timestamp(123.5, name) FROM test_to_timestamp_scalar ORDER BY id;
|
||||
----
|
||||
1970-01-01T00:02:03.500
|
||||
1970-01-01T00:02:03.500
|
||||
|
||||
query P
|
||||
SELECT to_timestamp(456.789::float, name) FROM test_to_timestamp_scalar ORDER BY id;
|
||||
----
|
||||
1970-01-01T00:07:36.789001464
|
||||
1970-01-01T00:07:36.789001464
|
||||
|
||||
query P
|
||||
SELECT to_timestamp(arrow_cast(100.5, 'Float16'), name) FROM test_to_timestamp_scalar ORDER BY id;
|
||||
----
|
||||
1970-01-01T00:01:40.500
|
||||
1970-01-01T00:01:40.500
|
||||
|
||||
statement ok
|
||||
drop table test_to_timestamp_scalar
|
||||
|
||||
@@ -871,4 +871,4 @@ DROP TABLE test_limit_with_partitions;
|
||||
|
||||
# Tear down src_table table:
|
||||
statement ok
|
||||
DROP TABLE src_table;
|
||||
DROP TABLE src_table;
|
||||
|
||||
@@ -19,4 +19,4 @@ SELECT COUNT(*) FROM (SELECT i FROM filter_limit WHERE i <> 0 LIMIT 1);
|
||||
1
|
||||
|
||||
statement ok
|
||||
DROP TABLE filter_limit;
|
||||
DROP TABLE filter_limit;
|
||||
|
||||
@@ -129,4 +129,3 @@ SELECT size(column1) FROM VALUES (map(['a'], [1])), (map(['a','b'], [1,2])), (NU
|
||||
1
|
||||
2
|
||||
-1
|
||||
|
||||
|
||||
@@ -71,4 +71,3 @@ NULL
|
||||
# incorrect format
|
||||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TIME_TRUNC` must be one of: hour, minute, second, millisecond, microsecond
|
||||
SELECT time_trunc('test', '09:32:05.123456'::time);
|
||||
|
||||
|
||||
@@ -90,4 +90,3 @@ SELECT trunc('2009-02-12'::date, NULL::string);
|
||||
# incorrect format
|
||||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nError during planning: The format argument of `TRUNC` must be one of: year, yy, yyyy, month, mm, mon, day, week, quarter.
|
||||
SELECT trunc('2009-02-12'::date, 'test'::string);
|
||||
|
||||
|
||||
@@ -1666,4 +1666,4 @@ order by id;
|
||||
3 2 150
|
||||
|
||||
statement ok
|
||||
drop table t_agg_window;
|
||||
drop table t_agg_window;
|
||||
|
||||
@@ -82,4 +82,4 @@ logical_plan
|
||||
physical_plan_error
|
||||
01)TRUNCATE operation on table 't1'
|
||||
02)caused by
|
||||
03)This feature is not implemented: TRUNCATE not supported for Base table
|
||||
03)This feature is not implemented: TRUNCATE not supported for Base table
|
||||
|
||||
Reference in New Issue
Block a user