doc-gen: migrate scalar functions (datetime) documentation 2/2 (#13921)

* doc-gen: migrate scalar functions (datetime) documentation 2/2

* fix: fix typo and update function docs

* doc: update function docs

* doc-gen: remove slash

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
This commit is contained in:
Ian Lai
2025-01-01 01:35:43 +08:00
committed by GitHub
parent 9d393586ce
commit 9eca7d165c
8 changed files with 352 additions and 412 deletions
+37 -40
View File
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use arrow::array::builder::PrimitiveBuilder;
use arrow::array::cast::AsArray;
@@ -27,11 +27,45 @@ use arrow::datatypes::DataType::{Date32, Int32, Int64, UInt32, UInt64, Utf8, Utf
use chrono::prelude::*;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Make a date from year/month/day component parts.",
syntax_example = "make_date(year, month, day)",
sql_example = r#"```sql
> select make_date(2023, 1, 31);
+-------------------------------------------+
| make_date(Int64(2023),Int64(1),Int64(31)) |
+-------------------------------------------+
| 2023-01-31 |
+-------------------------------------------+
> select make_date('2023', '01', '31');
+-----------------------------------------------+
| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) |
+-----------------------------------------------+
| 2023-01-31 |
+-----------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs)
"#,
argument(
name = "year",
description = "Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
),
argument(
name = "month",
description = "Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
),
argument(
name = "day",
description = "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators."
)
)]
#[derive(Debug)]
pub struct MakeDateFunc {
signature: Signature,
@@ -156,47 +190,10 @@ impl ScalarUDFImpl for MakeDateFunc {
Ok(value)
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_make_date_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
fn get_make_date_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_DATETIME,
"Make a date from year/month/day component parts.",
"make_date(year, month, day)")
.with_argument(
"year",
" Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.", )
.with_argument(
"month",
"Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.",
)
.with_argument("day", "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.")
.with_sql_example(r#"```sql
> select make_date(2023, 1, 31);
+-------------------------------------------+
| make_date(Int64(2023),Int64(1),Int64(31)) |
+-------------------------------------------+
| 2023-01-31 |
+-------------------------------------------+
> select make_date('2023', '01', '31');
+-----------------------------------------------+
| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) |
+-----------------------------------------------+
| 2023-01-31 |
+-----------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs)
"#)
.build()
})
}
/// Converts the year/month/day fields to an `i32` representing the days from
/// the unix epoch and invokes `date_consumer_fn` with the value
fn make_date_inner<F: FnMut(i32)>(
+14 -21
View File
@@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
use arrow::datatypes::DataType::Timestamp;
use arrow::datatypes::TimeUnit::Nanosecond;
use std::any::Any;
use std::sync::OnceLock;
use datafusion_common::{internal_err, ExprSchema, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = r#"
Returns the current UTC timestamp.
The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes.
"#,
syntax_example = "now()"
)]
#[derive(Debug)]
pub struct NowFunc {
signature: Signature,
@@ -93,9 +101,6 @@ impl ScalarUDFImpl for NowFunc {
ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())),
)))
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_unixtime_doc())
}
fn aliases(&self) -> &[String] {
&self.aliases
@@ -104,20 +109,8 @@ impl ScalarUDFImpl for NowFunc {
fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
false
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
fn get_to_unixtime_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Returns the current UTC timestamp.
The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes.
"#,
"now()")
.build()
})
fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
}
+31 -35
View File
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use arrow::array::cast::AsArray;
use arrow::array::{new_null_array, Array, ArrayRef, StringArray};
@@ -29,12 +29,40 @@ use arrow::error::ArrowError;
use arrow::util::display::{ArrayFormatter, DurationFormat, FormatOptions};
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
};
use datafusion_macros::user_doc;
/// Scalar UDF backing the SQL `to_char` function.
///
/// The user-facing documentation is declared through `#[user_doc]` below.
// Only the two parameters that appear in the syntax example (`expression`,
// `format`) are documented as arguments; `to_char` takes no `day` argument.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.",
    syntax_example = "to_char(expression, format)",
    sql_example = r#"```sql
> select to_char('2023-03-01'::date, '%d-%m-%Y');
+----------------------------------------------+
| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) |
+----------------------------------------------+
| 01-03-2023 |
+----------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs)
"#,
    argument(
        name = "expression",
        description = "Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration."
    ),
    argument(
        name = "format",
        description = "A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression."
    )
)]
#[derive(Debug)]
pub struct ToCharFunc {
signature: Signature,
@@ -143,42 +171,10 @@ impl ScalarUDFImpl for ToCharFunc {
&self.aliases
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_char_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
fn get_to_char_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_DATETIME,
"Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported.",
"to_char(expression, format)")
.with_argument(
"expression",
" Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration."
)
.with_argument(
"format",
"A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression.",
)
.with_argument("day", "Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators.")
.with_sql_example(r#"```sql
> select to_char('2023-03-01'::date, '%d-%m-%Y');
+----------------------------------------------+
| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) |
+----------------------------------------------+
| 01-03-2023 |
+----------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs)
"#)
.build()
})
}
fn _build_format_options<'a>(
data_type: &DataType,
format: Option<&'a str>,
+17 -15
View File
@@ -38,21 +38,23 @@ Returns the corresponding date.
Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
sql_example = "```sql\n\
> select to_date('2023-01-31');\n\
+-----------------------------+\n\
| to_date(Utf8(\"2023-01-31\")) |\n\
+-----------------------------+\n\
| 2023-01-31 |\n\
+-----------------------------+\n\
> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');\n\
+---------------------------------------------------------------+\n\
| to_date(Utf8(\"2023/01/31\"),Utf8(\"%Y-%m-%d\"),Utf8(\"%Y/%m/%d\")) |\n\
+---------------------------------------------------------------+\n\
| 2023-01-31 |\n\
+---------------------------------------------------------------+\n\
```\n\n\
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)",
sql_example = r#"```sql
> select to_date('2023-01-31');
+-------------------------------+
| to_date(Utf8("2023-01-31")) |
+-------------------------------+
| 2023-01-31 |
+-------------------------------+
> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
+---------------------------------------------------------------------+
| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
+---------------------------------------------------------------------+
| 2023-01-31 |
+---------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
"#,
standard_argument(name = "expression", prefix = "String"),
argument(
name = "format_n",
@@ -17,7 +17,7 @@
use std::any::Any;
use std::ops::Add;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use arrow::array::timezone::Tz;
use arrow::array::{Array, ArrayRef, PrimitiveBuilder};
@@ -31,14 +31,69 @@ use arrow::datatypes::{
use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc};
use datafusion_common::cast::as_primitive_array;
use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
/// A UDF function that converts a timezone-aware timestamp to local time (with no offset or
/// timezone information). In other words, this function strips off the timezone from the timestamp,
/// while keeping the display value of the timestamp the same.
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.",
syntax_example = "to_local_time(expression)",
sql_example = r#"```sql
> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp);
+---------------------------------------------+
| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
+---------------------------------------------+
| 2024-04-01T00:00:20 |
+---------------------------------------------+
> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels');
+---------------------------------------------+
| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
+---------------------------------------------+
| 2024-04-01T00:00:20 |
+---------------------------------------------+
> SELECT
time,
arrow_typeof(time) as type,
to_local_time(time) as to_local_time,
arrow_typeof(to_local_time(time)) as to_local_time_type
FROM (
SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time
);
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
| time | type | to_local_time | to_local_time_type |
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) |
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather
# than UTC boundaries
> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin;
+---------------------+
| date_bin |
+---------------------+
| 2024-04-01T00:00:00 |
+---------------------+
> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone;
+---------------------------+
| date_bin_with_timezone |
+---------------------------+
| 2024-04-01T00:00:00+02:00 |
+---------------------------+
```"#,
argument(
name = "expression",
description = "Time expression to operate on. Can be a constant, column, or function."
)
)]
#[derive(Debug)]
pub struct ToLocalTimeFunc {
signature: Signature,
@@ -359,72 +414,10 @@ impl ScalarUDFImpl for ToLocalTimeFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_local_time_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
fn get_to_local_time_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_DATETIME,
"Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes.",
"to_local_time(expression)")
.with_argument(
"expression",
"Time expression to operate on. Can be a constant, column, or function."
)
.with_sql_example(r#"```sql
> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp);
+---------------------------------------------+
| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
+---------------------------------------------+
| 2024-04-01T00:00:20 |
+---------------------------------------------+
> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels');
+---------------------------------------------+
| to_local_time(Utf8("2024-04-01T00:00:20Z")) |
+---------------------------------------------+
| 2024-04-01T00:00:20 |
+---------------------------------------------+
> SELECT
time,
arrow_typeof(time) as type,
to_local_time(time) as to_local_time,
arrow_typeof(to_local_time(time)) as to_local_time_type
FROM (
SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time
);
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
| time | type | to_local_time | to_local_time_type |
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) |
+---------------------------+------------------------------------------------+---------------------+-----------------------------+
# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather
# than UTC boundaries
> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin;
+---------------------+
| date_bin |
+---------------------+
| 2024-04-01T00:00:00 |
+---------------------+
> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone;
+---------------------------+
| date_bin_with_timezone |
+---------------------------+
| 2024-04-01T00:00:00+02:00 |
+---------------------------+
```"#)
.build()
})
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
+156 -191
View File
@@ -16,7 +16,7 @@
// under the License.
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
use arrow::datatypes::DataType::*;
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
@@ -27,31 +27,180 @@ use arrow::datatypes::{
use crate::datetime::common::*;
use datafusion_common::{exec_err, Result, ScalarType};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
/// Scalar UDF backing the SQL `to_timestamp` function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument list) is declared through `#[user_doc]` and is what
/// `documentation()` returns via `self.doc()`.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = r#"
Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
"#,
    syntax_example = "to_timestamp(expression[, ..., format_n])",
    sql_example = r#"```sql
> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
+-----------------------------------------------------------+
| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-----------------------------------------------------------+
| 2023-01-31T14:26:56.123456789 |
+-----------------------------------------------------------+
> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+--------------------------------------------------------------------------------------------------------+
| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+--------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456789 |
+--------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
    argument(
        name = "expression",
        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "format_n",
        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
    )
)]
#[derive(Debug)]
pub struct ToTimestampFunc {
    // NOTE(review): presumably the accepted argument types; the constructor is not visible here.
    signature: Signature,
}
/// Scalar UDF backing the SQL `to_timestamp_seconds` function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument list) is declared through `#[user_doc]` and is what
/// `documentation()` returns via `self.doc()`.
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
sql_example = r#"```sql
> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
+-------------------------------------------------------------------+
| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-------------------------------------------------------------------+
| 2023-01-31T14:26:56 |
+-------------------------------------------------------------------+
> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+----------------------------------------------------------------------------------------------------------------+
| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+----------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00 |
+----------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
argument(
name = "expression",
description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
),
argument(
name = "format_n",
description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
)
)]
#[derive(Debug)]
pub struct ToTimestampSecondsFunc {
// NOTE(review): presumably the accepted argument types; the constructor is not visible here.
signature: Signature,
}
/// Scalar UDF struct documented as the SQL `to_timestamp_millis` function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument list) is declared through `#[user_doc]`.
// NOTE(review): presumably implements `ScalarUDFImpl` like its sibling
// timestamp structs; that impl is not visible in this view.
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
sql_example = r#"```sql
> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
+------------------------------------------------------------------+
| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+------------------------------------------------------------------+
| 2023-01-31T14:26:56.123 |
+------------------------------------------------------------------+
> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+---------------------------------------------------------------------------------------------------------------+
| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+---------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123 |
+---------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
argument(
name = "expression",
description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
),
argument(
name = "format_n",
description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
)
)]
#[derive(Debug)]
pub struct ToTimestampMillisFunc {
// NOTE(review): presumably the accepted argument types; the constructor is not visible here.
signature: Signature,
}
/// Scalar UDF struct documented as the SQL `to_timestamp_micros` function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument list) is declared through `#[user_doc]`.
// NOTE(review): presumably implements `ScalarUDFImpl` like its sibling
// timestamp structs; that impl is not visible in this view.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
    syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
    sql_example = r#"```sql
> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
+------------------------------------------------------------------+
| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+------------------------------------------------------------------+
| 2023-01-31T14:26:56.123456 |
+------------------------------------------------------------------+
> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+---------------------------------------------------------------------------------------------------------------+
| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+---------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456 |
+---------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
    argument(
        name = "expression",
        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "format_n",
        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
    )
)]
#[derive(Debug)]
pub struct ToTimestampMicrosFunc {
    // NOTE(review): presumably the accepted argument types; the constructor is not visible here.
    signature: Signature,
}
/// Scalar UDF struct documented as the SQL `to_timestamp_nanos` function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument list) is declared through `#[user_doc]`.
// The closing border of the second example table is kept the same width as
// its opening border and header row.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
    syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
    sql_example = r#"```sql
> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
+-----------------------------------------------------------------+
| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-----------------------------------------------------------------+
| 2023-01-31T14:26:56.123456789 |
+-----------------------------------------------------------------+
> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+--------------------------------------------------------------------------------------------------------------+
| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+--------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456789 |
+--------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
    argument(
        name = "expression",
        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "format_n",
        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
    )
)]
#[derive(Debug)]
pub struct ToTimestampNanosFunc {
signature: Signature,
@@ -189,50 +338,10 @@ impl ScalarUDFImpl for ToTimestampFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_timestamp_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
fn get_to_timestamp_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_DATETIME,
r#"
Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
"#,
"to_timestamp(expression[, ..., format_n])")
.with_argument(
"expression",
"Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
)
.with_argument(
"format_n",
"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.",
)
.with_sql_example(r#"```sql
> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
+-----------------------------------------------------------+
| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-----------------------------------------------------------+
| 2023-01-31T14:26:56.123456789 |
+-----------------------------------------------------------+
> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+--------------------------------------------------------------------------------------------------------+
| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+--------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456789 |
+--------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#)
.build()
})
}
impl ScalarUDFImpl for ToTimestampSecondsFunc {
fn as_any(&self) -> &dyn Any {
self
@@ -284,46 +393,10 @@ impl ScalarUDFImpl for ToTimestampSecondsFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_timestamp_seconds_doc())
self.doc()
}
}
static TO_TIMESTAMP_SECONDS_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for `to_timestamp_seconds`.
///
/// The `Documentation` value is built the first time this function is called
/// and stored in `TO_TIMESTAMP_SECONDS_DOC`; every later call returns a
/// reference to that same stored value.
fn get_to_timestamp_seconds_doc() -> &'static Documentation {
    TO_TIMESTAMP_SECONDS_DOC.get_or_init(|| {
        Documentation::builder(
            DOC_SECTION_DATETIME,
            // Second precision carries no fractional digits, matching the example output below.
            "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
            "to_timestamp_seconds(expression[, ..., format_n])")
            .with_argument(
                "expression",
                "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
            )
            .with_argument(
                "format_n",
                "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.",
            )
            // The raw string body stays at column 0 so the rendered markdown table is not indented.
            .with_sql_example(r#"```sql
> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
+-------------------------------------------------------------------+
| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-------------------------------------------------------------------+
| 2023-01-31T14:26:56                                               |
+-------------------------------------------------------------------+
> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+----------------------------------------------------------------------------------------------------------------+
| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+----------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00                                                                                            |
+----------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#)
            .build()
    })
}
impl ScalarUDFImpl for ToTimestampMillisFunc {
fn as_any(&self) -> &dyn Any {
self
@@ -377,46 +450,10 @@ impl ScalarUDFImpl for ToTimestampMillisFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_timestamp_millis_doc())
self.doc()
}
}
static TO_TIMESTAMP_MILLIS_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for `to_timestamp_millis`.
///
/// The value is assembled once, on first use, and stored in
/// `TO_TIMESTAMP_MILLIS_DOC`; subsequent calls hand back the stored reference.
fn get_to_timestamp_millis_doc() -> &'static Documentation {
    /// Assembles the `Documentation` value from its individual text pieces.
    fn assemble() -> Documentation {
        let summary = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.";
        let syntax = "to_timestamp_millis(expression[, ..., format_n])";
        let expression_help = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.";
        let format_help = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.";
        // Kept at column 0 so the rendered markdown table is not indented.
        let sql_example = r#"```sql
> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
+------------------------------------------------------------------+
| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+------------------------------------------------------------------+
| 2023-01-31T14:26:56.123                                          |
+------------------------------------------------------------------+
> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+---------------------------------------------------------------------------------------------------------------+
| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+---------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123                                                                                       |
+---------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#;

        Documentation::builder(DOC_SECTION_DATETIME, summary, syntax)
            .with_argument("expression", expression_help)
            .with_argument("format_n", format_help)
            .with_sql_example(sql_example)
            .build()
    }

    TO_TIMESTAMP_MILLIS_DOC.get_or_init(assemble)
}
impl ScalarUDFImpl for ToTimestampMicrosFunc {
fn as_any(&self) -> &dyn Any {
self
@@ -470,46 +507,10 @@ impl ScalarUDFImpl for ToTimestampMicrosFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_timestamp_micros_doc())
self.doc()
}
}
static TO_TIMESTAMP_MICROS_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for `to_timestamp_micros`.
///
/// The `Documentation` value is built the first time this function is called
/// and stored in `TO_TIMESTAMP_MICROS_DOC`; every later call returns a
/// reference to that same stored value.
fn get_to_timestamp_micros_doc() -> &'static Documentation {
    TO_TIMESTAMP_MICROS_DOC.get_or_init(|| {
        Documentation::builder(
            DOC_SECTION_DATETIME,
            "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
            "to_timestamp_micros(expression[, ..., format_n])")
            .with_argument(
                "expression",
                "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
            )
            .with_argument(
                "format_n",
                "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.",
            )
            // The raw string body stays at column 0 so the rendered markdown table is not indented.
            .with_sql_example(r#"```sql
> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
+------------------------------------------------------------------+
| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+------------------------------------------------------------------+
| 2023-01-31T14:26:56.123456                                       |
+------------------------------------------------------------------+
> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+---------------------------------------------------------------------------------------------------------------+
| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+---------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456                                                                                    |
+---------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#)
            .build()
    })
}
impl ScalarUDFImpl for ToTimestampNanosFunc {
fn as_any(&self) -> &dyn Any {
self
@@ -563,46 +564,10 @@ impl ScalarUDFImpl for ToTimestampNanosFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_timestamp_nanos_doc())
self.doc()
}
}
static TO_TIMESTAMP_NANOS_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for `to_timestamp_nanos`.
///
/// The `Documentation` value is built the first time this function is called
/// and stored in `TO_TIMESTAMP_NANOS_DOC`; every later call returns a
/// reference to that same stored value.
fn get_to_timestamp_nanos_doc() -> &'static Documentation {
    TO_TIMESTAMP_NANOS_DOC.get_or_init(|| {
        Documentation::builder(
            DOC_SECTION_DATETIME,
            "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
            "to_timestamp_nanos(expression[, ..., format_n])")
            .with_argument(
                "expression",
                "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
            )
            .with_argument(
                "format_n",
                "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.",
            )
            // The raw string body stays at column 0 so the rendered markdown table is not indented.
            // All three borders of each table must have the same width as its header row.
            .with_sql_example(r#"```sql
> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
+-----------------------------------------------------------------+
| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+-----------------------------------------------------------------+
| 2023-01-31T14:26:56.123456789                                   |
+-----------------------------------------------------------------+
> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+--------------------------------------------------------------------------------------------------------------+
| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+--------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123456789                                                                                |
+--------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#)
            .build()
    })
}
/// Returns the return type for the to_timestamp_* function, preserving
/// the timezone if it exists.
fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
@@ -19,13 +19,41 @@ use super::to_timestamp::ToTimestampSecondsFunc;
use crate::datetime::common::*;
use arrow::datatypes::{DataType, TimeUnit};
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::OnceLock;
#[user_doc(
doc_section(label = "Time and Date Functions"),
description = "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.",
syntax_example = "to_unixtime(expression[, ..., format_n])",
sql_example = r#"
```sql
> select to_unixtime('2020-09-08T12:00:00+00:00');
+------------------------------------------------+
| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) |
+------------------------------------------------+
| 1599566400 |
+------------------------------------------------+
> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z');
+-----------------------------------------------------------------------------------------------------------------------------+
| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) |
+-----------------------------------------------------------------------------------------------------------------------------+
| 1673638290 |
+-----------------------------------------------------------------------------------------------------------------------------+
```
"#,
argument(
name = "expression",
description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
),
argument(
name = "format_n",
description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
)
)]
#[derive(Debug)]
pub struct ToUnixtimeFunc {
signature: Signature,
@@ -93,40 +121,6 @@ impl ScalarUDFImpl for ToUnixtimeFunc {
}
}
fn documentation(&self) -> Option<&Documentation> {
Some(get_to_unixtime_doc())
self.doc()
}
}
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the user-facing documentation for `to_unixtime`.
///
/// The value is assembled once, on first use, and stored in `DOCUMENTATION`;
/// subsequent calls hand back the stored reference.
fn get_to_unixtime_doc() -> &'static Documentation {
    /// Assembles the `Documentation` value from its individual text pieces.
    fn assemble() -> Documentation {
        let summary = "Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.";
        let syntax = "to_unixtime(expression[, ..., format_n])";
        let expression_help = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.";
        let format_help = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned.";
        // Kept at column 0 (including the leading newline) so the rendered markdown is unchanged.
        let sql_example = r#"
```sql
> select to_unixtime('2020-09-08T12:00:00+00:00');
+------------------------------------------------+
| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) |
+------------------------------------------------+
| 1599566400                                     |
+------------------------------------------------+
> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z');
+-----------------------------------------------------------------------------------------------------------------------------+
| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) |
+-----------------------------------------------------------------------------------------------------------------------------+
| 1673638290                                                                                                                  |
+-----------------------------------------------------------------------------------------------------------------------------+
```
"#;

        Documentation::builder(DOC_SECTION_DATETIME, summary, syntax)
            .with_argument("expression", expression_help)
            .with_argument("format_n", format_help)
            .with_sql_example(sql_example)
            .build()
    }

    DOCUMENTATION.get_or_init(assemble)
}
@@ -2231,17 +2231,17 @@ to_date('2017-05-31', '%Y-%m-%d')
```sql
> select to_date('2023-01-31');
+-----------------------------+
+-------------------------------+
| to_date(Utf8("2023-01-31")) |
+-----------------------------+
| 2023-01-31 |
+-----------------------------+
+-------------------------------+
| 2023-01-31 |
+-------------------------------+
> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
+---------------------------------------------------------------+
+---------------------------------------------------------------------+
| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
+---------------------------------------------------------------+
| 2023-01-31 |
+---------------------------------------------------------------+
+---------------------------------------------------------------------+
| 2023-01-31 |
+---------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)